#!/bin/bash ### ### AUTHORSTATS - Get author statistics from Internet Drafts and RFCs ### ### Version 2.5.2 ### ### Written in 2005-2008 by Jari Arkko ### Donated to the public domain. ### ### 2.5.0 Cleaned up HTML ### 2.5.1 Added support for authors moving, fixed country and company capitalization ### 2.5.2 Fixed monthly publication graph style ### ### Usage: ### ### authorstats draft-foo-00.txt ... ### ### ### Initialize ### debug=0 doctype=draft authordata=none wgdata=none oldwgdata=none rfcdata=none chairdata=none iabdata=none iaocdata=none popdata=none topic="active I-Ds" tmpbase=/tmp/$$-tmp ### ### Process options ### for i in $* do case x$1 in (x--doctype)shift doctype=$1 shift;; (x--debug) debug=1; shift;; (x--data) shift; authordata=$1; shift;; (x--wgs) shift wgdata=$1; shift;; (x--oldwgs) shift oldwgdata=$1; shift;; (x--rfcs) shift rfcdata=$1; shift;; (x--chairs) shift chairdata=$1; shift;; (x--iab) shift iabdata=$1; shift;; (x--iaoc) shift iaocdata=$1; shift;; (x--population) shift popdata=$1; shift;; (x--topic) shift topic="$1"; shift;; (x-*) echo 'authorstats: Unrecognized option -- exit'; exit 1;; esac done ### ### Get author data from the drafts ### if [ x$authordata = xnone ] then authordata=${tmpbase}-a getauthors $* > $authordata fi ### ### Test that we have author data ### if [ -s $authordata ] then ok=ok else echo 'There is no author data in file '$authordata' -- exit' exit 1 fi ### ### Get WG data from the IETF site ### if [ x$wgdata = xnone ] then (cd /tmp; rm -f wg-dir.html; wget -q http://www.ietf.org/html.charters/wg-dir.html) wgdata=/tmp/wg-dir.html fi ### ### Get old WG data from the IETF site ### if [ x$oldwgdata = xnone ] then (cd /tmp; rm -f oldindex.html; wget -q -O oldindex.html http://www.ietf.org/html.charters/OLD/index.html) oldwgdata=/tmp/oldindex.html fi ### ### Get RFC data from the IETF tools site ### if [ x$rfcdata = xnone ] then (cd /tmp; rm -f rfc_id.txt; wget -q http://tools.ietf.org/id/rfc_id.txt) rfcdata=/tmp/rfc_id.txt 
fi ### ### Get chair/AD data from the IETF site ### if [ x$chairdata = xnone ] then (cd /tmp; rm -f rfc_id.txt; wget -q http://www.ietf.org/ietf/1wg-summary.txt) chairdata=/tmp/1wg-summary.txt fi ### ### Get IAB data from the IAB site ### if [ x$iabdata = xnone ] then (cd /tmp; rm -f rfc_id.txt; wget -q http://www.iab.org/about/members.html) iabdata=/tmp/members.html fi ### ### Get IAOC data from the temporary IAOC site ### if [ x$iaocdata = xnone ] then (cd /tmp; rm -f index.html #wget -q http://koi.uoregon.edu/~iaoc/index.html ) iaocdata=/tmp/index.html fi ### ### Get country population data from the wikipedia site ### if [ x$popdata = xnone ] then (cd /tmp; rm -rf List_of_countries_by_population List_of_countries_by_population.html List_of_countries_by_population.files; wget -q http://en.wikipedia.org/wiki/List_of_countries_by_population) popdata=/tmp/List_of_countries_by_population fi ### ### Process WG data ### wgs=/tmp/wgdata.txt rm -f $wgs if [ $debug = 1 ] then echo authorstats: parsing wg data fi awk < $wgdata ' BEGIN { area = ""; } /.* Area<.h2><.a>$/ { i1 = index($0,"

"); area = substr($0,i1+4); i2 = index(area," Area<"); area = substr(area,1,i2 - 1); next; } /-charter.html/ { if (area != "") { i1 = index($0,"html.charters/"); wg = substr($0,i1 + 5 + 8 + 1); i2 = index(wg,"-charter.html"); wg = substr(wg,1,i2 - 1); printf("0-wgareadefinition:%s:%s\n", area, wg); } } /.*/ { next; } END { } ' > $wgs ### ### Process old WG data ### oldwgs=/tmp/oldindex.txt rm -f $oldwgs if [ $debug = 1 ] then echo authorstats: parsing old wg data fi awk < $oldwgdata ' BEGIN { area = ""; } /.* Area<.[hH]2>/ { i1 = index($0,"

"); area = substr($0,i1+4); i2 = index(area," Area<"); area = substr(area,1,i2 - 1); #printf("9-debug: saw area %s on line %s\n", area, $0); } /-charter.html/ { #printf("9-debug: saw potential wg on line %s\n", $0); if (area != "") { i1 = index($0,"HREF="); wg = substr($0,i1 + 5 + 1 ); i2 = index(wg,"-charter.html"); wg = substr(wg,1,i2 - 1); printf("0-oldwgareadefinition:%s:%s\n", area, wg); } } /.*/ { next; } END { } ' > $oldwgs ### ### Process IAB data ### if [ $debug = 1 ] then echo authorstats: parsing iab data fi iab=/tmp/iab.txt rm -f $iab awk < $iabdata ' BEGIN { iniab = 0; } /IAB MEMBERS/ { iniab = 1; next; } /EX-OFFICIO/ { iniab = 0; next; } /vacancy/ { next; } /blue-dot/ { if (iniab) { z = $0; gsub(/^.*[<]b[>]/,"",z); gsub(/[<].b[>].*$/,"",z); printf("z3-iab:%s\n",z); } next; } /.*/ { next; } END { }' | sed 's/ä/a/g' | sed 's/ö/o/g' | cat > $iab ### ### Process IAOC data ### if [ $debug = 1 ] then echo authorstats: parsing iaoc data fi iaoc=/tmp/iaoc.txt rm -f $iaoc awk < $iaocdata ' BEGIN { iniaoc = 0; } /IAOC Membership/ { iniaoc = 1; #printf("starting...\n"); } /IAOC Responsibilities/ { iniaoc = 0; #printf("ending...\n"); } /^[<]li[>] .*,/ { #printf("candidate: %s\n", $0); if (iniaoc) { z = $0; z = substr(z,6); z = substr(z,1,index(z,",") - 1); printf("z4-iaoc:%s\n", z); } next; } /.*/ { next; } END { }' | sed 's/Kurtis Lindquist/Kurtis Lindqvist/' | cat > $iaoc ### ### Process chair data ### if [ $debug = 1 ] then echo authorstats: parsing chair data fi chairs=/tmp/chairdata.txt rm -f $chairs awk < $chairdata ' BEGIN { area = ""; wg = ""; } /IETF Working Group Summary .By Area./ { next; } /-----------------/ { next; } /^ +WG Mail:/ { next; } /^ +To Join:/ { next; } /^ +In Body:/ { next; } /^ +Archive:/ { next; } /^[A-Z].* Area .[a-z]+.$/ { pos = index($0," Area "); area = substr($0,1,pos - 1); wg = ""; next; } /^[A-Z].* [(].*[)]$/ { z = $0; gsub(/^.*[(]/,"",z); gsub(/[)]$/,"",z); wg = z; } /^ +[A-Za-z .0-9():]+ [<].*[>]$/ { z = $0; gsub(/ +Chairs.s.: 
+/,"",z); gsub(/^ +/,"",z); gsub(/ [<].*$/,"",z); if (wg == "") { printf("z1-ad:%s:%s\n", area, z); } else { printf("z2-chair:%s:%s\n", wg, z); } next; } END { } ' | sed 's/Kurt Zeilenga/Kurt D. Zeilenga/' | sed 's/Russ Housley/Russell Housley/' | sed 's/Gregory M[.] Lebovitz/Gregory Lebovitz/' | cat > $chairs ### ### Process population data ### pops=/tmp/popdata.txt rm -f $pops cat $popdata | tr -d "'" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed 's/[Ff]lag[ _]+of[ _]+//g' | sed 's/[Ff]lag of //g' | sed 's/svg:/:/g' | fgrep -v 'cite_ref-overseas_france' | # avoid overriding france tee /tmp/poptmp.txt | awk ' function isalpha(c) { return((c >= "a" && c <= "z") || (c >= "A" && c <= "Z")); } function findchar(c) { for (y = 1; y <= 255; y++) { buf = sprintf("%c", y); if (buf == c) return(y); } return(0); } function printstring(what,s) { printf("# debug 4: %s\n", what); for (h = 1; h <= length(s); h++) { printf("# %s (%d)\n", substr(s,h,1), findchar(substr(s,h,1))); } } BEGIN { inskip=1; # # Not all names in Wikipedia are in the same form # as the ones used by getauthors. Map the special # ones so that they match. 
# mapit["belgium civil"] = "belgium"; mapit["the czech republic"] = "czech republic"; mapit["the solomon islands"] = "solomon islands"; mapit["the peoples republic of china"] = "china"; mapit["peoples republic of china"] = "china"; mapit["the united states"] = "usa"; mapit["united states"] = "usa"; mapit["the united kingdom"] = "united kingdom"; mapit["republic of ireland"] = "ireland"; mapit["netherlands"] = "the netherlands"; mapit["the republic of china"] = "taiwan"; mapit["republic of china"] = "taiwan"; mapit["the united arab emirates"] = "united arab emirates"; mapit["the central african republic"] = "central african republic"; mapit["the gambia"] = "gambia"; mapit["the comoros"] = "comoros"; mapit["the bahamas"] = "bahamas"; mapit["the vatican city"] = "vatican city"; } /^.td.1..td.$/ { inskip = 0; next; } /^.td align="left"./ { if (!inskip) { s = $0; while (length(s) > 0 && substr(s,1,6) != "title=") s = substr(s,2); s = substr(s,8); n = ""; while (length(s) > 0 && substr(s,1,1) != sprintf("%c",34)) { if (isalpha(substr(s,1,1)) || substr(s,1,1) == " " || substr(s,1,1) == "-" || substr(s,1,1) == "_") { n = n substr(s,1,1); } s = substr(s,2); } country = tolower(n); gsub(/[.]*svg/,"",country); # printf("# debug 1: %s - %s\n", n, country); # printf("# debug 2: %s - %s\n", country, mapit[country]); # country == "republic of ireland"); # printstring("input", country); # printstring("const", "republic of ireland"); if (mapit[country] != "") country = mapit[country]; } next; } /^.td.[0-9][0-9.,e+]+..td.$/ { if (inskip == 0 && country != "") { p = substr($0,5); p = substr(p,1,length(p) - 5); gsub(/,/,"",p); pv = p + 0; printf("0-population:%s:%d:%d\n",country,pv,NR); # printf("# debug 4: %s\n", country); country = ""; } next; } /^.td.[0-9][0-9,]+.sup id.*..sup...td.$/ { if (inskip == 0 && country != "") { p = substr($0,5); p = substr(p,1,index(p,"sup") - 2); gsub(/,/,"",p); printf("0-population:%s:%s:%d\n",country,p,NR); # printf("# debug 5: %s\n", country); 
country = ""; } next; } /.*/ { next; } ' > $pops ### ### Process RFC data ### if [ $debug = 1 ] then echo authorstats: parsing rfc data fi rfcs=/tmp/rfcdata.txt rm -f $rfcs ### ### Safety check to avoid bad characters etc ### if [ $debug = 1 ] then echo authorstats: process the database fi (cat $wgs; cat $pops; cat $chairs; cat $iab; cat $iaoc; cat $authordata) | sort | tr -d '*?"{}/%";&<>\341\207\351\355' | tr -d "'" | tee /tmp/inputdb.txt | ### ### Process the database ### awk -v topic="$topic" \ -v doctype=$doctype \ -v debug=$debug \ ' function inittabs() { areaacronym["applications"] = "app"; areaacronym["general"] = "gen"; areaacronym["internet"] = "int"; areaacronym["ip: next generation"] = "nextgen"; areaacronym["network management area"] = "ops"; areaacronym["operational requirements"] = "ops"; areaacronym["operations and management"] = "ops"; areaacronym["osi integration"] = "osi"; areaacronym["real-time applications and infrastructure"] = "rai"; areaacronym["routing"] = "rtg"; areaacronym["security"] = "sec"; areaacronym["sub-ip"] = "sub"; areaacronym["transport"] = "tsv"; areaacronym["user services"] = "user"; monthname["1"] = "January"; monthname["2"] = "February"; monthname["3"] = "March"; monthname["4"] = "April"; monthname["5"] = "May"; monthname["6"] = "June"; monthname["7"] = "July"; monthname["8"] = "August"; monthname["9"] = "September"; monthname["10"] = "October"; monthname["11"] = "November"; monthname["12"] = "December"; isineu["finland"] = 1; isineu["sweden"] = 1; isineu["denmark"] = 1; isineu["united kingdom"] = 1; isineu["germany"] = 1; isineu["the netherlands"] = 1; isineu["france"] = 1; isineu["belgium"] = 1; isineu["italy"] = 1; isineu["spain"] = 1; isineu["portugal"] = 1; isineu["austria"] = 1; isineu["cyprys"] = 1; isineu["czech republic"] = 1; isineu["estonia"] = 1; isineu["greece"] = 1; isineu["hungary"] = 1; isineu["ireland"] = 1; isineu["latvia"] = 1; isineu["lithuania"] = 1; isineu["luxembourg"] = 1; isineu["malta"] = 1; 
# Remainder of the lookup tables set up by inittabs(), followed by the
# first part of initstats().
  isineu["poland"] = 1;
  isineu["slovakia"] = 1;
  isineu["slovenia"] = 1;
  # NOTE(review): Monaco is not an EU member state; entry kept as found.
  isineu["monaco"] = 1;

  population["european union"] = 0;

  # Every country flagged in isineu is filed under Europe.
  for (eucountry in isineu) {
    continentof[eucountry] = "europe";
  }

  # "st. lucia" used to be listed here as well, but that entry was
  # overwritten by the South America one further down, so it is dropped.
  continentof["switzerland"] = "europe";
  continentof["norway"] = "europe";
  continentof["bulgaria"] = "europe";
  continentof["romania"] = "europe";
  continentof["russia"] = "europe";
  continentof["ukraine"] = "europe";
  continentof["turkey"] = "europe";
  continentof["croatia"] = "europe";

  continentof["australia"] = "australia";
  continentof["new zealand"] = "australia";
  continentof["tonga"] = "australia";
  continentof["solomon islands"] = "australia";

  continentof["armenia"] = "asia";
  continentof["east timor"] = "asia";
  continentof["south korea"] = "asia";
  continentof["pakistan"] = "asia";
  continentof["vietnam"] = "asia";
  continentof["japan"] = "asia";
  continentof["china"] = "asia";
  continentof["israel"] = "asia";
  continentof["jordan"] = "asia";
  continentof["hong kong"] = "asia";
  continentof["thailand"] = "asia";
  continentof["india"] = "asia";
  continentof["syria"] = "asia";
  continentof["united arab emirates"] = "asia";
  continentof["singapore"] = "asia";
  continentof["saudi arabia"] = "asia";
  continentof["lebanon"] = "asia";
  continentof["mongolia"] = "asia";
  continentof["cocos (keeling) isl."] = "asia";
  continentof["taiwan"] = "asia";
  continentof["macau"] = "asia";
  continentof["malaysia"] = "asia";
  continentof["bhutan"] = "asia";

  continentof["sudan"] = "africa";
  continentof["tunisia"] = "africa";
  continentof["egypt"] = "africa";
  continentof["st. tome and principe"] = "africa";

  continentof["netherland antilles"] = "south america";
  continentof["venezuela"] = "south america";
  continentof["aruba"] = "south america";
  continentof["panama"] = "south america";
  continentof["chile"] = "south america";
  continentof["brazil"] = "south america";
  continentof["argentina"] = "south america";
  continentof["costa rica"] = "south america";
  continentof["uruguay"] = "south america";
  continentof["st. lucia"] = "south america";
  continentof["antigua and barbuda"] = "south america";

  continentof["mexico"] = "north america";
  continentof["usa"] = "north america";
  continentof["canada"] = "north america";
  continentof["greenland"] = "north america";

  continentof["antarctica"] = "antarctica";

  monthnameshort[1] = "jan";
  monthnameshort[2] = "feb";
  monthnameshort[3] = "mar";
  monthnameshort[4] = "apr";
  monthnameshort[5] = "may";
  monthnameshort[6] = "jun";
  monthnameshort[7] = "jul";
  monthnameshort[8] = "aug";
  monthnameshort[9] = "sep";
  monthnameshort[10] = "oct";
  monthnameshort[11] = "nov";
  monthnameshort[12] = "dec";

  # First-name tables. The names are kept as blank separated lists and
  # loaded with split(); the inittabs_ prefix keeps the helper variables
  # (awk globals) clear of names used elsewhere.
  # NOTE(review): "angelica" and "christina" sit in the male list and
  # "evan" in the female list; kept exactly as found.
  inittabs_list = "";
  inittabs_list = inittabs_list " abraham adam adrian ajit alan albert alec alex alexander alexandru";
  inittabs_list = inittabs_list " alfred alper alpesh anders andre andrei andrew angelica angelos";
  inittabs_list = inittabs_list " anthony anton antti ari art arthur artur atle avi";
  inittabs_list = inittabs_list " barney barry basavaraj ben benjamin bernard bernhard bernie bert";
  inittabs_list = inittabs_list " bertrand bill billy bjorn bob brad brent bret brett brian bruce";
  inittabs_list = inittabs_list " bryan burt burton butler buz buzz";
  inittabs_list = inittabs_list " carlo carl-uno carsten cesar charles charley charlie chip chris";
  inittabs_list = inittabs_list " christian christina christophe christopher chuck cliff clifford";
  inittabs_list = inittabs_list " colin craig cullen curt curtis";
  inittabs_list = inittabs_list " dan daniel danny dave david dean dennis derek dick dimitri";
  inittabs_list = inittabs_list " dimitrios dirk dirk-willem don donald dorian doug doughan douglas";
  inittabs_list = inittabs_list " drew duncan dwight";
  inittabs_list = inittabs_list " ed edgar edmund eduard eduardo edward edwin einar eliot elmer";
  inittabs_list = inittabs_list " enrique eric erik ernesto eugene";
  inittabs_list = inittabs_list " fabio farid farooq fernando florent francis francois frangois";
  inittabs_list = inittabs_list " frank fred frederic fredrik";
  inittabs_list = inittabs_list " gabriel gary geoff geoffrey georg george georgios gerald gerard";
  inittabs_list = inittabs_list " gerben gerry gery gilbert gilles glen glenn gonzalo gopal goran";
  inittabs_list = inittabs_list " gordon graham grant greg gregg gregor gregory guenter gunnar guy";
  inittabs_list = inittabs_list " hannes hanns hans hans-bernhard hans-werner harald harri harry";
  inittabs_list = inittabs_list " henning henrik henry hesham holger horst";
  inittabs_list = inittabs_list " igor isaac ivano";
  inittabs_list = inittabs_list " jack jacob jake jakob james jan jari jarkko jarno jason javier";
  inittabs_list = inittabs_list " jean-louis jean-marc jean-michel jean-philippe jean-yves jed jeff";
  inittabs_list = inittabs_list " jeffery jeffrey jeremy jerome jerry jesse jim jimmy jinhyeock";
  inittabs_list = inittabs_list " jinmei joachim joe joel joerg johan johannes john johnny johnson";
  inittabs_list = inittabs_list " jon jonathan jonne jordi jorge jose joseph josh joshua jouni";
  inittabs_list = inittabs_list " juergen juha juha-pekka juin-hwey jukka jun-ichiro";
  inittabs_list = inittabs_list " kaj karim karl keiichi keith ken kendall kenneth kenny kevin";
  inittabs_list = inittabs_list " klaus krister kristofer kuntal kurt kurtis";
  inittabs_list = inittabs_list " lakshminath lance larry lars lars-ake lars-erik leo leon leonard";
  inittabs_list = inittabs_list " linus loa lou louis louis-nicolas luis luke lyndon";
  inittabs_list = inittabs_list " madjid magnus marc marcello marco marcus mark markku markus";
  inittabs_list = inittabs_list " marshall martin marvin masataka mathias mats matt matthew max";
  inittabs_list = inittabs_list " maximilian michael michel mick miguel miguel-angel mika mikael";
  inittabs_list = inittabs_list " mike mikhail mitchell mohamad mohamed mohan mohit morgan";
  inittabs_list = inittabs_list " nathan nathaniel neal ned neil nevil nevin nic nick nico nicolas";
  inittabs_list = inittabs_list " nigel nikolaj nils noel norm norman north";
  inittabs_list = inittabs_list " olaf olafur oliver olivier osama oskar osmund owen";
  inittabs_list = inittabs_list " paolo parviz pascal pasi patrick patrik paul pavel pedro pekka";
  inittabs_list = inittabs_list " per perry pete peter petri phil philip philipp phill phillip";
  inittabs_list = inittabs_list " pierre";
  inittabs_list = inittabs_list " raj rajeev ralph rami ramon ran randall randy ray raymond";
  inittabs_list = inittabs_list " reinaldo reiner rene rich richard rick rickard rob robert rodney";
  inittabs_list = inittabs_list " roger rohan roland rolf rolland ron ronald ross roy rudolf russ";
  inittabs_list = inittabs_list " russel russell ryan";
  inittabs_list = inittabs_list " sam sami samuel scott sean sebastian shawn simon skip spencer";
  inittabs_list = inittabs_list " stan stanislav stanley stefaan stefan stefano stephan stephen";
  inittabs_list = inittabs_list " steqven steve steven stewart stig stuart suresh sven";
  inittabs_list = inittabs_list " tatu ted tero terry theodore thierry thomas thommy tim timo";
  inittabs_list = inittabs_list " timothy tom tomi ton toni tony trevor troy tuomas";
  inittabs_list = inittabs_list " uri vern vernon vesa vic victor vijay vilho vince vincent vint";
  inittabs_list = inittabs_list " vladimir";
  inittabs_list = inittabs_list " walt walter warren wayne werner wesley weston wilfred will";
  inittabs_list = inittabs_list " willem william wilson winston wolfgang";
  inittabs_list = inittabs_list " yakov yoshihiro yves";
  inittabs_n = split(inittabs_list, inittabs_names, " ");
  for (inittabs_i = 1; inittabs_i <= inittabs_n; inittabs_i++) {
    malefirstname[inittabs_names[inittabs_i]] = 1;
  }

  inittabs_list = "";
  inittabs_list = inittabs_list " alice alison allison amy angela anita anja ann anna anne annette";
  inittabs_list = inittabs_list " barbara betty";
  inittabs_list = inittabs_list " carol cathy cecilia celeste cheryl claire claudia cornelia";
  inittabs_list = inittabs_list " cristina cyndi cynthia";
  inittabs_list = inittabs_list " dana debbie debby deborah debra deirdre diana donna dorothy";
  inittabs_list = inittabs_list " eleanor elena elisabetta elizabeth elise ellen emily eva evan eve";
  inittabs_list = inittabs_list " faye geraldine helen hilarie ileana ingrid irina";
  inittabs_list = inittabs_list " janet jeanette jeanne jennifer jessica jill joan joann joanne";
  inittabs_list = inittabs_list " judith judy julie";
  inittabs_list = inittabs_list " karen kathleen kathryn";
  inittabs_list = inittabs_list " laura lauren leena leslie lila lili linda lisa lixia lorna";
  inittabs_list = inittabs_list " lorrie lucy";
  inittabs_list = inittabs_list " madelyn magdalena marcia margaret maria maria-carmen marianne";
  inittabs_list = inittabs_list " marie marilyn marjorie martha mary maryann maureen melinda";
  inittabs_list = inittabs_list " michelle monique";
  inittabs_list = inittabs_list " nancy nora olga patricia peggy petra";
  inittabs_list = inittabs_list " radia rebecca rosanna ruth";
  inittabs_list = inittabs_list " sally sandra sandy samita sharon sheila silvia sue susan suzanne";
  inittabs_list = inittabs_list " tanja tracy trudy vicky wendy";
  inittabs_n = split(inittabs_list, inittabs_names, " ");
  for (inittabs_i = 1; inittabs_i <= inittabs_n; inittabs_i++) {
    femalefirstname[inittabs_names[inittabs_i]] = 1;
  }

  # Release the scratch data again.
  split("", inittabs_names);
  inittabs_list = "";
}

# Reset the global counters to zero (the list continues below).
function initstats() {
  nbaddrafts = nbadauthorlines = nbadauthoremaillines = 0;
  ndrafts = nauthors = 0;
  nietfdrafts = nirtfdrafts = niabdrafts = niesgdrafts = 0;
  nindividualdrafts = 0;
  nauthoroccurrences = 0;
  ncountries = ncompanies = 0;
  nunknowncountry = nunknowncompany = 0;
  euauthors = eudrafts = 0;
  nalldrafts = 0;
  nareas = nwgs = 0;
  nrfceddrafts = 0;
nindividualdrafts = 0; npostscriptdrafts = 0; npdfdrafts = 0; nnontextformats = 0; nfigures = 0; nformats = 0; nabnfs = 0; nasn1s = 0; ncodes = 0; nxmls = 0; nkwds = 0; nsecconss = 0; nianaconss = 0; nerratas = 0; startyear = 0; endyear = 0; nsorts = 0; }

#
# html_file_css_head(title,csshtmlfile,slogan)
#
# Start a fresh page in csshtmlfile and write the page header to it.
#   title       - printed twice with %s (once near the top, once further down)
#   csshtmlfile - output file; it is closed first unless its name starts
#                 with /dev/, the first printf truncates it (>) and all
#                 later ones append (>>)
#   slogan      - printed once between typographic quotes
# Also resets csshtmlfileseensection[csshtmlfile] to 0, the flag that
# html_file_css_section() consults.
#
# Fix: six of the writes below used to go to the global htmlfile instead
# of the csshtmlfile parameter, so part of the header landed in whatever
# file the previous report had been writing.  Every write now targets
# csshtmlfile.
#
# NOTE(review): most format strings here carry arguments (34 is the
# double quote character) without matching conversions; the markup they
# belonged to is not visible in this copy.  They are reproduced as found.
#
function html_file_css_head(title,csshtmlfile,slogan) {
  csshtmlfileseensection[csshtmlfile] = 0;
  if (substr(csshtmlfile,1,5) != "/dev/") close(csshtmlfile);
  printf("\n", 34, 34, 34, 34) > csshtmlfile;
  printf("\n", 34, 34, 34, 34, 34, 34) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("\n", 34, 34, 34, 34, 34, 34) >> csshtmlfile;
  printf("%s\n", title) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf("\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf(" %cIETF%c

\n", 34, 34, 34, 34, 34, 34, 34, 34) >> csshtmlfile;
  printf(" \n", 34, 34, 34, 34) >> csshtmlfile;
  printf(" \n", 34, 34, 34, 34) >> csshtmlfile;
  printf(" \n", 34, 34, 34, 34) >> csshtmlfile;
  printf(" \n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
\n") >> csshtmlfile;
  printf(" \n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    Docs
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    IANA
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    IESG
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    Lifecycle
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    RFC Ed
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
    Misc
\n", 34, 34, 34, 34) >> csshtmlfile;
  printf("
\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf("

%s

\n", title) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf("“%s”\n", slogan) >> csshtmlfile;
  printf("
\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n") >> csshtmlfile;
  printf("
\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n", 34, 34) >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n\n\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("\n") >> csshtmlfile;
  printf("
\n", 34, 34) >> csshtmlfile;
}
function html_file_css_section(title,csshtmlfile) {
  if (csshtmlfileseensection[csshtmlfile] != 0) {
    printf("\n
\n", 34, 34) >> csshtmlfile; } csshtmlfileseensection[csshtmlfile] = 1; printf("\n

%s

\n\n", title) >> csshtmlfile; } function html_file_css_section_nextcol(title,csshtmlfile) { csshtmlfileseensection[csshtmlfile] = 1; printf("\n
\n", 34, 34) >> csshtmlfile; printf("\n

%s

\n\n", title) >> csshtmlfile; } function html_file_css_end(htmlfile) { printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("\n") >> htmlfile; printf("\n") >> htmlfile; close(htmlfile); }

#
# checkareaacronym(a): lower-case the area name a and abort with exit(1)
# when areaacronym[] has no entry for it.
#
function checkareaacronym(a) {
  a = tolower(a);
  if (areaacronym[a] == "") {
    printf("authorstats: Fatal error -- area %s acronym unknown\n", a);
    exit(1);
  }
}

#
# fontify(f,x): return x unchanged when f is empty, otherwise the result
# of formatting with f and x.
# NOTE(review): the format below has one conversion for four arguments;
# the markup it belonged to is not visible here.  Reproduced as found.
#
function fontify(f,x) {
  if (f == "") return(x);
  else return(sprintf("%s", 34, f, 34, x));
}

#
# removever(x): strip the first "-NN.txt" version suffix from x.
#
function removever(x) {
  sub(/-[0-9][0-9][.]txt/,"",x);
  return(x);
}

#
# docprocessref(draftnam): return the timeline reference text for a
# document.  For RFCs the originating draft name is looked up once with
# the external getrfcdraftname tool and cached in realdraftname[]; for
# drafts the version suffix is removed; any other type yields "".
#
# Fix: getline leaves its variable untouched when the file is empty or
# unreadable, so an RFC without a known draft used to inherit the name
# found for the previously looked-up RFC.  realname is now cleared first
# and the getline result is checked.
#
function docprocessref(draftnam) {
  if (docgettype(draftnam) == "rfc") {
    if (!realdraftnameknown[draftnam]) {
      realdraftnameknown[draftnam] = 1;
      tmpreffile = "/tmp/docprocref.txt";
      system("getrfcdraftname --nover " draftnam " > " tmpreffile);
      realname = "";
      if ((getline realname < tmpreffile) <= 0) realname = "";
      close(tmpreffile);
      realdraftname[draftnam] = realname;
    }
    if (realdraftname[draftnam] == "") {
      return(fontify("","(no timeline available)"));
    } else {
      return(fontify("",sprintf("(timeline)", 34, realdraftname[draftnam], 34)));
    }
  } else if (docgettype(draftnam) == "draft") {
    return(fontify("",sprintf("(timeline)", 34, removever(draftnam), 34)));
  } else {
    return("");
  }
}

#
# Initialization: colon separated input, lookup tables, counters and the
# wording/link prefix that depend on doctype (draft, rfc or all).
#
BEGIN {
  FS=":";
  hrefprefix = "../allstats/";
  quote = sprintf("%c",34);
  inittabs();
  initstats();
  activeauthor = 1;
  activecompany = 1;
  # Two separate statements: without a terminator between them awk reads
  # "15 ntopcountries = 14" as a concatenation and stores 1514.
  ntopcompanies = 15;
  ntopcountries = 14;
  minimumcompletion = 0.1;
  if (doctype == "draft") {
    docname = "draft";
    docnameu = "Draft";
    docprefix = "http://tools.ietf.org/html/";
  } else if (doctype == "rfc") {
    docname = "RFC";
    docnameu = "RFC";
    docprefix = "http://tools.ietf.org/html/";
  } else if (doctype == "all") {
    docname = "document";
    docnameu = "Document";
    docprefix = "http://tools.ietf.org/html/";
  } else {
    print("authorstats: Unrecognized doctype\n");
    exit(1);
  }
}

# WG definition line: $2 is the area, $3 the working group.
/^0-wgareadefinition:/ {
  if (isarea[$2] == "") {
    isarea[$2] = $2;
    nareas++;
    checkareaacronym($2);
  }
  iswg[$3] = $3;
  nwgs++;
  wgarea[$3] = $2;
  next;
}

# Population line: $2 is the country, $3 its population, $4 a line number
# used only in the error message.  EU members also add to a running total.
/^0-population:/ {
  if (population[$2] != "") {
    printf("authorstats: Redefinition of population for %s in Wikipedia from line %d -- exit\n", $2,$4);
    exit(1);
  }
  population[$2] = $3;
  if (isineu[$2]) {
    population["european union"] += $3;
  }
  next;
}

/^9-debug:/ {
  next;
}

#
# Role lines (area director, chair, IAB, IAOC).
#
# Fix: the author checks below used to read isauthor[name] == "".  Merely
# referencing a missing element creates it in awk, so people who hold a
# role but never published got an empty isauthor[] entry, and
# finalizeauthors() iterates over every index of isauthor[].  The "in"
# operator tests membership without creating anything.
#
/^z1-ad:/ {
  if (isarea[$2] == "") {
    printf("authorstats: error - area director defined for unknown area (%s)\n", $2);
  }
  if (authorad[$3] != "") {
    printf("authorstats: error - %s can not be area director for both %s and %s\n", $3, $2, authorad[$3]);
  }
  if (!($3 in isauthor)) {
    #printf("authorstats: area director %s is an unknown author (not published anything)\n", $3);
  }
  authorad[$3] = $2;
  next;
}

/^z2-chair:/ {
  if (iswg[$2] == "") {
    printf("authorstats: error - chair defined for unknown wg (%s)\n", $2);
  }
  if (!($3 in isauthor)) {
    #printf("authorstats: chair %s is an unknown author (not published anything)\n", $3);
  }
  # authorchairs[] is a %-separated list of the WGs a person chairs.
  if (authorchairs[$3] != "") authorchairs[$3] = authorchairs[$3] "%";
  authorchairs[$3] = authorchairs[$3] $2;
  next;
}

/^z3-iab:/ {
  if (!($2 in isauthor)) {
    #printf("authorstats: iab member %s is an unknown author (not published anything)\n", $2);
  }
  authoriab[$2] = $2;
  next;
}

/^z4-iaoc:/ {
  if (!($2 in isauthor)) {
    #printf("authorstats: iaoc member %s is an unknown author (not published anything)\n", $2);
  }
  authoriaoc[$2] = $2;
  next;
}

# Documents that could not be parsed at all.
/UNRECOGNIZED/ {
  allstatinc($1,"");
  baddraft($1);
  badauthoremailline($1,"all");
  next;
}

# Everything else is one (document, author) record.
/.*/ {
  #printf("authorstats: debug: Processing line %s...\n", $0);
  draft = $1;
  author = $2;
  company = $3;
  email = $4;
  country = $5;
  pages = $6;
  month = $7;
  year = $8;
  # Separate statements, see the note in BEGIN about missing terminators.
  day = $9;
  features = $10;
  goodstatinc(draft,author,country,company,pages,month,year,features);
  addauthor(draft,author);
  if (company == "UNKNOWN") badauthorline($1,$2,"unknown affiliation");
  if (country == "UNKNOWN") badauthorline($1,$2,"unknown location");
  if (pages == "UNKNOWN") badauthorline($1,$2,"unknown page count");
  if (month == "UNKNOWN") badauthorline($1,$2,"unknown publication month");
  if (year == "UNKNOWN") badauthorline($1,$2,"unknown publication year");
  if (email == "UNKNOWN") badauthoremailline($1,$2);
}

END {
  #printf("authorstats: debug: End of input...\n", $0);
  finalizeauthors();
calculatelastyear(); reportbaddrafts("baddrafts.html"); reportbadauthorlines("badauthors.html"); reportbadauthoremaillines("bademails.html"); reportoverall("index.html"); reportindividualauthors("authors.html"); reportindividualcompanies("companies.html"); reportindividualcountries("countries.html"); }

#
# calculatelastyear(): read the current date into the globals nowyear,
# nowmonth and nowday, validate them, and set lastyearcompletion to the
# fraction of the current year that has elapsed (months weighted as 1/12,
# days as 1/31 of a month).  Exits with status 1 when the date cannot be
# determined.
#
# Changed: the date used to be collected with three separate date calls
# appended to the fixed file /tmp/as.date, which was never closed.  Two
# concurrent runs could clobber each other, and the three calls could
# straddle midnight.  One date call read through a command pipe gives a
# consistent snapshot and needs no temporary file.
#
function calculatelastyear() {
  datecmd = "date +%Y-%m-%d";
  dateline = "";
  if ((datecmd | getline dateline) <= 0) dateline = "";
  close(datecmd);
  # Missing pieces stay empty and are rejected by the range checks below.
  split(dateline,datefields,"-");
  nowyear = datefields[1];
  nowmonth = datefields[2];
  nowday = datefields[3];
  if (nowyear < 2000 || nowyear > 3000) {
    printf("authorstats: error - can not find out current year -- exit\n");
    exit(1);
  }
  if (nowmonth < 1 || nowmonth > 12) {
    printf("authorstats: error - can not find out current month -- exit\n");
    exit(1);
  }
  if (nowday < 1 || nowday > 31) {
    printf("authorstats: error - can not find out current day -- exit\n");
    exit(1);
  }
  lastyearcompletion = (nowmonth - 1) / 12.0 + ((nowday - 1) / 31.0) / 12.0;
  #printf("authorstats: lastyear is %f complete (%d %d %d)\n", lastyearcompletion, nowyear, nowmonth, nowday);
}

#
# addauthor(draft,author): record the pairing in both directions.
# draftauthors[draft] and authordrafts[author] are %-separated lists,
# draftnauthors[draft] and authorndrafts[author] the matching counts.
#
function addauthor(draft,author) {
  if (draft in draftauthors) {
    draftauthors[draft] = draftauthors[draft] "%" author;
    draftnauthors[draft]++;
  } else {
    draftauthors[draft] = author;
    draftnauthors[draft] = 1;
  }
  if (author in authordrafts) {
    #printf("authorstats: debug: Author %s known, adding doc %s...\n", author, draft);
    authordrafts[author] = authordrafts[author] "%" draft;
    authorndrafts[author]++;
    #printf("authorstats: debug: Doc list for him now %s...\n", authordrafts[author]);
  } else {
    #printf("authorstats: debug: Author %s not known, adding doc %s...\n", author, draft);
    authordrafts[author] = draft;
    authorndrafts[author] = 1;
  }
}

#
# allstatinc(draft,author): count a document towards nalldrafts the first
# time it is seen.  The author argument is not used.
#
function allstatinc(draft,author) {
  if (isdraft[draft] == "") {
    isdraft[draft] = draft;
    nalldrafts = nalldrafts + 1;
  }
  #nbadauthorlines = nbadauthorlines + 1;
}

function
goodstatinc(draft,author,country,company,pages,month,year,features) {
  #
  # Account for one parsed (document, author) record.
  #
  # First sighting of a document: count it, remember page count, month
  # and year, classify it by name prefix and tally its content features.
  # Every record: register the author, and track the country and company
  # of the author.  The most recent year wins as the current value and
  # earlier values go to the %-separated isauthorcountryothers[] and
  # isauthorcompanyothers[] lists.  Per-author totals are accumulated
  # later by finalizeauthor().
  #
  # Fixes:
  #  - The year sanity check was hard-coded to 2020, so any later
  #    document aborted the run.  The bound is now the larger of 2020
  #    and next calendar year, computed once.
  #  - A record with unknown country or company used to overwrite a
  #    value already learned from another record of the same author.
  #    "UNKNOWN" is now stored only while nothing better is known, and a
  #    stored "UNKNOWN" is replaced silently instead of being listed as
  #    a previous location or employer.
  #
  if (isdraft[draft] == "") {
    isdraft[draft] = draft;
    ndrafts = ndrafts + 1;
    nalldrafts = nalldrafts + 1;
    draftpagecounts[draft] = pages;
    if (month != "UNKNOWN") {
      draftmonths[draft] = month;
    }
    if (year != "UNKNOWN") {
      if (maxdocyear == 0) {
        # Falls back to the historical limit when date is unavailable.
        maxdocyear = 2020;
        yearcmd = "date +%Y";
        if ((yearcmd | getline thisyear) > 0 && thisyear + 1 > maxdocyear) maxdocyear = thisyear + 1;
        close(yearcmd);
      }
      if (year <= 0 || year > maxdocyear) {
        printf("authorstats: Invalid year (%s) for %s -- exit\n", year, draft);
        exit(1);
      }
      draftyears[draft] = year;
      if (startyear == 0 || year < startyear) startyear = year;
      if (endyear == 0 || year > endyear) endyear = year;
    }
    if (pagecounts[pages] == "") {
      pagecounts[pages] = 1;
    } else {
      pagecounts[pages] = pagecounts[pages] + 1;
    }
    if (draft ~ /^draft-ietf-/) {
      nietfdrafts++;
    } else if (draft ~ /^draft-irtf-/) {
      nirtfdrafts++;
    } else if (draft ~ /^draft-iesg-/) {
      niesgdrafts++;
    } else if (draft ~ /^draft-iab-/) {
      niabdrafts++;
    } else if (draft ~ /^draft-rfc/) {
      nrfceddrafts++;
    } else {
      nindividualdrafts++;
    }
    if (index(features,"postscript")) { npostscriptdrafts++; }
    if (index(features,"pdf")) { npdfdrafts++; }
    if (index(features,"postscript") || index(features,"pdf")) { nnontextformats++; }
    if (index(features,"figure")) { nfigures++; }
    if (index(features,"format")) { nformats++; }
    if (index(features,"abnf")) { nabnfs++; }
    if (index(features,"kwd")) { nkwds++; }
    if (index(features,"asn1")) { nasn1s++; }
    if (index(features,"code")) { ncodes++; }
    if (index(features,"xml")) { nxmls++; }
    if (index(features,"seccons")) { nsecconss++; }
    if (index(features,"ianacons")) { nianaconss++; }
    if (index(features,"errata")) { nerratas++; }
  }
  # Year used to order affiliations in time; 0 when the year is unknown.
  lyear = (year == "UNKNOWN" ? 0 : year);
  if (isauthor[author] == "") {
    isauthor[author] = author;
    nauthors = nauthors + 1;
    nallauthors = nallauthors + 1;
    newauthor = 1;
  } else {
    newauthor = 0;
  }
  if (country != "UNKNOWN" && country != "") {
    if (iscountry[country] == "") {
      iscountry[country] = country;
      countryauthors[country] = 0;
      countrydrafts[country] = 0;
      ncountries++;
    }
    # Count each document once per country.
    if (index(countrydraftslist[country],draft) == 0) {
      countrydrafts[country] = countrydrafts[country] + 1;
      if (countrydraftslist[country] == "") {
        countrydraftslist[country] = draft;
      } else {
        countrydraftslist[country] = countrydraftslist[country] "%" draft;
      }
      if (isineu[country] == 1) eudrafts++;
    }
    if (isauthorcountry[author] == "" || isauthorcountry[author] == "UNKNOWN") {
      isauthorcountry[author] = country;
      #printf("setting isauthorcountry[%s] = %s\n", author, country);
      isauthorcountrytime[author] = lyear;
    } else if (isauthorcountry[author] == country) {
      if (isauthorcountrytime[author] < lyear) isauthorcountrytime[author] = lyear;
    } else if (isauthorcountrytime[author] < lyear) {
      # Newer record from a different country: the author has moved.
      if (index(isauthorcountryothers[author],isauthorcountry[author]) == 0) {
        if (isauthorcountryothers[author] != "") isauthorcountryothers[author] = isauthorcountryothers[author] "%";
        isauthorcountryothers[author] = isauthorcountryothers[author] isauthorcountry[author];
      }
      isauthorcountry[author] = country;
      #printf("re-setting isauthorcountry[%s] = %s\n", author, country);
      isauthorcountrytime[author] = lyear;
    } else if (index(isauthorcountryothers[author],country) == 0) {
      # Older record from a different country: remember it as a previous one.
      if (isauthorcountryothers[author] != "") isauthorcountryothers[author] = isauthorcountryothers[author] "%";
      isauthorcountryothers[author] = isauthorcountryothers[author] country;
    }
    #printf("isauthorcountry[%s] left as %s\n", author, isauthorcountry[author]);
  } else if (isauthorcountry[author] == "") {
    isauthorcountry[author] = "UNKNOWN";
    #printf("setting isauthorcountry[%s] = %s\n", author, "UNKNOWN");
  }
  if (company != "UNKNOWN" && company != "") {
    company = tolower(company);
    if (iscompany[company] == "") {
      iscompany[company] = company;
      companyauthors[company] = 0;
      companydrafts[company] = 0;
      ncompanies++;
    }
    if (isauthorcompany[author] == "" || isauthorcompany[author] == "UNKNOWN") {
      isauthorcompany[author] = company;
      isauthorcompanytime[author] = lyear;
      #printf(" first company\n") >> "/tmp/oc.txt";
    } else if (isauthorcompany[author] == company) {
      if (isauthorcompanytime[author] < lyear) isauthorcompanytime[author] = lyear;
      #printf(" same company\n") >> "/tmp/oc.txt";
    } else if (isauthorcompanytime[author] < lyear) {
      if (index(isauthorcompanyothers[author],isauthorcompany[author]) == 0) {
        if (isauthorcompanyothers[author] != "") isauthorcompanyothers[author] = isauthorcompanyothers[author] "%";
        isauthorcompanyothers[author] = isauthorcompanyothers[author] isauthorcompany[author];
      }
      isauthorcompany[author] = company;
      isauthorcompanytime[author] = lyear;
      #printf(" later company\n") >> "/tmp/oc.txt";
    } else if (index(isauthorcompanyothers[author],company) == 0) {
      #printf(" unseen company\n") >> "/tmp/oc.txt";
      if (isauthorcompanyothers[author] != "") isauthorcompanyothers[author] = isauthorcompanyothers[author] "%";
      isauthorcompanyothers[author] = isauthorcompanyothers[author] company;
    }
    #printf(" post-company = %s, others = %s\n", isauthorcompany[author], isauthorcompanyothers[author]) >> "/tmp/oc.txt";
    # Count each document once per company.
    if (index(companydraftslist[company],draft) == 0) {
      companydrafts[company] = companydrafts[company] + 1;
      if (companydraftslist[company] == "") {
        companydraftslist[company] = draft;
      } else {
        companydraftslist[company] = companydraftslist[company] "%" draft;
      }
    }
  } else if (isauthorcompany[author] == "") {
    isauthorcompany[author] = "UNKNOWN";
  }
nauthoroccurrences = nauthoroccurrences + 1; }

#
# finalizeauthors(): run finalizeauthor() once for every registered
# author, after all input has been read.
#
# Entries of isauthor[] with an empty value are skipped: goodstatinc()
# always stores the author name as the value, so an empty value can only
# be a placeholder created by a lookup of somebody who never published.
#
function finalizeauthors() {
  #printf("finalizeauthors...\n");
  for (author in isauthor) {
    if (isauthor[author] != "") finalizeauthor(author);
  }
}

#
# finalizeauthor(author): add the author to the totals of the final
# country and company, or to the unknown counters.
#
# Fix: the unknown-country counter was only incremented when the global
# newauthor flag was set.  That flag is left over from the last input
# record handled by goodstatinc() and says nothing about this author, so
# the count came out as either everything or nothing.  It is now
# incremented unconditionally, like the unknown-company counter.
#
function finalizeauthor(author) {
  #printf("finalizeauthor(%s)...\n", author);
  country = isauthorcountry[author];
  if (country != "UNKNOWN" && country != "") {
    countryauthors[country] = countryauthors[country] + 1;
    #printf("setting countryauthors[%s] = %d\n", country, countryauthors[country]);
    if (isineu[country] == 1) euauthors++;
    if (countryauthorslist[country] == "") {
      countryauthorslist[country] = author;
    } else {
      countryauthorslist[country] = countryauthorslist[country] ", " author;
    }
  } else {
    nunknowncountry++;
  }
  company = isauthorcompany[author];
  if (company != "UNKNOWN" && company != "") {
    companyauthors[company] = companyauthors[company] + 1;
  } else {
    nunknowncompany++;
  }
}

# badauthorline(draft,author,problem): queue an author record with
# incomplete data for the problem report.
function badauthorline(draft,author,problem) {
  nbadauthorlines++;
  badauthorlines[nbadauthorlines] = author " - " draft ": " problem;
}

# badauthoremailline(draft,author): queue an author record with an
# unusable e-mail address for the problem report.
function badauthoremailline(draft,author) {
  nbadauthoremaillines++;
  badauthoremaillines[nbadauthoremaillines] = author " - " draft;
}

# baddraft(s): queue an unparseable document name for the problem report.
function baddraft(s) {
  nbaddrafts = nbaddrafts + 1;
  baddrafts[nbaddrafts] = s;
}

#
# Report page writers.
# NOTE(review): in this copy each of them only emits a newline between
# the page header and footer; the statements that list the queued
# entries are not visible here.  Reproduced as found.
#
function reportbaddrafts(file) {
  openhtml(file,"Unrecognized " docname "s");
  html_file_css_section("Problems",file);
  printf("\n") >> file;
  closehtml(file);
}

function reportbadauthorlines(file) {
  openhtml(file,"Unrecognized authors in " docname "s");
  html_file_css_section("Problems",file);
  printf("\n") >> file;
  closehtml(file);
}

function reportbadauthoremaillines(file) {
  openhtml(file,"Unrecognized or incomplete e-mail addresses in " docname "s");
  html_file_css_section("Problems",file);
  printf("\n") >> file;
  closehtml(file);
}

function reportindividualcountries(topfile) {
  openhtml(topfile,"Countries");
  printf("\n") >> topfile;
  closehtml(topfile);
}

function reportindividualcountry(country,n,file) {
  openhtml(file,countrycapitalize(country) " Data");
html_file_css_section("Basic Information",file); countrywgs = ""; for (author in isauthorcompany) { if (isauthorcountry[author] == country) { s1 = authorwgs[author]; split(s1,tempwgs,"%"); for (tempwgi in tempwgs) { tempwg = tempwgs[tempwgi]; if (index(countrywgs,tempwg) == 0) { if (countrywgs == "") countrywgs = tempwg; else countrywgs = countrywgs "%" tempwg; } } } } if (doctype == "draft" || doctype == "all") { printwglist(countrywgs,"

The authors in this country appear to be working in the following WG%s: ","

\n",file); } html_file_css_section("Authors",file); printf("

There are %d authors in %s:

\n", n, country) >> file; printf("\n") >> file; closehtml(file); } function capitalize(s) { if (length(s) >= 1 && substr(s,1,1) >= "a" && substr(s,1,1) <= "z") { return(toupper(substr(s,1,1)) substr(s,2)); } else { return(s); } } function countrycapitalize(cn) { if (cn == "usa") return("USA"); cnn = split(cn,cntab," "); cnres = ""; for (cni = 1; cni <= cnn; cni++) { if (cnres != "") cnres = cnres " "; cnres = cnres capitalize(cntab[cni]); } return(cnres); } function areacapitalize(ar) { return(countrycapitalize(ar)); } function companycapitalize(cp) { if (cp == "vpnc" || cp == "nsn" || cp == "ibm" || cp == "att" || cp == "bbn" || cp == "mit" || cp == "sri" || cp == "hp" || cp == "isoc" || cp == "isc" || cp == "dod" || cp == "nec" || cp == "dec" || cp == "mci" || cp == "nasa" || cp == "cnri" || cp == "bt" || cp ~ /^uc/ || cp ~ /^us/ || cp ~ /^ui/) return(toupper(cp)); return(countrycapitalize(cp)); } function coname(s) { if (s == "united arab emirates") s = "arab emirates"; return(s); } function tofname(s) { s = tolower(s) ".html"; gsub(/ /,"",s); gsub(/[()]/,"",s); return(s); } function tofname_comp(s) { s = "c_" tolower(s) ".html"; gsub(/ /,"",s); gsub(/[()]/,"",s); return(s); } function reportindividualcompanies(topfile) { openhtml(topfile,"All companies"); printf("\n") >> topfile; closehtml(topfile); } function startsort(inputfile) { sortfiles[nsorts] = inputfile; sortfile = sprintf("sort%d.tmp",nsorts); nsorts++; if (debug) printf("debug: start sort %d in %s to %s\n", nsorts - 1, sortfile, sortfiles[nsorts-1]); system("rm -f " sortfile); system("touch " sortfile); return(sortfile); } function stopsort(sortfile) { inputfile = sortfiles[--nsorts]; if (debug) printf("debug: stop sort %d in %s to %s\n", nsorts, sortfile, inputfile); close(sortfile); close(inputfile); system("sort < " sortfile " >> " inputfile); close(sortfile); return(inputfile); } function reportindividualcompany(company,repna,repnd,file) { openhtml(file,companycapitalize(company) " Data (" topic 
")"); companywgs = ""; for (author in isauthorcompany) { if (isauthorcompany[author] == company) { s1 = authorwgs[author]; split(s1,tempwgs,"%"); for (tempwgi in tempwgs) { tempwg = tempwgs[tempwgi]; if (index(companywgs,tempwg) == 0) { if (companywgs == "") companywgs = tempwg; else companywgs = companywgs "%" tempwg; } } } } html_file_css_section("WGs",file); printwglist(companywgs,"

The authors in this company appear to be working in the following WG%s: ","

\n",file); html_file_css_section("Authors",file); printf("

This company has %d authors:

\n", repna) >> file; printf("\n") >> file; reportlistofdocs("The authors from " company " have", n, compd, file); closehtml(file); }

#
# reportindividualauthors(topfile): write the author index page.
# NOTE(review): only the page frame is visible in this copy.
#
function reportindividualauthors(topfile) {
  openhtml(topfile,activeauthor == 1 ? "All authors" : "Most active authors");
  printf("\n") >> topfile;
  closehtml(topfile);
}

#
# printwglist(string,format1,format2,file): append a list of working
# groups to file.  string is a %-separated list of names.  format1 is
# printed before the list and format2 after it; each receives one
# argument, the empty string for exactly one name and "s" otherwise.
# Names are separated by ", " with " and " before the last; an empty
# list prints "(none)".  Uses the globals n, q and wgtab as scratch.
#
function printwglist(string,format1,format2,file) {
  n = split(string,wgtab,"%");
  printf(format1, (n == 1 ? "" : "s")) >> file;
  if (n == 0) {
    printf("(none)") >> file;
  } else {
    for (q = 1; q <= n; q++) {
      if (n > 1 && q == n) printf(" and ") >> file;
      else if (q > 1) printf(", ") >> file;
      printf("%s", 34, wgtab[q], 34, wgtab[q]) >> file;
    }
  }
  printf(format2, (n == 1 ? "" : "s")) >> file;
}

#
# removestring(x,y): return x with every occurrence of y removed,
# scanning left to right; text joined by a removal is not rescanned.
# Fix: an empty y matched at every position without consuming anything
# and looped forever; x is now returned unchanged in that case.
#
function removestring(x,y) {
  if (y == "") return(x);
  rs = "";
  while (length(x) > 0) {
    if (length(x) >= length(y) && substr(x,1,length(y)) == y) {
      x = substr(x,length(y) + 1);
    } else {
      rs = rs substr(x,1,1);
      x = substr(x,2);
    }
  }
  return(rs);
}

#
# reportindividualauthor(author,n,hisdraftstab,file): write the page for
# one author: location and employer (current and previous), working
# groups, roles (chair, area director, IAB, IAOC) and the document list.
#   author       - author name; the first word selects the pronoun
#   n            - number of documents, passed on to reportlistofdocs()
#   hisdraftstab - array of the documents, passed on likewise
#   file         - output file
#
# Fixes:
#  - The previous locations and previous employers lists were walked
#    with for (oi in compo).  The visiting order of that loop is
#    unspecified, and the " and" separator was chosen for index cn - 1
#    instead of the last index cn, so a two-entry list came out as
#    "A, B" and a three-entry list as "A and B, C".  Both lists now use
#    a counted loop and put " and" before the last entry.
#  - goodstatinc() stores the placeholder "UNKNOWN" when it has no
#    country or company for an author.  The placeholder is now treated
#    like an empty value instead of being rendered as a name and link.
#
function reportindividualauthor(author,n,hisdraftstab,file) {
  openhtml(file,author " Data (" topic ")");
  html_file_css_section("Personal Information",file);
  split(author,authornameparts," ");
  if (malefirstname[tolower(authornameparts[1])]) pron = "he";
  else if (femalefirstname[tolower(authornameparts[1])]) pron = "she";
  else pron = "he or she";
  if (isauthorcountry[author] == "" || isauthorcountry[author] == "UNKNOWN") {
    printf("

The location of this author is unknown ") >> file;
  } else {
    printf("

This author is in %s ", 34, hrefprefix, tofname(isauthorcountry[author]), 34, countrycapitalize(isauthorcountry[author])) >> file;
  }
  if (isauthorcountrytime[author] != 0) printf(" (as of %d)", isauthorcountrytime[author]) >> file;
  if (isauthorcountryothers[author] != "") {
    printf(", previous locations include") >> file;
    # Drop the current country from the list of previous ones.
    tz = isauthorcountryothers[author];
    tz = removestring(tz,isauthorcountry[author] "%");
    tz = removestring(tz,"%" isauthorcountry[author]);
    cn = split(tz,compo,"%");
    for (oi = 1; oi <= cn; oi++) {
      if (oi == 1) {
      } else if (oi == cn) {
        printf(" and") >> file;
      } else {
        printf(",") >> file;
      }
      printf(" %s", 34, hrefprefix, tofname(compo[oi]), 34, countrycapitalize(compo[oi])) >> file;
    }
  }
  printf(".\n") >> file;
  if (isauthorcompany[author] == "" || isauthorcompany[author] == "UNKNOWN") {
    printf("It is not known where this author works in. ") >> file;
  } else {
    printf("This author works for %s", 34, hrefprefix, tofname_comp(isauthorcompany[author]), 34, capitalize(isauthorcompany[author])) >> file;
  }
  if (isauthorcompanytime[author] != 0) printf(" (as of %d)", isauthorcompanytime[author]) >> file;
  printf(". \n") >> file;
  if (isauthorcompanyothers[author] != "") {
    printf("Previous employers include") >> file;
    # Drop the current employer from the list of previous ones.
    tz = isauthorcompanyothers[author];
    tz = removestring(tz,isauthorcompany[author] "%");
    tz = removestring(tz,"%" isauthorcompany[author]);
    cn = split(tz,compo,"%");
    for (oi = 1; oi <= cn; oi++) {
      if (oi == 1) {
      } else if (oi == cn) {
        printf(" and") >> file;
      } else {
        printf(",") >> file;
      }
      printf(" %s", 34, hrefprefix, tofname_comp(compo[oi]), 34, capitalize(compo[oi])) >> file;
    }
    printf(".\n") >> file;
  }
  printf("

\n") >> file;
  if (doctype == "draft" || doctype == "all") {
    printwglist(authorwgs[author],"

The working group%s where " pron " is active appear to be ",".\n",file);
  }
  if (authorchairs[author] != "") {
    printwglist(authorchairs[author],capitalize(pron) " chairs the "," working group%s.\n",file);
  }
  if (authorad[author] != "") {
    adstats = sprintf("(click here for statistics related to this role)", 34, capitalize(authornameparts[2]), capitalize(authornameparts[1]), 34);
    printf(capitalize(pron) " is an area director for the %s area %s.\n", authorad[author], adstats) >> file;
  }
  if (authoriab[author] != "") {
    printf(capitalize(pron) " is in the IAB.\n") >> file;
  }
  if (authoriaoc[author] != "") {
    printf(capitalize(pron) " is in the IAOC.\n") >> file;
  }
  printf("

\n") >> file;
  reportlistofdocs(capitalize(pron) " has", n, hisdraftstab, file);
  closehtml(file);
}

function reportlistofdocs(who,n,hisdraftstab,file) {
  if (doctype == "all") {
    nd = 0;
    nr = 0;
    for (adraft in hisdraftstab) {
      if (docgettype(hisdraftstab[adraft]) == "rfc") {
        nr++;
      } else {
        nd++;
      }
    }
    #printf("authorstats: debug: Reporting author with nr=%d and nd=%d...\n", nr, nd);
    html_file_css_section("RFCs",file);
    if (nr == 0) {
      printf("

%s no RFCs.

\n", who) >> file; } else { printf("

%s the following %d RFCs:

\n", who, nr) >> file; printf("\n") >> file; close(file); system("rm -f docslisttmp.txt"); } html_file_css_section_nextcol("Drafts",file); if (nd == 0) { printf("

%s no drafts.

\n", who) >> file; } else { printf("

%s the following %d drafts:

\n", who, nd) >> file; printf("\n") >> file; close(file); system("rm -f docslisttmp.txt"); } } else { html_file_css_section(docname "s",file); printf("

%s the following %d %ss:

\n", who, n, docname) >> file; printf("\n") >> file; close(file); system("rm -f docslisttmp.txt"); } } function reportoverall(file) { openhtml(file,"IETF document statistics (" topic ")"); html_file_css_section(docnameu "s", file); printf("

Total number of %ss is %d.\n", docname, nalldrafts) >> file; if (doctype == "draft") { printf("Of these %d (%.2f%%) are individual %ss, %d (%.2f%%) IETF, %d (%.2f%%) IRTF, %d (%.2f%%) IAB, %d (%.2f%%) RFC Editor and %d (%.2f%%) IESG drafts.\n", nindividualdrafts, (100.0 * nindividualdrafts) / ndrafts, docname, nietfdrafts, (100.0 * nietfdrafts) / ndrafts, nirtfdrafts, (100.0 * nirtfdrafts) / ndrafts, niabdrafts, (100.0 * niabdrafts) / ndrafts, nrfceddrafts, (100.0 * nrfceddrafts) / ndrafts, niesgdrafts, (100.0 * niesgdrafts) / ndrafts) >> file; } printf("

\n") >> file; printf("

The distribution of %s ", docname) >> file; if (doctype == "draft") { printf("according to version numbers is shown here,\n", 34, 34) >> file; reportversions(); } printf("and according to number of authors here,\n", 34, 34) >> file; reportauthors(); printf("%s page count distribution looks like this.\n", docnameu, 34, 34) >> file; reportpages(); printf("The used document format types and content features are shown here.\n", 34, 34) >> file; reportformats(); if (doctype == "rfc") { printf("The growth of publication rate per year is shown here.\n", 34, 34) >> file; reportpubyears(); } else if (doctype == "draft") { printf("The publication rate per month is shown here.\n", 34, 34) >> file; reportpubyears(); } else { # do nothing } printf("

\n") >> file; html_file_css_section("Authors", file); printf("

The distribution of authors according to the number of %ss\n", docname) >> file; printf("they have is shown here.\n", 34, 34) >> file; reportauthoractivities(0); if (doctype == "draft" || doctype == "all") { printf("The same distribution for IETF (or other official) %ss is shown here.\n", docname, 34, 34) >> file; reportauthoractivities(1); printf("The authors work in one or multiple WGs according this distribution.\n", 34, 34) >> file; reportauthorcrosswg(); printf("The same distribution for cross-area work is here.\n", 34, 34) >> file; reportauthorcrossarea(); printf("Most active authors per area can be seen here.\n", 34, 34) >> file; reportauthoractivitiesperarea(); } printf("

\n") >> file; if (doctype == "draft" || doctype == "all") { html_file_css_section("Areas and WGs", file); printf("

The distribution of WGs per areas is shown in here.\n", 34, 34) >> file; reportareawgs(); printf("The distribution of %ss according to WGs is shown here,\n", docname, 34, 34) >> file; reportwgs(); printf("and according to areas is shown here.\n", 34, 34) >> file; reportareas(); printf("

\n") >> file; } html_file_css_section("Affiliations", file); printf("

Authors come from these companies, %d different companies in total. \n", 34, 34, ncompanies) >> file; reportcompanies(); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has changed like this\n", 34, 34) >> file; reporttopcompanies(0); # Normalization: OK printf(" (normalized).\n", 34, 34) >> file; reporttopcompanies(1); } printf("

\n") >> file; html_file_css_section_nextcol("Countries and Continents", file); reportcountries_draft(0); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has developed like this", 34, 34) >> file; reporttopcountries(0); # Normalization: OK printf(" (normalized)", 34, 34) >> file; reporttopcountries(1); printf(".\n") >> file; } printf("Considering EU as a country, the distribution would look like this instead.\n", 34, 34) >> file; reportcountries_draft(1); printf("The same distribution for continents can be found here.\n", 34, 34) >> file; reportcontinents_draft(); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has developed like this\n", 34, 34) >> file; reporttopcontinents(0); # Normalization: OK printf(" (normalized)", 34, 34) >> file; reporttopcontinents(1); printf(".\n") >> file; } printf("

Authors come from these countries.\n", 34, 34, ncountries) >> file; reportcountries(0); printf("Considering EU as a country, the distribution would look like this instead.\n", 34, 34) >> file; reportcountries(1); printf("The same distribution for continents can be found here.\n", 34, 34) >> file; reportcontinents(); printf("

\n") >> file; html_file_css_section("Detailed data", file); printf("

All active authors can be seen here.\n", 34, 34) >> file; printf("Countries can be seen here.\n", 34, 34) >> file; printf("And active companies can be seen here.

\n", 34, 34) >> file; html_file_css_section("Data Freshness and Reliability", file); printf("

Out of the total %d %ss, %d or %.2f%% are unparseable by this tool.\n", nalldrafts, docname, nalldrafts - ndrafts, (100.0 * (nalldrafts - ndrafts)) / nalldrafts) >> file; printf("See here for a list of %ss with problems.\n", 34, 34, docname) >> file; printf("Out of a total of %d authors entries, %d or %.2f%% had limited information. ", nauthoroccurrences, nbadauthorlines, (100.0 * nbadauthorlines) / nauthoroccurrences) >> file; printf("See here for a list of authors with problems. \n", 34, 34) >> file; printf("See here for a list of %ss with incomplete or non-working e-mail addresses. \n", 34, 34, docname) >> file; printf("Warning: See the tool description for detailed information about what can or can not be assumed about the quality of the results.

\n", 34, 34) >> file; closehtml(file); } function reportcontinents() { base = "contdistr"; htmlfile = base ".html"; epsfile = base ".eps"; jpgfile = base ".jpg"; datafile = base ".dat"; gplfile = base".txt"; openhtml(htmlfile,"Distribution of authors on continents"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

Location data is calculated from the first occurrence of an author.

\n") >> htmlfile; closehtml(htmlfile); } function reporttopcompanies(normalized) { base = "companydistrhist"; if (normalized) base = base "_norm"; htmlfile = base ".html"; openhtml(htmlfile,"Comparison of companies over the years"); delete reporttopcompanies_seencompany; delete multifunctiontrack; for (company in topcompanies) { multifunctiontrack[company] = company; } delete multifunctiontab; for (year = startyear; year <= endyear; year++) { yeardata = ""; for (company in topcompanies) { n = 0; split(companydraftslist[company],compd,"%"); for (drafti in compd) { draft = compd[drafti]; if (year == draftyears[draft]) { n++; } } if (reporttopcompanies_seencompany[company] != "" || n != 0) { if (reporttopcompanies_seencompany[company] == "") { if (multifunctiontab[year-1] != "") { multifunctiontab[year-1] = multifunctiontab[year-1] "%"; } multifunctiontab[year-1] = multifunctiontab[year-1] company ":0"; } if (yeardata != "") yeardata = yeardata "%"; if (year == nowyear) { if (lastyearcompletion >= minimumcompletion) { yeardata = yeardata company ":" (n * (1/lastyearcompletion)); } } else { yeardata = yeardata company ":" n; } reporttopcompanies_seencompany[company] = "seen"; } } multifunctiontab[year] = yeardata; } mavgalpha = 0.4; genericmultifunctionreport(base,htmlfile,startyear,endyear,0,normalized,mavgalpha, "Comparison of companies over the years","Year", normalized ? "% of " docnameu "s" : "# of " docnameu "s"); printf("

This tracks publication of %ss with authors from most active companies per year. Company data is calculated from the first occurrence of an author. ISI has been excluded from this graph, to make it clearer.\n", docname) >> htmlfile; if (normalized) printf("The graph is normalized to 100%% representing sum of the top companies.\n") >> htmlfile; printf("The data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same publication rates as has happened during first part of the year (now %.2f %% complete).\n", nowyear, 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

\n") >> htmlfile; closehtml(htmlfile); }

#
# reporttopcountries(normalized): write the page comparing the top
# countries over the years.  The page goes to countrydistrhist.html, or
# to countrydistrhist_norm.html when normalized is true.
#
# For every year from startyear to endyear and every country in
# topcountries[], the documents in countrydraftslist[] published that
# year are counted and collected as "country:count" entries, separated
# by %, in multifunctiontab[year].  A country first seen in some year
# gets a ":0" entry in the preceding year.  Counts for the current year
# are scaled up by 1/lastyearcompletion, or left out entirely while less
# than minimumcompletion of the year has passed.  The table is then
# handed to genericmultifunctionreport().
#
# Fix: the two page titles were swapped.  The normalized page was opened
# with the plain title and the plain page with the "(normalized)" title.
#
function reporttopcountries(normalized) {
  base = "countrydistrhist";
  if (normalized) base = base "_norm";
  htmlfile = base ".html";
  if (normalized)
    openhtml(htmlfile,"Comparison of countries over the years (normalized)");
  else
    openhtml(htmlfile,"Comparison of countries over the years");
  #
  # Calculate the main results
  #
  delete reporttopcountries_seencountry;
  delete multifunctiontrack;
  for (country in topcountries) {
    multifunctiontrack[country] = country;
  }
  delete multifunctiontab;
  for (year = startyear; year <= endyear; year++) {
    yeardata = "";
    for (country in topcountries) {
      n = 0;
      split(countrydraftslist[country],compd,"%");
      for (drafti in compd) {
        draft = compd[drafti];
        if (year == draftyears[draft]) {
          n++;
        }
      }
      if (reporttopcountries_seencountry[country] != "" || n != 0) {
        if (reporttopcountries_seencountry[country] == "") {
          # First appearance: anchor the curve at zero in the year before.
          if (multifunctiontab[year-1] != "") {
            multifunctiontab[year-1] = multifunctiontab[year-1] "%";
          }
          multifunctiontab[year-1] = multifunctiontab[year-1] country ":0";
        }
        if (yeardata != "") yeardata = yeardata "%";
        if (year == nowyear) {
          if (lastyearcompletion >= minimumcompletion) {
            yeardata = yeardata country ":" (n * (1/lastyearcompletion));
          }
        } else {
          yeardata = yeardata country ":" n;
        }
        reporttopcountries_seencountry[country] = "seen";
      }
    }
    multifunctiontab[year] = yeardata;
  }
  #
  # Output the graphs
  #
  mavgalpha = 0.4;
  genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha, "Comparison of countries over the years","Year", normalized ? ("% of " docnameu "s") : ("# of " docnameu "s"));
  printf("

This tracks publication of %ss with authors from a given country. Country data is calculated from the first occurrence of an author.\n", docname) >> htmlfile; printf("The scale is logarithmic, ") >> htmlfile; if (normalized) printf("normalized to 100%% representing sum of the top countries, ") >> htmlfile; printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same publication rates as has happened during first part of the year (now %.2f %% complete).\n", nowyear, 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

\n") >> htmlfile; closehtml(htmlfile); }

#
# reporttopcontinents(normalized)
#
# Writes the page comparing all continents found in continentdrafts
# year by year, from startyear to endyear. A true normalized selects
# the _norm output files and a percentage y axis. The per-year data
# is handed to genericmultifunctionreport through the global arrays
# multifunctiontrack and multifunctiontab.
#
function reporttopcontinents(normalized) {
    base = "d-contdistrhist";
    if (normalized) base = base "_norm";
    htmlfile = base ".html";
    openhtml(htmlfile,"Comparison of continents over the years");

    #
    # Calculate the main results
    #
    delete reporttopcontinents_seencontinent;
    delete multifunctiontrack;
    for (continent in continentdrafts) {
        multifunctiontrack[continent] = continent;
    }
    delete multifunctiontab;
    for (year = startyear; year <= endyear; year++) {
        yeardata = "";
        for (continent in continentdrafts) {
            # Count the documents of this continent dated in this year
            n = 0;
            split(continentdraftslist[continent],compd,"%");
            for (drafti in compd) {
                draft = compd[drafti];
                if (year == draftyears[draft]) {
                    n++;
                }
            }
            if (reporttopcontinents_seencontinent[continent] != "" || n != 0) {
                if (reporttopcontinents_seencontinent[continent] == "") {
                    # First appearance: add a zero entry for the
                    # previous year
                    if (multifunctiontab[year-1] != "") {
                        multifunctiontab[year-1] = multifunctiontab[year-1] "%";
                    }
                    multifunctiontab[year-1] = multifunctiontab[year-1] continent ":0";
                }
                if (yeardata != "") yeardata = yeardata "%";
                if (year == nowyear) {
                    # The running year is scaled up to a full year, or
                    # left out when too little of it has passed
                    if (lastyearcompletion >= minimumcompletion) {
                        yeardata = yeardata continent ":" (n * (1/lastyearcompletion));
                    }
                } else {
                    yeardata = yeardata continent ":" n;
                }
                reporttopcontinents_seencontinent[continent] = "seen";
            }
        }
        multifunctiontab[year] = yeardata;
    }

    #
    # Output the graphs
    #
    mavgalpha = 0.4;
    genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha,
                               "Comparison of continents over the years","Year",
                               normalized ? ("% of " docnameu "s") : ("# of " docnameu "s"));
    printf("

This tracks publication of %ss with authors from a given continent.\n", docname) >> htmlfile;
    printf("Location data is calculated from the first occurrence of an author.\n") >> htmlfile;
    printf("The scale is logarithmic, ") >> htmlfile;
    # This text used to speak of companies, copied from another report
    if (normalized)
        printf("normalized to 100%% representing sum of the continents, ") >> htmlfile;
    printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile;
    if (lastyearcompletion >= minimumcompletion) {
        printf("The last year (%d) has been adjusted assuming the same\n", nowyear) >> htmlfile;
        printf("publication rates as has happened during first part of the year (now %.2f %% complete).\n", 100 * lastyearcompletion) >> htmlfile;
    } else {
        printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile;
    }
    printf("

\n") >> htmlfile; closehtml(htmlfile); }

#
# genericmultifunctionreport(base,htmlfile,startx,endx,islog,
#                            isnormalized,mavgalpha,title,xlabel,ylabel)
#
# Plots one curve per key of multifunctiontrack. The input is
# multifunctiontab[x], a string of track:value entries joined by the
# percent sign, for x from startx to endx. One data file is written
# per track, optionally as a percentage of the per-x sum
# (isnormalized) and optionally smoothed by mavg (mavgalpha above
# zero). A gnuplot script is written to the file named base with the
# suffix .txt and handed to plottopng.
#
function genericmultifunctionreport(base,htmlfile,startx,endx,islog,isnormalized,mavgalpha,title,xlabel,ylabel) {
    pngfile = base ".png";
    epsfile = base ".eps";
    datafilebase = "-" base ".dat";
    gplfile = base".txt";

    html_file_css_section("Statistics",htmlfile);
    printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);

    # Start every per-track data file afresh
    for (track in multifunctiontrack) {
        trackfile = track datafilebase;
        printf("\n") > trackfile;
    }

    #
    # Calculate the per-x sums, if normalization needed
    #
    delete multifunctionsum;
    for (i = startx; i <= endx; i++) {
        sum = 0;
        e = multifunctiontab[i];
        split(e,multifunctionint,"%");
        for (vi in multifunctionint) {
            v = multifunctionint[vi];
            split(v,multifunctionint2,":");
            y = multifunctionint2[2];
            sum = sum + y;
        }
        multifunctionsum[i] = sum;
    }

    #
    # Output the main per-track plotting data
    #
    maxy = 0;
    for (i = startx; i <= endx; i++) {
        e = multifunctiontab[i];
        split(e,multifunctionint,"%");
        for (vi in multifunctionint) {
            v = multifunctionint[vi];
            split(v,multifunctionint2,":");
            track = multifunctionint2[1];
            y = multifunctionint2[2];
            if (!(track in multifunctiontrack)) {
                printf("authorstats: Error: track %s is unknown at %d for %s -- exit\n", track, i, htmlfile);
                exit(1);
            }
            trackfile = track datafilebase;
            if (!isnormalized) {
                printf("%d %d\n", i, y) >> trackfile;
            } else if (multifunctionsum[i] == 0) {
                # Nothing was published at this x; avoid dividing by zero
                printf("%d %6.4f\n", i, 0.0) >> trackfile;
            } else {
                printf("%d %6.4f\n", i, (y * 100.0) / multifunctionsum[i]) >> trackfile;
            }
            if (y > maxy) maxy = y;
        }
    }
    for (track in multifunctiontrack) {
        trackfile = track datafilebase;
        close(trackfile);
    }

    #
    # Smooth the data: the raw file is moved aside under an orig-
    # prefix and mavg writes the smoothed file under the old name
    #
    if (mavgalpha > 0.0) {
        for (track in multifunctiontrack) {
            trackfile = track datafilebase;
            origtrackfile = "orig-" trackfile;
            cmd = sprintf("mv %c%s%c %c%s%c", 34, trackfile, 34, 34, origtrackfile, 34);
            #printf("doing %s...\n", cmd);
            system(cmd);
            mavg(origtrackfile,trackfile,mavgalpha);
        }
    }

    #
    # Write the gnuplot script
    #
    printf("set output %c%s%c\n", 34, epsfile, 34) > gplfile;
    #printf("set terminal png\n") >> gplfile;
    printf("set terminal postscript eps enhanced %cTimes-Roman%c 28 color solid\n", 34, 34) >> gplfile;
    printf("set data style lines\n") >> gplfile;
    #printf("set data style linespoints\n") >> gplfile;
    #printf("set style linespoints linewidth 4\n") >> gplfile;
    printf("set title %c%s%c\n", 34, title, 34) >> gplfile;
    printf("set xlabel %c%s%c\n", 34, xlabel, 34) >> gplfile;
    printf("set ylabel %c%s%c\n", 34, ylabel, 34) >> gplfile;
    printf("set size 2.7,2.5\n") >> gplfile;
    #printf("set size 1.7,1.5\n") >> gplfile;
    if (islog) {
        printf("set logscale y 2\n") >> gplfile;
        if (isnormalized) {
            printf("set yrange [ 0.5 : 100]\n") >> gplfile;
        } else {
            printf("set yrange [ 1 : %d]\n", roundup(maxy)) >> gplfile;
        }
        printf("set ytics ( %c1%c 1, %c5%c 5, %c20%c 20, %c10%c 10, %c50%c 50, %c100%c 100, %c200%c 200 )\n", 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34) >> gplfile;
    }
    printf("set key top left\n") >> gplfile;
    printf("set pointsize 2\n") >> gplfile;
    #printf("set style fill solid\n") >> gplfile;
    printf("set view ,,2\n") >> gplfile;

    # One plot clause per track, separated by commas
    f = 0;
    printf("plot ") >> gplfile;
    for (track in multifunctiontrack) {
        trackfile = track datafilebase;
        if (f > 0)
            printf(", ") >> gplfile;
        f = f + 1;
        printf("%c%s%c", 34, trackfile, 34) >> gplfile;
        #printf(" smooth cspline") >> gplfile;
        printf(" with lines lt %d lw 4", f+1) >> gplfile;
        printf(" title %c%s%c", 34, countrycapitalize(track), 34) >>
gplfile; }
    printf("\n") >> gplfile;
    plottopng(gplfile,epsfile,pngfile);
}

#
# mavg(fromfile,tofile,alpha)
#
# Reads rows of two numeric columns (x and y) from fromfile, smooths
# the y column with an exponential moving average
#     s[1] = y[1],  s[i] = s[i-1] + alpha * (y[i] - s[i-1])
# and writes x, the smoothed y and, as a trailing comment, the
# original y to tofile. Rows that do not have exactly two columns
# (such as the leading empty line of the data files) are ignored.
# Note that the globals i, n and frome are overwritten.
#
function mavg(fromfile,tofile,alpha) {
    #
    # Read data. Columns may be separated by blanks or tabs: the
    # data files written by genericmultifunctionreport put a blank
    # between the columns, which a split on a tab alone would reject.
    #
    delete mavgtab1a;
    delete mavgtab1b;
    i = 1;
    while ((getline frome < fromfile) == 1) {
        if (length(frome) > 1 && split(frome,mavgtab2,/[ \t]+/) == 2) {
            mavgtab1a[i] = mavgtab2[1]+0.0;
            mavgtab1b[i] = mavgtab2[2]+0.0;
            i++;
        }
    }
    # Release the input file instead of keeping it open until exit
    close(fromfile);
    n = i;

    #
    # Calculate moving average
    #
    delete mavgtab3;
    mavgtab3[1] = mavgtab1b[1];
    for (i = 2; i < n; i++) {
        mavgtab3[i] = mavgtab3[i-1] + alpha * (mavgtab1b[i] - mavgtab3[i-1]);
    }

    #
    # Output data
    #
    printf("\n") > tofile;
    for (i = 1; i < n; i++) {
        printf("%6.2f %6.2f # orig was %6.2f\n", mavgtab1a[i], mavgtab3[i], mavgtab1b[i]) >> tofile;
    }
    # Make sure the smoothed data is complete on disk before it is plotted
    close(tofile);
}

#
# reportcompanies()
#
# Writes the page on the distribution of authors over companies.
#
function reportcompanies() {
    base = "companydistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of authors from companies");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    printf("

Company data is calculated from the first occurrence of an author.

\n") >> htmlfile; closehtml(htmlfile); }

#
# reportcountries(useeu)
#
# Writes the page on the distribution of authors over countries.
# A true useeu selects the countryeudistr set of output files
# instead of countrydistr.
#
function reportcountries(useeu) {
    base = useeu ? "countryeudistr" : "countrydistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";

    openhtml(htmlfile,"Distribution of authors from countries");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    printf("

Location data is calculated from the first occurrence of an author.

\n") >> htmlfile; closehtml(htmlfile); }

#
# reportcontinents_draft()
#
# Writes the page on the distribution of documents over the
# continents of their authors (output files named d-contdistr).
#
function reportcontinents_draft() {
    base = "d-contdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";

    openhtml(htmlfile,"Distribution of " docname "s according to the continents of their authors");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    printf("

Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different countries.

\n", docnameu) >> htmlfile; closehtml(htmlfile); }

#
# reportcompanies_draft()
#
# Writes the page on the distribution of documents over the
# affiliations of their authors (output files named d-companydistr).
#
function reportcompanies_draft() {
    base = "d-companydistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";

    openhtml(htmlfile,"Distribution of " docname "s according to the affiliation of their authors");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    printf("

Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different companies.

\n", docnameu) >> htmlfile; closehtml(htmlfile); }

#
# reportcountries_draft(useeu)
#
# Writes the page on the distribution of documents over the
# countries of their authors. A true useeu selects the
# d-countryeudistr set of output files instead of d-countrydistr.
#
function reportcountries_draft(useeu) {
    base = useeu ? "d-countryeudistr" : "d-countrydistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";

    openhtml(htmlfile,"Distribution of " docname "s according to the countries of their authors");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("

%ss come from these countries, %d different countries in total. \n", docnameu, 34, 34, ncountries) >> htmlfile; printf("

\n") >> htmlfile; printf("\n") >> htmlfile; printf("

Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different countries.

\n", docnameu) >> htmlfile; printf("

See also the ranking of the countries per capita.

\n", 34, base, 34) >> htmlfile; closehtml(htmlfile); reportcountries_draft_percap(useeu); }

#
# reportcountries_draft_percap(useeu)
#
# Writes the per capita variant of the documents-per-country page.
# A true useeu selects the d-countryeudistrcap set of output files
# instead of d-countrydistrcap.
#
function reportcountries_draft_percap(useeu) {
    base = useeu ? "d-countryeudistrcap" : "d-countrydistrcap";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";

    openhtml(htmlfile,"Distribution of " docname "s according to the countries of their authors, per capita");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    printf("

Source of population data is Wikipedia.

\n") >> htmlfile; closehtml(htmlfile); }

#
# checkpopulation(co)
#
# Aborts the run through fatalerror when no non-zero population
# figure is recorded for co.
#
function checkpopulation(co) {
    if (population[co] == "" || population[co] == 0) {
        fatalerror("Population for " co " is unknown");
    }
}

#
# fatalerror(s)
#
# Prints the message s and terminates with exit status 1.
#
function fatalerror(s) {
    printf("authorstats: Fatal error: %s -- exit\n", s);
    exit(1);
}

#
# lastnames(s)
#
# s is a list of names separated by a comma and a blank. Returns the
# same list with every name reduced by lastname.
#
function lastnames(s) {
    nk = split(s,nameparts,", ");
    result = lastname(nameparts[1]);
    for (k = 2; k <= nk; k++) {
        result = result ", " lastname(nameparts[k]);
    }
    return(result);
}

#
# lastname(y)
#
# Returns the last blank-separated word of y. When that word is one
# of the suffixes 2nd, 3rd, II or III, or begins with an opening
# parenthesis, the word before it is returned instead.
#
function lastname(y) {
    nr = split(y,lnamecomps," ");
    res = lnamecomps[nr];
    if (res == "3rd" || res == "2nd" || res == "III" || res == "II")
        res = lnamecomps[nr-1];
    if (substr(res,1,1) == "(")
        res = lnamecomps[nr-1];
    return(res);
}

#
# official(s)
#
# True when s contains one of the document name prefixes
# draft-ietf, draft-iab, draft-rfc, draft-iesg or draft-irtf.
#
function official(s) {
    return(s ~ /draft-ietf/ ||
           s ~ /draft-iab/ ||
           s ~ /draft-rfc/ ||
           s ~ /draft-iesg/ ||
           s ~ /draft-irtf/);
}

#
# isinarea(draft,area)
#
# Returns 1 when the working group of draft (the official one or,
# failing that, the related one) is recorded in wgarea as belonging
# to area, and 0 otherwise. The global wg is overwritten.
#
function isinarea(draft,area) {
    #
    # Membership is tested with the in operator first. A bare
    # reference such as isarea[area] would create an empty element
    # for an unknown key, and reportauthoractivitiesperarea iterates
    # over every key of isarea.
    #
    if (!(area in isarea) || isarea[area] == "") return(0);
    wg = getofficialwg(draft);
    if (wg == "") wg = getrelatedwg(draft);
    if (wg == "") return(0);
    if (!(wg in iswg) || iswg[wg] == "") return(0);
    if (wgarea[wg] != area) return(0);
    return(1);
}

#
# reportauthoractivitiesperarea()
#
# Writes the page on the most active authors, one section per key
# of isarea.
#
function reportauthoractivitiesperarea() {
    base = "authactareadistr";
    htmlfile = base ".html";
    openhtml(htmlfile,"Most active authors per area");
    for (area in isarea) {
        html_file_css_section(area " Area", htmlfile);
        printf("\n") >> htmlfile;
    }
    closehtml(htmlfile);
}

#
# reportauthoractivities(iswg)
#
# Writes the page on how many documents each author has. A true
# iswg selects the authactdistr-wg set of output files. Note that
# the parameter hides the global array of the same name inside this
# function.
#
function reportauthoractivities(iswg) {
    if (iswg) {
        base = "authactdistr-wg";
    } else {
        base = "authactdistr";
    }
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of authors according to how many documents they have");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("

Total number of distinct authors is %d, the different %ss together had a total of %d author entries.

", nauthors, docname, nauthoroccurrences) >> htmlfile; printf("\n") >> htmlfile; if (iswg) printf("

(The percentages have been calculated against all authors, not just WG document authors.)

\n") >> htmlfile; closehtml(htmlfile); if (!iswg) realmaxdrafts = maxdrafts; }

#
# reportauthorcrosswg()
#
# Writes the page on author activity across working groups.
#
function reportauthorcrosswg() {
    base = "authcrosswg";
    htmlfile = base ".html";
    #epsfile = base ".eps";
    #jpgfile = base ".jpg";
    #datafile = base ".dat";
    #gplfile = base".txt";
    openhtml(htmlfile,"Author cross-WG activities");
    #html_file_css_section("Statistics",htmlfile);
    #printf("%cstatistics%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    closehtml(htmlfile);
}

#
# reportauthorcrossarea()
#
# Writes the page on author activity across areas.
#
function reportauthorcrossarea() {
    base = "authcrossarea";
    htmlfile = base ".html";
    #epsfile = base ".eps";
    #jpgfile = base ".jpg";
    #datafile = base ".dat";
    #gplfile = base".txt";
    openhtml(htmlfile,"Author cross-area activities");
    #html_file_css_section("Statistics",htmlfile);
    #printf("%cstatistics%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    closehtml(htmlfile);
}

#
# getofficialwg(draft)
#
# For a name of the form draft-ietf-wg-..., returns wg when it is a
# known working group according to iswg, and the empty string
# otherwise. The global array dnamecomps is overwritten.
#
function getofficialwg(draft) {
    split(draft,dnamecomps,"-");
    #
    # The in test comes first so that an unknown name does not get
    # inserted into iswg as an empty element by the mere reference.
    #
    if (dnamecomps[2] == "ietf" && (dnamecomps[3] in iswg) && iswg[dnamecomps[3]])
        return dnamecomps[3];
    else
        return("");
}

#
# getrelatedwg(draft)
#
# For a name of the form draft-something-wg-..., where something is
# not ietf, returns wg when it is a known working group according
# to iswg, and the empty string otherwise. The global array
# dnamecomps is overwritten.
#
function getrelatedwg(draft) {
    split(draft,dnamecomps,"-");
    # Same precaution as in getofficialwg: do not create iswg elements
    if (dnamecomps[2] != "ietf" && (dnamecomps[3] in iswg) && iswg[dnamecomps[3]])
        return(dnamecomps[3]);
    else
        return("");
}

#
# reportauthors()
#
# Writes the page on the number of authors per document.
#
function reportauthors() {
    base = "authdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of the number of authors per " docname);
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    closehtml(htmlfile);
}

#
# reportpubyears()
#
# Writes the publication rate page: a monthly graph for every
# document type and, for RFCs, a yearly section in front of it.
#
function reportpubyears() {
    base = "pubdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    if (doctype == "draft") {
        openhtml(htmlfile,"Publication rate per month");
    } else if (doctype == "rfc") {
openhtml(htmlfile,"Publication rate per year"); html_file_css_section("Yearly Statistics",htmlfile); printf("%cstatistics%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("
\n") >> htmlfile; close(epsfile);close(jpgfile);close(datafile);close(gplfile); }

    #
    # Monthly statistics: a second set of output files, with the
    # digit 2 appended to the base name
    #
    epsfile = base "2.eps";
    jpgfile = base "2.jpg";
    datafile = base "2.dat";
    gplfile = base "2.txt";
    printf("\n") > datafile;
    maxn = 0;
    for (i = startyear; i <= endyear; i++) {
        for (j = 1; j <= 12; j++) {
            # Count the documents dated in month j of year i
            n = 0;
            for (draft in draftyears)
                if (draftyears[draft] == i && draftmonths[draft] == monthnameshort[j]) n++;
            if (n > maxn) maxn = n;
            # The twelve months of year i are spread from i - 0.5 on
            printf("%4.2f %d\n", i - 0.5 + (j-1)/12.0, n) >> datafile;
        }
    }
    html_file_css_section("Monthly Statistics",htmlfile);
    printf("%cstatistics%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;

    #
    # Extra gnuplot settings: axis ranges and year tics, with rotated
    # and smaller labels the more years there are
    #
    extra = "set xrange [" startyear-0.5 " : " endyear+0.5 "]\n";
    extra = extra "set yrange [ 0 : " roundup(maxn) "]\n";
    extra = extra "set xtics ";
    if (endyear - startyear > 5) {
        extra = extra "rotate ";
    }
    extra = extra startyear ", 1, " endyear;
    if (endyear - startyear > 20) {
        extra = extra " font " quote "Times-Roman,20" quote "\n";
    } else if (endyear - startyear > 5) {
        extra = extra " font " quote "Times-Roman,30" quote "\n";
    } else {
        extra = extra "\n";
    }
    extra = extra "set mxtics 0.08\n";
    extra = extra "set boxwidth 0.07\n";
    basicgnuplotsettings(gplfile,epsfile,datafile,"Year and Month",
                         "# of " docnameu "s","Publication rate per month",
                         0,extra);
    plottojpg(gplfile,epsfile,jpgfile);
    closehtml(htmlfile);
}

#
# roundup(n)
#
# Returns an axis limit above n: the next multiple of 10 for n
# below 50, the next multiple of 100 otherwise. A value that already
# is a multiple moves up one full step (40 gives 50).
#
function roundup(n) {
    #
    # Division in awk is floating point, so the quotient must be
    # truncated with int. Without it the result was n + 10 or
    # n + 100 and nothing got rounded.
    #
    if (n < 50) {
        return( 10 * int(n/10) + 10);
    } else {
        return( 100 * int(n/100) + 100);
    }
}

#
# reportformats()
#
# Writes the page on document formats and features.
#
function reportformats() {
    base = "formatdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Document formats and features in " docname "s");
    html_file_css_section("Formats", htmlfile);
    printf("

These formats are used:

\n") >> htmlfile; printf("\n") >> htmlfile; html_file_css_section_nextcol("Features", htmlfile); printf("

These features are used within the documents:

\n") >> htmlfile; printf("\n") >> htmlfile; printf("

The document feature recognition is based on heuristics and is inherently unreliable.

\n") >> htmlfile; closehtml(htmlfile); }

#
# reportpages()
#
# Writes the page on the number of pages per document.
#
function reportpages() {
    base = "pagedistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of the number of pages in " docname "s");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    html_file_css_section("Details",htmlfile);
    printf("\n") >> htmlfile;
    closehtml(htmlfile);
}

#
# docgettype(t)
#
# Returns draft when the name t begins with draft, rfc otherwise.
#
function docgettype(t) {
    if (t ~ /^draft/) {
        return("draft");
    } else {
        return("rfc");
    }
}

#
# draftname(t)
#
# Returns the file name t without a leading draft- prefix, without
# a two-digit version followed by .txt, and without .txt.
#
function draftname(t) {
    gsub(/^draft-/,"",t);
    gsub(/-[0-9][0-9][.]txt/,"",t);
    #gsub(/^rfc/,"",t);
    gsub(/[.]txt/,"",t);
    return(t);
}

#
# reportareawgs()
#
# Writes the page on how the working groups are spread over the
# areas, using the global counts nwgs and nareas.
#
function reportareawgs() {
    base = "areawgdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of WGs to areas");
    html_file_css_section("Statistics",htmlfile);
    # The average is reported as zero when there are no areas, since
    # a division by zero would abort the whole run
    printf("

There are %d WGs and %d areas, an average of %.2f WGs/area.\n", nwgs, nareas, nareas > 0 ? (1.0 * nwgs) / nareas : 0.0) >> htmlfile;
    printf("%careas\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
    printf("

\n") >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); }

#
# splittolines(inputstring)
#
# Returns inputstring with its first two blanks each replaced by
# the replacement text given to sub (a backslash followed by n).
#
function splittolines(inputstring) {
    sub(/ /,"\\n",inputstring);
    sub(/ /,"\\n",inputstring);
    return(inputstring);
}

#
# reportwgs()
#
# Writes the page on the distribution of documents over working
# groups. Documents are counted per official working group in
# wgcounts and per related working group in wgcountsother;
# nwgswithdrafts[i] receives the number of working groups that have
# exactly i official documents.
#
function reportwgs() {
    base = "wgdistr";
    htmlfile = base ".html";
    epsfile = base ".eps";
    jpgfile = base ".jpg";
    datafile = base ".dat";
    gplfile = base".txt";
    openhtml(htmlfile,"Distribution of " docname "s according to WGs");
    html_file_css_section("Statistics",htmlfile);
    printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;

    #
    # Count the documents per working group
    #
    maxcount = 0;
    for (draft in isdraft) {
        wg = getofficialwg(isdraft[draft]);
        if (wg != "") {
            wgcounts[wg]++;
            if (wgcounts[wg] > maxcount) maxcount = wgcounts[wg];
        } else {
            wg = getrelatedwg(isdraft[draft]);
            if (wg != "") {
                wgcountsother[wg]++;
            }
        }
    }

    #
    # Count the working groups per document amount
    #
    for (i = maxcount; i >= 0; i--) {
        nwgswithdrafts[i] = 0;
        for (wg in wgcounts) {
            if (wgcounts[wg] == i) {
                nwgswithdrafts[i]++;
            }
        }
    }

    printf("\n") > datafile;
    html_file_css_section("Distribution of " docname " amounts", htmlfile);
    printf("\n") >> htmlfile;
    html_file_css_section("List of WGs", htmlfile);
    printf("\n") >> htmlfile;
    basicgnuplotsettings(gplfile,epsfile,datafile,
                         "# of " docnameu "s","# of WGs",
                         "Number of " docname "s in a WG",1,"");
    plottojpg(gplfile,epsfile,jpgfile);
    printf("

Definition of a related %s is that it matches the pattern draft-something-wg, where something is not ietf, and wg is the name of a working group.

\n", docname) >> htmlfile; closehtml(htmlfile); } function reportareas() { base = "areadistr"; htmlfile = base ".html"; epsfile = base ".eps"; jpgfile = base ".jpg"; datafile = base ".dat"; gplfile = base".txt"; openhtml(htmlfile,docnameu " area distribution"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile; maxcount = 0; for (draft in isdraft) { wg = getofficialwg(isdraft[draft]); if (wg != "") { area = wgarea[wg]; if (area in areacounts) { areacounts[area] = areacounts[area] + 1; } else { areacounts[area] = 1; } if (areacounts[area] > maxcount) maxcount = areacounts[area]; } else { wg = getrelatedwg(isdraft[draft]); if (wg != "") { area = wgarea[wg]; if (area in areacountsother) { areacountsother[area] = areacountsother[area] + 1; } else { areacountsother[area] = 1; } } } } printf("\n") > datafile; html_file_css_section("List of areas", htmlfile); printf("