function inittabs() { areaacronym["applications"] = "app"; areaacronym["general"] = "gen"; areaacronym["internet"] = "int"; areaacronym["ip: next generation"] = "nextgen"; areaacronym["network management area"] = "ops"; areaacronym["operational requirements"] = "ops"; areaacronym["operations and management"] = "ops"; areaacronym["osi integration"] = "osi"; areaacronym["real-time applications and infrastructure"] = "rai"; areaacronym["applications and real-time"] = "art"; areaacronym["routing"] = "rtg"; areaacronym["security"] = "sec"; areaacronym["sub-ip"] = "sub"; areaacronym["transport"] = "tsv"; areaacronym["user services"] = "user"; monthname["1"] = "January"; monthname["2"] = "February"; monthname["3"] = "March"; monthname["4"] = "April"; monthname["5"] = "May"; monthname["6"] = "June"; monthname["7"] = "July"; monthname["8"] = "August"; monthname["9"] = "September"; monthname["10"] = "October"; monthname["11"] = "November"; monthname["12"] = "December"; isineu["iceland"] = 1; isineu["finland"] = 1; isineu["sweden"] = 1; isineu["denmark"] = 1; # isineu["united kingdom"] = 1; # brexit! isineu["germany"] = 1; isineu["the netherlands"] = 1; isineu["france"] = 1; isineu["belgium"] = 1; isineu["italy"] = 1; isineu["spain"] = 1; isineu["portugal"] = 1; isineu["austria"] = 1; isineu["cyprus"] = 1; isineu["czech republic"] = 1; isineu["estonia"] = 1; isineu["greece"] = 1; isineu["hungary"] = 1; isineu["ireland"] = 1; isineu["latvia"] = 1; isineu["lithuania"] = 1; isineu["luxembourg"] = 1; isineu["malta"] = 1; isineu["poland"] = 1; isineu["slovakia"] = 1; isineu["slovenia"] = 1; isineu["monaco"] = 1; population["european union"] = 0; for (eucountry in isineu) { continentof[eucountry] = "europe"; } continentof["switzerland"] = "europe"; continentof["united kingdom"] = "europe"; continentof["norway"] = "europe"; continentof["bulgaria"] = "europe"; continentof["romania"] = "europe"; continentof["russia"] = "europe"; continentof["belarus"] = "europe"; continentof["st. lucia"] = "europe"; continentof["ukraine"] = "europe"; continentof["turkey"] = "europe"; continentof["croatia"] = "europe"; continentof["serbia and montenegro"] = "europe"; continentof["gibraltar"] = "europe"; continentof["australia"] = "australia"; continentof["new zealand"] = "australia"; continentof["tonga"] = "australia"; continentof["solomon islands"] = "australia"; continentof["christmas island"] = "australia"; continentof["niue"] = "australia"; continentof["samoa"] = "australia"; continentof["british indian o. terr."] = "asia"; continentof["georgia"] = "asia"; continentof["sri lanka"] = "asia"; continentof["armenia"] = "asia"; continentof["east timor"] = "asia"; continentof["south korea"] = "asia"; continentof["pakistan"] = "asia"; continentof["vietnam"] = "asia"; continentof["japan"] = "asia"; continentof["china"] = "asia"; continentof["israel"] = "asia"; continentof["jordan"] = "asia"; continentof["hong kong"] = "asia"; continentof["thailand"] = "asia"; continentof["india"] = "asia"; continentof["syria"] = "asia"; continentof["united arab emirates"] = "asia"; continentof["singapore"] = "asia"; continentof["saudi arabia"] = "asia"; continentof["lebanon"] = "asia"; continentof["mongolia"] = "asia"; continentof["cocos (keeling) isl."] = "asia"; continentof["taiwan"] = "asia"; continentof["macau"] = "asia"; continentof["malaysia"] = "asia"; continentof["bhutan"] = "asia"; continentof["the philippines"] = "asia"; continentof["iran"] = "asia"; continentof["iraq"] = "asia"; continentof["south africa"] = "africa"; continentof["nigeria"] = "africa"; continentof["swaziland"] = "africa"; continentof["sudan"] = "africa"; continentof["tunisia"] = "africa"; continentof["egypt"] = "africa"; continentof["niger"] = "africa"; continentof["st. tome and principe"] = "africa"; continentof["st. helena"] = "africa"; continentof["mauritius"] = "africa"; continentof["algeria"] = "africa"; continentof["morocco"] = "africa"; continentof["gambia"] = "africa"; continentof["cuba"] = "south america"; continentof["colombia"] = "south america"; continentof["jamaica"] = "south america"; continentof["netherland antilles"] = "south america"; continentof["venezuela"] = "south america"; continentof["aruba"] = "south america"; continentof["panama"] = "south america"; continentof["chile"] = "south america"; continentof["brazil"] = "south america"; continentof["peru"] = "south america"; continentof["argentina"] = "south america"; continentof["costa rica"] = "south america"; continentof["uruguay"] = "south america"; continentof["st. lucia"] = "south america"; continentof["antigua and barbuda"] = "south america"; continentof["mexico"] = "north america"; continentof["usa"] = "north america"; continentof["canada"] = "north america"; continentof["greenland"] = "north america"; continentof["st. pierre miquelon"] = "north america"; continentof["antarctica"] = "antarctica"; continentof["bouvet island"] = "antarctica"; monthnameshort[1] = "jan"; monthnameshort[2] = "feb"; monthnameshort[3] = "mar"; monthnameshort[4] = "apr"; monthnameshort[5] = "may"; monthnameshort[6] = "jun"; monthnameshort[7] = "jul"; monthnameshort[8] = "aug"; monthnameshort[9] = "sep"; monthnameshort[10] = "oct"; monthnameshort[11] = "nov"; monthnameshort[12] = "dec"; } function initstats() { nbaddrafts = 0; nbadauthorlines = 0; nbadauthoremaillines = 0; ndrafts = 0; nauthors = 0; nietfdrafts = 0; nirtfdrafts = 0; niabdrafts = 0; niesgdrafts = 0; nindividualdrafts = 0; nauthoroccurrences = 0; ncountries = 0; ngenders = 0; ncompanies = 0; nunknowncountry = 0; nunknowncompany = 0; euauthors = 0; eudrafts = 0; nalldrafts = 0; nareas = 0; nwgs = 0; nietfdrafts = 0; nirtfdrafts = 0; niesgdrafts = 0; niabdrafts = 0; nrfceddrafts = 0; nindividualdrafts = 0; npostscriptdrafts = 0; npdfdrafts = 0; nnontextformats = 0; nfigures = 0; nformats = 0; nabnfs = 0; nasn1s = 0; ncodes = 0; nxmls = 0; nkwds = 0; nsecconss = 0; nianaconss = 0; nerratas = 0; startyear = 0; endyear = 0; nsorts = 0; } function html_file_css_head(title,csshtmlfile,slogan) { printf("html_file_css_head(%s,%s,%s)\n",title,csshtmlfile,slogan) >> "/tmp/htmlopendebug.txt"; csshtmlfileseensection[csshtmlfile] = 0; if (csshtmlfile == "") { printf("authorstats: Warning: Empty file name %s for title %s (warning 2)\n", csshtmlfile, title); csshtmlfile = "null.html"; } if (substr(csshtmlfile,1,5) != "/dev/") close(csshtmlfile); printf("\n", 34, 34, 34, 34) > csshtmlfile; printf("\n", 34, 34, 34, 34, 34, 34) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n", 34, 34, 34, 34) >> csshtmlfile; printf("\n", 34, 34, 34, 34, 34, 34) >> csshtmlfile; printf("%s\n", title) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("\n", 34, 34, 34, 34) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n") >> htmlfile; printf("\n") >> htmlfile; printf("\n") >> htmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("

IETF Statistics

\n", 34, 34, 34, 34) >> csshtmlfile; printf("
\n") >> csshtmlfile; printf(" \n", 34, 34, 34, 34) >> csshtmlfile; printf(" \n", 34, 34, 34, 34) >> csshtmlfile; printf(" \n", 34, 34, 34, 34) >> csshtmlfile; printf(" \n", 34, 34, 34, 34) >> csshtmlfile; printf(" \n", 34, 34, 34, 34) >> csshtmlfile; printf("
\n") >> csshtmlfile; printf("
\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("

%s

\n", title) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("“%s”\n", slogan) >> csshtmlfile; printf("
\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("
\n") >> csshtmlfile; printf("
\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("
\n", 34, 34) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n", 34, 34) >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n") >> csshtmlfile; printf("\n\n\n") >> htmlfile; printf("\n") >> htmlfile; printf("\n") >> htmlfile; printf("
\n", 34, 34) >> csshtmlfile; } function html_file_css_section(title,csshtmlfile) { if (csshtmlfileseensection[csshtmlfile] != 0) { printf("\n
\n", 34, 34) >> csshtmlfile; } csshtmlfileseensection[csshtmlfile] = 1; printf("\n

%s

\n\n", title) >> csshtmlfile; } function html_file_css_section_nextcol(title,csshtmlfile) { csshtmlfileseensection[csshtmlfile] = 1; printf("\n
\n", 34, 34) >> csshtmlfile; printf("\n

%s

\n\n", title) >> csshtmlfile; } function html_file_css_end(htmlfile) { printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("
\n") >> htmlfile; printf("\n") >> htmlfile; printf("\n") >> htmlfile; close(htmlfile); } function checkareaacronym(a) { a = tolower(a); if (areaacronym[a] == "") { printf("authorstats: Fatal error -- area %s acronym unknown\n", a); exit(1); } } function removever(x) { sub(/-[0-9][0-9][.]txt/,"",x); return(x); } BEGIN { FS=":"; if (debug) printf("authorstats: debug: BEGIN\n"); printf("") > "/tmp/unrecognizedfirstnames.txt"; printf("") > "/tmp/unrecognizedgendernames.txt"; printf("") > "/tmp/unreogniseddebuggender.txt"; hrefprefix = "../allstats/"; quote = sprintf("%c",34); inittabs(); initstats(); activeauthor = 1; activecompany = 1; ntopcompanies = 15 ntopcountries = 30; ntopgenders = 4; minimumcompletion = 0.1; if (doctype == "draft") { docname = "draft"; docnameu = "Draft"; docprefix = "https://datatracker.ietf.org/doc/html/"; } else if (doctype == "rfc") { docname = "RFC"; docnameu = "RFC"; docprefix = "https://datatracker.ietf.org/doc/html/"; } else if (doctype == "all") { docname = "document"; docnameu = "Document"; docprefix = "https://datatracker.ietf.org/doc/html/"; } else { print("authorstats: Unrecognized doctype\n"); exit(1); } if (debug) printf("authorstats: debug: BEGIN end\n"); } /^0-person:male:/ { if (femaleperson[$3]) { printf("authorstats: Conflicting gender for %s\n", $3); } if (maleperson[$3]) { printf("authorstats: Duplicate gender for %s\n", $3); exit(1); } maleperson[$3] = 1; next; } /^0-person:female:/ { if (maleperson[$3]) { printf("authorstats: Conflicting gender for %s\n", $3); } if (femaleperson[$3]) { printf("authorstats: Duplicate gender for %s\n", $3); } femaleperson[$3] = 1; next; } /^0-male:/ { if (femalefirstname[$2] || unisexfirstname[$2]) { printf("authorstats: Conflicting first name gender for %s\n", $2); } if (malefirstname[$2]) { printf("authorstats: Duplicate first name gender for %s\n", $2); } malefirstname[$2] = 1; next; } /^0-female:/ { if (malefirstname[$2] || unisexfirstname[$2]) { printf("authorstats: Conflicting first name gender for %s\n", $2); } if (femalefirstname[$2]) { printf("authorstats: Duplicate first name gender for %s\n", $2); } femalefirstname[$2] = 1; next; } /^0-unisex:/ { if (malefirstname[$2] || femalefirstname[$2]) { printf("authorstats: Conflicting first name gender for %s\n", $2); } if (unisexfirstname[$2]) { printf("authorstats: Duplicate first name gender for %s\n", $2); } unisexfirstname[$2] = 1; next; } /^0-unknown:/ { if (malefirstname[$2] || femalefirstname[$2]) { printf("authorstats: Conflicting first name gender for %s\n", $2); } if (unisexfirstname[$2]) { printf("authorstats: Duplicate first name gender for %s\n", $2); } unisexfirstname[$2] = 1; next; } /^0-areadefinition:/ { if (isarea[$2] == "") { isarea[$2] = $2; nareas++; checkareaacronym($2); } next; } /^0-wgareadefinition:/ { if (isarea[$2] == "") { isarea[$2] = $2; nareas++; checkareaacronym($2); } iswg[$3] = $3; nwgs++; wgarea[$3] = $2; next; } /^0-population:/ { if (population[$2] != "") { printf("authorstats: Warning: Redefinition of population for %s in Wikipedia from line %d\n", $2,$4); next; } population[$2] = $3; if (isineu[$2]) { population["european union"] += $3; } next; } /^0-citations:/ { if (citcount[$2] != "") { printf("authorstats: Redefinition of citation count for RFC%d in line %d -- exit\n", $2, $4); exit(1); } citcount[$2] = $3; next; } /^0-cites:/ { if (cites[$2] != "") { printf("authorstats: Redefinition of citation info for %s in line %d -- exit\n", $2, $4); exit(1); } cites[$2] = $3; cnt = split($3, refs, ","); for (r = 1; r <= cnt; r++) citedby[refs[r]] = (citedby[refs[r]] ? (citedby[refs[r]] ",") : "") $2; next; } /^9-debug:/ { next; } /^z1-ad:/ { if (isarea[$2] == "") { printf("authorstats: error - area director defined for unknown area (%s)\n", $2); } if (authorad[$3] != "") { printf("authorstats: warning - %s can not be area director for both %s and %s\n", $3, $2, authorad[$3]); next; } else { if (isauthor[$3] == "") { #printf("authorstats: area director %s is an unknown author (not published anything)\n", $3); } authorad[$3] = $2; next; } } /^z2-chair:/ { if (iswg[$2] == "") { printf("authorstats: error - chair defined for unknown wg (%s)\n", $2); } if (isauthor[$3] == "") { #printf("authorstats: chair %s is an unknown author (not published anything)\n", $3); } if (authorchairs[$3] != "") authorchairs[$3] = authorchairs[$3] "%"; authorchairs[$3] = authorchairs[$3] $2; next; } /^z3-iab:/ { if (isauthor[$2] == "") { #printf("authorstats: iab member %s is an unknown author (not published anything)\n", $2); } authoriab[$2] = $2; next; } /^z4-iaoc:/ { if (isauthor[$2] == "") { #printf("authorstats: iaoc member %s is an unknown author (not published anything)\n", $2); } authoriaoc[$2] = $2; next; } /UNRECOGNIZED/ { allstatinc($1,""); baddraft($1); badauthoremailline($1,"all"); next; } /.*/ { #printf("authorstats: debug: Processing line %s...\n", $0); draft = $1; author = $2; company = $3; email = $4; country = $5; pages = $6; month = $7; year = $8; day = $9 features = $10; goodstatinc(draft,author,country,company,pages,month,year,features); addauthor(draft,author); draftyear if (company == "UNKNOWN") badauthorline($1,$2,"unknown affiliation"); if (country == "UNKNOWN") badauthorline($1,$2,"unknown location"); if (pages == "UNKNOWN") badauthorline($1,$2,"unknown page count"); if (month == "UNKNOWN") badauthorline($1,$2,"unknown publication month"); if (year == "UNKNOWN") badauthorline($1,$2,"unknown publication year"); if (email == "UNKNOWN") badauthoremailline($1,$2); } END { #printf("authorstats: debug: End of input...\n", $0); finalizeauthors(); calculatelastyear(); reportbaddrafts("unparseabledrafts.html"); reportbadauthorlines("unparseableauthors.html"); reportbadauthoremaillines("unparseableemails.html"); reportoverall("index.html"); reportindividualauthors("authors.html"); reportindividualcompanies("companies.html"); reportindividualcountries("countries.html"); reporthindextop(); reportmostcited(); #makebigdotfile("all.dot"); } #function makebigdotfile(dotfile) { # system("rm -f " dotfile); # printf("digraph all {\n") >> dotfile; # printf("\trankdir=LR\n") >> dotfile; # printf("\tmargin=0\n") >> dotfile; # printf("\tnodesep=0.1\n") >> dotfile; # printf("\tnode [ fontsize=11, margin=0, width=0, height=0 ]\n") >> dotfile; # system("grep -Eoh \"^[[:space:]]*[[:digit:]]+ -> [[:digit:]]+\" rfc*.dot | sort | uniq >> " dotfile); # printf("}\n") >> dotfile; # close(dotfile); #} function calculatelastyear() { tmpfile = "/tmp/as.date"; system("rm -f " tmpfile); system("date +%Y > " tmpfile); system("date +%m >> " tmpfile); system("date +%d >> " tmpfile); getline nowyear < tmpfile; getline nowmonth < tmpfile; getline nowday < tmpfile; close(tmpfile); if (nowyear < 2000 || nowyear > 3000) { printf("authorstats: error - can not find out current year -- exit\n"); exit(1); } nowmonth = nowmonth + 0; if (nowmonth < 1 || nowmonth > 12) { printf("authorstats: error - can not find out current month: %s -- exit\n", nowmonth); exit(1); } nowday = nowday + 0; if (nowday < 1 || nowday > 31) { printf("authorstats: error - can not find out current day -- exit\n"); exit(1); } lastyearcompletion = (nowmonth - 1) / 12.0 + ((nowday - 1) / 31.0) / 12.0; #printf("authorstats: lastyear is %f complete (%d %d %d)\n", lastyearcompletion, nowyear, nowmonth, nowday); system("rm -f " tmpfile); } function addauthor(draft,author) { if (draft in draftauthors) { draftauthors[draft] = draftauthors[draft] "%" author; draftnauthors[draft]++; } else { draftauthors[draft] = author; draftnauthors[draft] = 1; } if (author in authordrafts) { #printf("authorstats: debug: Author %s known, adding doc %s...\n", author, draft); authordrafts[author] = authordrafts[author] "%" draft; authorndrafts[author]++; #printf("authorstats: debug: Doc list for him now %s...\n", authordrafts[author]); } else { #printf("authorstats: debug: Author %s not known, adding doc %s...\n", author, draft); authordrafts[author] = draft; authorndrafts[author] = 1; } } function allstatinc(draft,author) { if (isdraft[draft] == "") { isdraft[draft] = draft; nalldrafts = nalldrafts + 1; } #nbadauthorlines = nbadauthorlines + 1; } function goodstatinc(draft,author,country,company,pages,month,year,features) { if (isdraft[draft] == "") { isdraft[draft] = draft; ndrafts = ndrafts + 1; nalldrafts = nalldrafts + 1; draftpagecounts[draft] = pages; if (month != "UNKNOWN") { draftmonths[draft] = month; } if (year != "UNKNOWN") { if (year <= 0 || year > 2020) { printf("authorstats: Invalid year (%s) for %s -- exit\n", year, draft); exit(1); } draftyears[draft] = year; if (startyear == 0 || year < startyear) startyear = year; if (endyear == 0 || year > endyear) endyear = year; } if (pagecounts[pages] == "") { pagecounts[pages] = 1; } else { pagecounts[pages] = pagecounts[pages] + 1; } if (draft ~ /^draft-ietf-/) { nietfdrafts++; } else if (draft ~ /^draft-irtf-/) { nirtfdrafts++; } else if (draft ~ /^draft-iesg-/) { niesgdrafts++; } else if (draft ~ /^draft-iab-/) { niabdrafts++; } else if (draft ~ /^draft-rfc/) { nrfceddrafts++; } else { nindividualdrafts++; } if (index(features,"postscript")) { npostscriptdrafts++; } if (index(features,"pdf")) { npdfdrafts++; } if (index(features,"postscript") || index(features,"pdf")) { nnontextformats++; } if (index(features,"figure")) { nfigures++; } if (index(features,"format")) { nformats++; } if (index(features,"abnf")) { nabnfs++; } if (index(features,"kwd")) { nkwds++; } if (index(features,"asn1")) { nasn1s++; } if (index(features,"code")) { ncodes++; } if (index(features,"xml")) { nxmls++; } if (index(features,"seccons")) { nsecconss++; } if (index(features,"ianacons")) { nianaconss++; } if (index(features,"errata")) { nerratas++; } } lyear = (year == "UNKNOWN" ? 0 : year); if (isauthor[author] == "") { isauthor[author] = author; nauthors = nauthors + 1; nallauthors = nallauthors + 1; newauthor = 1; # # Determine gender # if (0) { split(author,authornameparts," "); firstname = tolower(authornameparts[1]); #printf("authorstats: testing gender for %s: %s %s-%s-%s\n", author, firstname, #malefirstname[firstname], femalefirstname[firstname], unisexfirstname[firstname]); if (maleperson[author]) { gender = "male"; } else if (femaleperson[author]) { gender = "female"; } else if (malefirstname[firstname]) { gender = "male"; } else if (femalefirstname[firstname]) { gender = "female"; } else { if (!unisexfirstname[firstname] && !(firstname ~ /^[A-Za-z][.]/) && !(firstname ~ /^[A-Za-z]$/)) { printf("%s\n", firstname) >> "/tmp/unrecognizedfirstnames.txt"; } printf("%s\n", author) >> "/tmp/unrecognizedgendernames.txt"; printf("authorstats: testing gender for %s: %s %s-%s-%s - FAIL\n", author, firstname, malefirstname[firstname], femalefirstname[firstname], unisexfirstname[firstname]) >> "/tmp/unreogniseddebuggender.txt"; gender = "unknown"; } # authorgender[author] = gender; // for privacy do not register this if (!isgender[gender]) { isgender[gender] = 1; genderauthors[gender] = 0; genderdrafts[gender] = 0; ngenders++; } } } else { newauthor = 0; } if (0 && index(genderdraftslist[gender],draft) == 0) { genderdrafts[gender] = genderdrafts[gender] + 1; if (genderdraftslist[gender] == "") { genderdraftslist[gender] = draft; } else { genderdraftslist[gender] = genderdraftslist[gender] "%" draft; } } if (debug) printf("authorstats: debug: goodstatinc country -%s-\n", country); if (country != "UNKNOWN" && country != "") { if (iscountry[country] == "") { iscountry[country] = country; countryauthors[country] = 0; countrydrafts[country] = 0; ncountries++; } if (index(countrydraftslist[country],draft) == 0) { countrydrafts[country] = countrydrafts[country] + 1; if (countrydraftslist[country] == "") { countrydraftslist[country] = draft; } else { countrydraftslist[country] = countrydraftslist[country] "%" draft; } if (isineu[country] == 1) eudrafts++; } if (debug) printf("authorstats: debug: before the if-list: country = %s, isauthorcountry = %s, isauthorcountryothers = %s\n", country, isauthorcountry[author], isauthorcountryothers[author]); if (isauthorcountry[author] == "") { isauthorcountry[author] = country; if (debug) printf("authorstats: debug: setting initial isauthorcountry[%s] = %s\n", author, country); isauthorcountrytime[author] = lyear; } else if (isauthorcountry[author] == country) { if (isauthorcountrytime[author] < lyear) { if (debug) printf("authorstats: debug: adjusting time in isauthorcountry[%s] = %s\n", author, country); isauthorcountrytime[author] = lyear; } } else if (isauthorcountrytime[author] < lyear) { if (debug) printf("authorstats: debug: setting time-based isauthorcountry[%s] = %s\n", author, country); if (isauthorcountry[author] != "" && isauthorcountry[author] != "UNKNOWN" && index(isauthorcountryothers[author],isauthorcountry[author]) == 0) { if (isauthorcountryothers[author] != "") { isauthorcountryothers[author] = isauthorcountryothers[author] "%"; } if (debug) printf("authorstats: debug: adding another coutnr for %s = %s\n", author, isauthorcountry[author]); isauthorcountryothers[author] = isauthorcountryothers[author] isauthorcountry[author]; } isauthorcountry[author] = country; #printf("re-setting isauthorcountry[%s] = %s\n", author, country); isauthorcountrytime[author] = lyear; } else if (index(isauthorcountryothers[author],country) == 0) { if (isauthorcountryothers[author] != "") isauthorcountryothers[author] = isauthorcountryothers[author] "%"; if (debug) printf("authorstats: debug: adding a new other country for %s = %s\n", author, country); isauthorcountryothers[author] = isauthorcountryothers[author] country; } #printf("isauthorcountry[%s] left as %s\n", author, isauthorcountry[author]); # if (newauthor) { # countryauthors[country] = countryauthors[country] + 1; # if (isineu[country] == 1) euauthors++; # if (countryauthorslist[country] == "") { # countryauthorslist[country] = author; # } else { # countryauthorslist[country] = countryauthorslist[country] ", " author; # } # } # } else { # if (newauthor) nunknowncountry++; } else { #isauthorcountry[author] = "UNKNOWN"; #printf("setting isauthorcountry[%s] = %s\n", author, "UNKNOWN"); } if (company != "UNKNOWN" && company != "") { company = tolower(company); if (iscompany[company] == "") { iscompany[company] = company; companyauthors[company] = 0; companydrafts[company] = 0; ncompanies++; } if (isauthorcompany[author] == "") { isauthorcompany[author] = company; isauthorcompanytime[author] = lyear; #printf(" first company\n") >> "/tmp/oc.txt"; } else if (isauthorcompany[author] == company) { if (isauthorcompanytime[author] < lyear) isauthorcompanytime[author] = lyear; #printf(" same company\n") >> "/tmp/oc.txt"; } else if (isauthorcompanytime[author] < lyear) { if (isauthorcompany[author] != "" && isauthorcompany[author] != "UNKNOWN" && index(isauthorcompanyothers[author],isauthorcompany[author]) == 0) { if (isauthorcompanyothers[author] != "") isauthorcompanyothers[author] = isauthorcompanyothers[author] "%"; isauthorcompanyothers[author] = isauthorcompanyothers[author] isauthorcompany[author]; } isauthorcompany[author] = company; isauthorcompanytime[author] = lyear; #printf(" later company\n") >> "/tmp/oc.txt"; } else if (index(isauthorcompanyothers[author],company) == 0) { #printf(" unseen company\n") >> "/tmp/oc.txt"; if (isauthorcompanyothers[author] != "") isauthorcompanyothers[author] = isauthorcompanyothers[author] "%"; isauthorcompanyothers[author] = isauthorcompanyothers[author] company; } #printf(" post-company = %s, others = %s\n", isauthorcompany[author], isauthorcompanyothers[author]) >> "/tmp/oc.txt"; if (index(companydraftslist[company],draft) == 0) { companydrafts[company] = companydrafts[company] + 1; if (companydraftslist[company] == "") { companydraftslist[company] = draft; } else { companydraftslist[company] = companydraftslist[company] "%" draft; } } # if (newauthor) companyauthors[company] = companyauthors[company] + 1; # } else { # if (newauthor) nunknowncompany++; } else { #isauthorcompany[author] = "UNKNOWN"; } nauthoroccurrences = nauthoroccurrences + 1; } function finalizeauthors() { #printf("finalizeauthors...\n"); for (author in isauthor) { finalizeauthor(author); } } function finalizeauthor(author) { #printf("finalizeauthor(%s)...\n", author); country = isauthorcountry[author]; if (country != "UNKNOWN" && country != "") { countryauthors[country] = countryauthors[country] + 1; #printf("setting countryauthors[%s] = %d\n", country, countryauthors[country]); if (isineu[country] == 1) euauthors++; if (countryauthorslist[country] == "") { countryauthorslist[country] = author; } else { countryauthorslist[country] = countryauthorslist[country] ", " author; } } else { if (newauthor) nunknowncountry++; } company = isauthorcompany[author]; if (company != "UNKNOWN" && company != "") { companyauthors[company] = companyauthors[company] + 1; } else { nunknowncompany++; } } function badauthorline(draft,author,problem) { nbadauthorlines++; badauthorlines[nbadauthorlines] = author " - " draft ": " problem; } function badauthoremailline(draft,author) { nbadauthoremaillines++; badauthoremaillines[nbadauthoremaillines] = author " - " draft; } function baddraft(s) { nbaddrafts = nbaddrafts + 1; baddrafts[nbaddrafts] = s; } function draftreference(drname) { refname = drname; sub(/-[0-9][0-9][.]txt$/,"",refname); return(sprintf("%s", 34, docprefix, refname, 34, draftname(drname))); } function reportbaddrafts(file) { printf("reportbaddrafts(%s)\n",file) >> "/tmp/htmlopendebug.txt"; openhtml(file,"Unrecognized " docnameu "s"); html_file_css_section("Problems",file); printf("\n") >> file; closehtml(file); } function reportbadauthorlines(file) { printf("reportbadauthorlines(%s)\n",file) >> "/tmp/htmlopendebug.txt"; openhtml(file,"Unrecognized Authors in " docnameu "s"); html_file_css_section("Problems",file); printf("\n") >> file; closehtml(file); } function reportbadauthoremaillines(file) { printf("reportbadauthoremaillines(%s)\n",file) >> "/tmp/htmlopendebug.txt"; openhtml(file,"Unrecognized or Incomplete E-mail Addresses in " docnameu "s"); html_file_css_section("Problems",file); printf("\n") >> file; closehtml(file); } function reportindividualcountries(topfile) { printf("reportindividualcountries(%s)\n",topfile) >> "/tmp/htmlopendebug.txt"; openhtml(topfile,"Countries"); html_file_css_section("Countries",topfile); printf("\n") >> topfile; closehtml(topfile); } function reportindividualcountry(country,n,file) { printf("reportindividualcountry(%s,%s)\n",country,file) >> "/tmp/htmlopendebug.txt"; openhtml(file,countrycapitalize(country) " IETF Data"); html_file_css_section("Basic Information",file); countrywgs = ""; for (author in isauthorcompany) { if (isauthorcountry[author] == country) { s1 = authorwgs[author]; split(s1,tempwgs,"%"); for (tempwgi in tempwgs) { tempwg = tempwgs[tempwgi]; if (index(countrywgs,tempwg) == 0) { if (countrywgs == "") countrywgs = tempwg; else countrywgs = countrywgs "%" tempwg; } } } } if (doctype == "draft" || doctype == "all") { printwglist(countrywgs,"

The authors in this country appear to be working in the following WG%s: ","

\n",file); } html_file_css_section("Authors",file); if (0) { printf("

There are %d authors in %s:

\n", n, country) >> file; printf("\n") >> file; } else { printf("

This version of authorstats no longer lists individual authors in a country.

\n") >> file; } closehtml(file); } function capitalize(s) { if (length(s) >= 1 && substr(s,1,1) >= "a" && substr(s,1,1) <= "z") { return(toupper(substr(s,1,1)) substr(s,2)); } else { return(s); } } function countrycapitalize(cn) { if (cn == "usa") return("USA"); cnn = split(cn,cntab," "); cnres = ""; for (cni = 1; cni <= cnn; cni++) { if (cnres != "") cnres = cnres " "; cnres = cnres capitalize(cntab[cni]); } return(cnres); } function areacapitalize(ar) { return(countrycapitalize(ar)); } function companycapitalize(cp) { if (cp == "vpnc" || cp == "nsn" || cp == "ibm" || cp == "att" || cp == "bbn" || cp == "mit" || cp == "sri" || cp == "hp" || cp == "isoc" || cp == "isc" || cp == "dod" || cp == "nec" || cp == "dec" || cp == "mci" || cp == "nasa" || cp == "cnri" || cp == "bt" || cp ~ /^uc/ || cp ~ /^us/ || cp ~ /^ui/) return(toupper(cp)); return(countrycapitalize(cp)); } function coname(s) { if (s == "united arab emirates") s = "arab emirates"; return(s); } function tofname(s) { s = tolower(s) ".html"; gsub(/ /,"",s); gsub(/[()]/,"",s); return(s); } function tofname_comp(s) { s = "c_" tolower(s) ".html"; gsub(/ /,"",s); gsub(/[()]/,"",s); return(s); } function reportindividualcompanies(topfile) { printf("reportindividualcompanies(%s)\n",topfile) >> "/tmp/htmlopendebug.txt"; openhtml(topfile,"All companies"); html_file_css_section("Companies",topfile); printf("\n") >> topfile; closehtml(topfile); } function startsort(inputfile) { sortfiles[nsorts] = inputfile; sortfile = sprintf("sort%d.tmp",nsorts); nsorts++; if (debug) printf("debug: start sort %d in %s to %s\n", nsorts - 1, sortfile, sortfiles[nsorts-1]); system("rm -f " sortfile); system("touch " sortfile); return(sortfile); } function complexstopsort(sortfile, field, reverse, numeric, count) { inputfile = sortfiles[--nsorts]; if (debug) printf("debug: stop sort %d in %s to %s %s %s %s\n", nsorts, sortfile, inputfile, (field ? "field " field " " : ""), (reverse ? "reverse" : ""), (numeric ? "numeric" : "")); close(sortfile); close(inputfile); system("sort " (field ? " -k " field" " : "") (reverse ? " -r " : "") \ (numeric ? " -n " : "") " < " sortfile \ (count ? " | head -n " count : "") " >> " inputfile); close(sortfile); return(inputfile); } function reportindividualcompany(company,repna,repnd,file) { printf("reportindividualcompany(%s,%s)\n",company,file) >> "/tmp/htmlopendebug.txt"; openhtml(file,companycapitalize(company) " IETF Data (" topic ")"); companywgs = ""; for (author in isauthorcompany) { if (isauthorcompany[author] == company) { s1 = authorwgs[author]; split(s1,tempwgs,"%"); for (tempwgi in tempwgs) { tempwg = tempwgs[tempwgi]; if (index(companywgs,tempwg) == 0) { if (companywgs == "") companywgs = tempwg; else companywgs = companywgs "%" tempwg; } } } } html_file_css_section("WGs",file); printwglist(companywgs,"

The authors in this company appear to be working in the following WG%s: ","

\n",file); html_file_css_section("Authors",file); if (0) { printf("

This company currently has %d authors%s

\n", repna, (repna == 0 ? "." : ":")) >> file; printf("\n") >> file; } else { printf("

This version of authorstats no longer lists individual authors in a company.

\n") >> file; } split(companydraftslist[company],compd,"%"); repnpa = 0; prevauths = ""; for (adraft in compd) { ahere = draftauthors[draft]; split(ahere,aheretab,"%"); for (anauthorhere in aheretab) { if (isauthorcompany[anauthorhere] != company) { if (index(prevauths,anauthorhere) == 0) { if (prevauths == "") { prevauths = anauthorhere; repnpa = 1; } else { prevauths = prevauths "%" anauthorhere; repnpa++; } } } } } if (0 && repnpa > 0) { printf("

This company has previously had also %d authors:

\n", repnpa) >> file; printf("\n") >> file; } reportlistofdocs("The authors from " company " have", n, compd, file, 0); closehtml(file); } function reportindividualauthors(topfile) { printf("reportindivudalauthors(%s)\n",topfile) >> "/tmp/htmlopendebug.txt"; openhtml(topfile,activeauthor == 1 ? "All authors" : "Most active authors"); html_file_css_section("Authors",topfile); printf("\n") >> topfile; closehtml(topfile); } function printwglist(string,format1,format2,file) { n = split(string,wgtab,"%"); printf(format1, (n == 1 ? "" : "s")) >> file; if (n == 0) { printf("(none)") >> file; } else { for (q = 1; q <= n; q++) { if (n > 1 && q == n) printf(" and ") >> file; else if (q > 1) printf(", ") >> file; printf("%s", 34, wgtab[q], 34, wgtab[q]) >> file; } } printf(format2, (n == 1 ? "" : "s")) >> file; } function removestring(x,y) { rs = ""; while (length(x) > 0) { if (length(x) >= length(y) && substr(x,1,length(y)) == y) { x = substr(x,length(y) + 1); } else { rs = rs substr(x,1,1); x = substr(x,2); } } return(rs); } function genitive(name) { return(sprintf("%s%c%s", name, 39, match(name, /s$/) ? "" : "s")); } function reportindividualauthor(author,n,hisdraftstab,file) { printf("reportindividualauthor(%s,%s)\n",author,file) >> "/tmp/htmlopendebug.txt"; if (author == "" || author ~ /\000/) { printf("bailing out because of NULs\n") >> "/tmp/htmlopendebug.txt"; return; } openhtml(file,"IETF Publications Data by Author " author " (" topic ")"); html_file_css_section("Author Information",file); split(author,authornameparts," "); fn = authornameparts[1]; if (0) { if (isauthorcountry[author] == "") { printf("

The location of this author is unknown ") >> file; } else { printf("

Based on address information listed in the publications at the IETF, this author is in %s ", 34, hrefprefix, tofname(isauthorcountry[author]), 34, countrycapitalize(isauthorcountry[author])) >> file; } if (isauthorcountrytime[author] != 0) printf(" (most recently)") >> file; if (0 && isauthorcountryothers[author] != "") { printf(", previous locations include") >> file; tz = isauthorcountryothers[author]; tz = removestring(tz,isauthorcountry[author] "%"); tz = removestring(tz,"%" isauthorcountry[author]); cn = split(tz,compo,"%"); for (oi in compo) { if (oi == 1) { } else if (oi == cn) { printf(" and") >> file; } else { printf(",") >> file; } printf(" %s", 34, hrefprefix, tofname(compo[oi]), 34, countrycapitalize(compo[oi])) >> file; } } printf(".\n") >> file; } else { printf("

This version of authorstats no longer lists location or employment data.\n") >> file; } if (0) { if (isauthorcompany[author] == "") { printf("The affiliation of the author is unknown.\n") >> file; } else { printf("The most recent affiliation listed in the publications for the author is %s.\n", 34, hrefprefix, tofname_comp(isauthorcompany[author]), 34, capitalize(isauthorcompany[author])) >> file; } } if (0) { if (isauthorcompanyothers[author] != "") { printf("Previous listed affiliations in the publications include") >> file; tz = isauthorcompanyothers[author]; tz = removestring(tz,isauthorcompany[author] "%"); tz = removestring(tz,"%" isauthorcompany[author]); cn = split(tz,compo,"%"); for (oi in compo) { if (oi == 1) { } else if (oi == cn - 1) { printf(" and") >> file; } else { printf(",") >> file; } printf(" %s", 34, hrefprefix, tofname_comp(compo[oi]), 34, capitalize(compo[oi])) >> file; } printf(".\n") >> file; } } printf("

\n") >> file; if (doctype == "draft" || doctype == "all") { printwglist(authorwgs[author],"

The working group%s where the the author has submitted documents to appear to be ",".\n",file); } if (authorchairs[author] != "") { printwglist(authorchairs[author],"The author chairs the "," working group%s.\n",file); } if (authorad[author] != "") { #adstats = sprintf("(click here for statistics related to this role)", 34, capitalize(authornameparts[2]), capitalize(authornameparts[1]), 34); printf("The author is an area director for the %s area.\n", authorad[author]) >> file; } if (authoriab[author] != "") { printf("The author is in the IAB.\n") >> file; } if (authoriaoc[author] != "") { printf("The author is in the IAOC.\n") >> file; } printf("

\n") >> file; reportlistofdocs("The author has", n, hisdraftstab, file, 1); closehtml(file); } function graphcitations(rfc, n, cnt, r, citfile, dotfile) { if (nocitations || graphed[rfc] == 1) return; citfile = "citations-" rfc ".html"; dotfile = rfc ".dot"; printf("graphcitations(%s)\n",citfile) >> "/tmp/htmlopendebug.txt"; openhtml(citfile, "Citation Information for " rfc); n = substr(rfc, 4); system("rm -f " dotfile); printf("digraph " rfc " {\n") >> dotfile; printf("\trankdir=LR\n") >> dotfile; printf("\tmargin=0\n") >> dotfile; printf("\tnodesep=0.1\n") >> dotfile; printf("\tnode [ fontsize=11, margin=0, width=0, height=0 ]\n") >> dotfile; printf("\tedge [ fontsize=9 ]\n") >> dotfile; edge = 0; html_file_css_section("Cited By", citfile); cnt = split(citedby[n], refs, ","); if (cnt == 0) { printf("

%s is cited by no other RFCs.

\n", rfc) >> citfile; } else { printf("

%s is cited by the following %d RFC%s:

\n", rfc, cnt, (cnt == 1 ? "" : "s")) >> citfile; printf("\n") >> citfile; } html_file_css_section("Cites", citfile); cnt = split(cites[n], refs, ","); if (cnt == 0) { printf("

%s cites no other RFCs.

\n", rfc) >> citfile; } else { printf("

%s cites the following %d RFC%s:

\n", rfc, cnt, (cnt == 1 ? "" : "s")) >> citfile; printf("\n") >> citfile; } printf("\t" n " [ label=RFC" n ", style=bold ]\n") >> dotfile; printf("}\n") >> dotfile; close(dotfile); # although graphviz can also directly generate png, you need to have a magic # combination of pango, cairo, freetype, fontconfig, etc. installed for the # output to look good pngfile = "graph-" rfc ".png"; epsfile = gensub(/.png/, ".eps", 1, pngfile); system("dot -Teps -o " epsfile " " dotfile); system("convert -trim -density 100 " epsfile " " pngfile); html_file_css_section_nextcol("", citfile); printf("%crfc%d\n", 34, pngfile, 34, 34, 34, 34, rfc, 34) >> citfile; graphed[rfc] = 1; closehtml(citfile); } function makerfcli(file, rfc, n) { n = substr(rfc, 4); if (citcount[n] == 0) cb = "no other RFCs"; else cb = " " citcount[n] " RFC"\ (citcount[n] == 1 ? "" : "s")""; printf("
  • %s, cited by %s
  • \n", 34, docprefix, rfc, 34, rfc, cb) >> file; } function reportlistofdocs(who,n,hisdraftstab,file,person) { if (doctype == "all") { nd = 0; nr = 0; delete cits; for (adraft in hisdraftstab) { if (docgettype(hisdraftstab[adraft]) == "rfc") { if (person) { rfcnr = substr(draftname(hisdraftstab[adraft]), 4); cits[nr] = citcount[rfcnr]; citcount[author] += citcount[rfcnr]; } nr++; } else { nd++; } } rfccount[author] = nr; #printf("authorstats: debug: Reporting author with nr=%d and nd=%d...\n", nr, nd); html_file_css_section("RFCs",file); if (nr == 0) { printf("

    %s no RFCs.

    \n", who) >> file; } else { printf("

    %s the following %d RFC%s:

    \n", who, nr, (nr == 1 ? "" : "s")) >> file; printf("\n") >> file; close(file); system("rm -f " hisdocstmpfile); if (person) { # compute the h-index (sort of - we would actually need to eliminate # self-citations to do it properly) asort(cits); hindex[author] = 0; i = nr; while (cits[i] >= nr - i + 1 && i > 0) { hindex[author]++; i--; } printf("

    Based on the RFC%s the author has published, the impact factor (i.e., h-index) is %d.", (nr == 1 ? "" : "s"), 34, 34, hindex[author]) >> file; printf(" The author has %d total citations for %d RFC%s; an average of %.2f citations per RFC.", citcount[author], nr, (cnt == 1 ? "" : "s"), citcount[author] / nr) >> file; } } html_file_css_section_nextcol("Drafts",file); if (nd == 0) { printf("

    The author has no drafts.

    \n") >> file; } else { printf("

    The author has the following %d drafts:

    \n", nd) >> file; printf("\n") >> file; close(file); system("rm -f " hisdocstmpfile); } } else { html_file_css_section(docname "s",file); printf("

    %s the following %d %ss:

    \n", who, n, docname) >> file; printf("\n") >> file; close(file); system("rm -f " hisdocstmpfile); } } function reportoverall(file) { printf("reportoverall(%s)\n",file) >> "/tmp/htmlopendebug.txt"; openhtml(file,"IETF document statistics (" topic ")"); html_file_css_section(docnameu "s", file); printf("

    Total number of %ss is %d.\n", docname, nalldrafts) >> file; if (doctype == "draft") { printf("Of these %d (%.2f%%) are individual %ss, %d (%.2f%%) IETF, %d (%.2f%%) IRTF, %d (%.2f%%) IAB, %d (%.2f%%) RFC Editor and %d (%.2f%%) IESG drafts.\n", nindividualdrafts, (100.0 * nindividualdrafts) / ndrafts, docname, nietfdrafts, (100.0 * nietfdrafts) / ndrafts, nirtfdrafts, (100.0 * nirtfdrafts) / ndrafts, niabdrafts, (100.0 * niabdrafts) / ndrafts, nrfceddrafts, (100.0 * nrfceddrafts) / ndrafts, niesgdrafts, (100.0 * niesgdrafts) / ndrafts) >> file; } printf("

    \n") >> file; printf("

    The distribution of %s ", docname) >> file; if (doctype == "draft") { printf("according to version numbers is shown here,\n", 34, 34) >> file; reportversions(); } printf("and according to number of authors here,\n", 34, 34) >> file; reportauthors(); printf("%s page count distribution looks like this.\n", docnameu, 34, 34) >> file; reportpages(); printf("The used document format types and content features are shown here.\n", 34, 34) >> file; reportformats(); if (doctype == "rfc") { printf("The growth of publication rate per year is shown here.\n", 34, 34) >> file; reportpubyears(); } else if (doctype == "draft") { printf("The publication rate per month is shown here.\n", 34, 34) >> file; reportpubyears(); } else { # do nothing } printf("

    \n") >> file; printf("

    The most frequently cited RFCs can be seen here.\n", 34, 34) >> file; printf("

    \n") >> file; html_file_css_section("Authors", file); printf("

    The distribution of authors according to the number of %ss\n", docname) >> file; printf("they have is shown here.\n", 34, 34) >> file; reportauthoractivities(0); if (doctype == "draft" || doctype == "all") { printf("The same distribution for IETF (or other official) %ss is shown here.\n", docname, 34, 34) >> file; reportauthoractivities(1); printf("The authors work in one or multiple WGs according this distribution.\n", 34, 34) >> file; reportauthorcrosswg(); printf("The same distribution for cross-area work is here.\n", 34, 34) >> file; reportauthorcrossarea(); printf("Most active authors per area can be seen here.\n", 34, 34) >> file; reportauthoractivitiesperarea(); } if (doctype == "rfc") { printf("New RFC authors on a given year can be seen here.\n", 34, 34) >> file; reportnewauthors(); } printf("

    \n") >> file; printf("

    The relative impact of different RFC authors can be seen here.\n", 34, 34) >> file; printf("

    \n") >> file; if (doctype == "draft" || doctype == "all") { html_file_css_section("Areas and WGs", file); printf("

    The distribution of WGs per areas is shown in here.\n", 34, 34) >> file; reportareawgs(); printf("The distribution of %ss according to WGs is shown here,\n", docname, 34, 34) >> file; reportwgs(); printf("and according to areas is shown here.\n", 34, 34) >> file; reportareas(); printf("

    \n") >> file; } html_file_css_section("Affiliations", file); printf("

    Authors come from these companies, %d different companies in total. \n", 34, 34, ncompanies) >> file; reportcompanies(); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has changed like this\n", 34, 34) >> file; reporttopcompanies(0); # Normalization: OK printf(" (normalized).\n", 34, 34) >> file; reporttopcompanies(1); } printf("

    \n") >> file; html_file_css_section_nextcol("Countries and Continents", file); reportcountries_draft(0); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has developed like this", 34, 34) >> file; reporttopcountries(0); # Normalization: OK printf(" (normalized)", 34, 34) >> file; reporttopcountries(1); printf(".\n") >> file; } printf("Considering EU as a country, the distribution would look like this instead.\n", 34, 34) >> file; reportcountries_draft(1); printf("The same distribution for continents can be found here.\n", 34, 34) >> file; reportcontinents_draft(); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has developed like this\n", 34, 34) >> file; reporttopcontinents(0); # Normalization: OK printf(" (normalized)", 34, 34) >> file; reporttopcontinents(1); printf(".\n") >> file; } printf("

    Authors come from these countries.\n", 34, 34, ncountries) >> file; reportcountries(0); printf("Considering EU as a country, the distribution would look like this instead.\n", 34, 34) >> file; reportcountries(1); printf("The same distribution for continents can be found here.\n", 34, 34) >> file; reportcontinents(); printf("

    \n") >> file; if (0) { html_file_css_section_nextcol("Gender", file); reportgender_draft(); if (endyear - startyear > 2 && topic != "active I-Ds") { printf("Over the years, the situation has developed like this", 34, 34) >> file; reporttopgenders(0); # Normalization: OK printf(" (normalized)", 34, 34) >> file; reporttopgenders(1); printf(".\n") >> file; } printf("

    \n") >> file; } html_file_css_section("Detailed data", file); printf("

    All active authors can be seen here.\n", 34, 34) >> file; printf("Countries can be seen here.\n", 34, 34) >> file; printf("And active companies can be seen here.

    \n", 34, 34) >> file; html_file_css_section("Information Sources", file); printf("

    The information is gathered from the following public sources:

    \n") >> file; printf("\n") >> file; html_file_css_section("Reliability", file); printf("

    Out of the total %d %ss, %d or %.2f%% are unparseable by this tool.\n", nalldrafts, docname, nalldrafts - ndrafts, (100.0 * (nalldrafts - ndrafts)) / nalldrafts) >> file; printf("See here for a list of %ss with problems.\n", 34, 34, docname) >> file; printf("Out of a total of %d authors entries, %d or %.2f%% had limited information. ", nauthoroccurrences, nbadauthorlines, (100.0 * nbadauthorlines) / nauthoroccurrences) >> file; printf("See here for a list of authors with problems. \n", 34, 34) >> file; printf("See here for a list of %ss with incomplete or non-working e-mail addresses. \n", 34, 34, docname) >> file; printf("Warning: See the tool description for ", 34, 34) >> file; printf("detailed information about what can or can not be assumed about the quality of the results.

    \n") >> file; html_file_css_section("Privacy", file); printf("

    \n") >> file; printf("For various good reasons the IETF process for making submissions\n") >> file; printf("is open and all information, including author's information is\n") >> file; printf("publicly available. The statistics tools use this information. In\n") >> file; printf("the most recent version of the statistics tools, the statistics are\n") >> file; printf("based only on documents submitted to the IETF.\n") >> file; printf("

    \n") >> file; printf("\n") >> file; printf("

    \n") >> file; printf("Should there be any concern about any information relating to a\n") >> file; printf("particular document author in these statistics, we provide an\n") >> file; printf("opportunity to send corrections or even remove data concerning a\n") >> file; printf("particular author on request. Email jari.arkko(at)piuha.net.takeaway\n") >> file; printf("to contact the developer. Please use a subject line that includes\n") >> file; printf("the word authorstats.

    \n") >> file; printf("

    Our privacy policy can be found\n") >> file; printf("in here.\n", 34, 34) >> file; printf("

    \n") >> file; closehtml(file); } function reportcontinents() { base = "contdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcontinents(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of Authors per Continent"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Location data is calculated from the first occurrence of an author.

    \n") >> htmlfile; closehtml(htmlfile); } function reporttopcompanies(normalized) { base = "companydistrhist"; if (normalized) base = base "_norm"; htmlfile = base ".html"; printf("reporttopcompanies(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Comparison of Companies over the Years"); delete reporttopcompanies_seencompany; delete multifunctiontrack; for (company in topcompanies) { multifunctiontrack[company] = company; } delete multifunctiontab; for (year = startyear; year <= endyear; year++) { yeardata = ""; for (company in topcompanies) { n = 0; split(companydraftslist[company],compd,"%"); for (drafti in compd) { draft = compd[drafti]; if (year == draftyears[draft]) { n++; } } if (reporttopcompanies_seencompany[company] != "" || n != 0) { if (reporttopcompanies_seencompany[company] == "") { if (multifunctiontab[year-1] != "") { multifunctiontab[year-1] = multifunctiontab[year-1] "%"; } multifunctiontab[year-1] = multifunctiontab[year-1] company ":0"; } if (yeardata != "") yeardata = yeardata "%"; if (year == nowyear) { if (lastyearcompletion >= minimumcompletion) { yeardata = yeardata company ":" (n * (1/lastyearcompletion)); } } else { yeardata = yeardata company ":" n; } reporttopcompanies_seencompany[company] = "seen"; } } multifunctiontab[year] = yeardata; } mavgalpha = 0.4; genericmultifunctionreport(base,htmlfile,startyear,endyear,0,normalized,mavgalpha, "Comparison of Companies over the Years","Year", normalized ? "Percentage of " docnameu "s" : "Number of " docnameu "s"); printf("

    This tracks publication of %ss with authors from most active companies per year. ", docname) >> htmlfile; printf("Company data is calculated from the first occurrence of an author. ISI has been excluded from this graph, to make it clearer.\n") >> htmlfile; if (normalized) printf("The graph is normalized to 100%% representing sum of the top companies.\n") >> htmlfile; printf("The data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same publication ", nowyear) >> htmlfile; printf("rates as has happened during first part of the year (now %.2f %% complete).\n", 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

    \n") >> htmlfile; closehtml(htmlfile); } function reporttopcountries(normalized) { base = "countrydistrhist"; if (normalized) base = base "_norm"; htmlfile = base ".html"; printf("reporttopcountries(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; if (normalized) openhtml(htmlfile,"Comparison of Countries over the Years (Normalized)"); else openhtml(htmlfile,"Comparison of Countries over the Years"); # # Calculate the main results # delete reporttopcountries_seencountry; delete multifunctiontrack; for (country in topcountries) { multifunctiontrack[country] = country; } delete multifunctiontab; for (year = startyear; year <= endyear; year++) { yeardata = ""; for (country in topcountries) { n = 0; split(countrydraftslist[country],compd,"%"); for (drafti in compd) { draft = compd[drafti]; if (year == draftyears[draft]) { n++; } } if (reporttopcountries_seencountry[country] != "" || n != 0) { if (reporttopcountries_seencountry[country] == "") { if (multifunctiontab[year-1] != "") { multifunctiontab[year-1] = multifunctiontab[year-1] "%"; } multifunctiontab[year-1] = multifunctiontab[year-1] country ":0"; } if (yeardata != "") yeardata = yeardata "%"; if (year == nowyear) { if (lastyearcompletion >= minimumcompletion) { yeardata = yeardata country ":" (n * (1/lastyearcompletion)); } } else { yeardata = yeardata country ":" n; } reporttopcountries_seencountry[country] = "seen"; } } multifunctiontab[year] = yeardata; } # # Output the graphs # mavgalpha = 0.4; genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha, "Comparison of Countries over the Years","Year", normalized ? ("Percentage of " docnameu "s") : ("Number of " docnameu "s")); printf("

    This tracks publication of %ss with authors from a given country. Country data is calculated from the first occurrence of an author.\n", docname) >> htmlfile; printf("The scale is logarithmic, ") >> htmlfile; if (normalized) printf("normalized to 100%% representing sum of the top countries, ") >> htmlfile; printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same publication ", nowyear) >> htmlfile; printf("rates as has happened during first part of the year (now %.2f %% complete).\n", 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

    \n") >> htmlfile; closehtml(htmlfile); } function reporttopcontinents(normalized) { base = "d-contdistrhist"; if (normalized) base = base "_norm"; htmlfile = base ".html"; printf("reporttopcontinents(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Comparison of Continents over the Years"); delete reporttopcontinents_seencontinent; delete multifunctiontrack; for (continent in continentdrafts) { multifunctiontrack[continent] = continent; } delete multifunctiontab; for (year = startyear; year <= endyear; year++) { yeardata = ""; for (continent in continentdrafts) { n = 0; split(continentdraftslist[continent],compd,"%"); for (drafti in compd) { draft = compd[drafti]; if (year == draftyears[draft]) { n++; } } if (reporttopcontinents_seencontinent[continent] != "" || n != 0) { if (reporttopcontinents_seencontinent[continent] == "") { if (multifunctiontab[year-1] != "") { multifunctiontab[year-1] = multifunctiontab[year-1] "%"; } multifunctiontab[year-1] = multifunctiontab[year-1] continent ":0"; } if (yeardata != "") yeardata = yeardata "%"; if (year == nowyear) { if (lastyearcompletion >= minimumcompletion) { yeardata = yeardata continent ":" (n * (1/lastyearcompletion)); } } else { yeardata = yeardata continent ":" n; } reporttopcontinents_seencontinent[continent] = "seen"; } } multifunctiontab[year] = yeardata; } mavgalpha = 0.4; genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha, "Comparison of Continents over the Years","Year", normalized ? "Percentage of " docnameu "s" : "Number of " docnameu "s"); printf("

    This tracks publication of %ss with authors from a given continent.\n", docname) >> htmlfile; printf("Location data is calculated from the first occurrence of an author.\n") >> htmlfile; printf("The scale is logarithmic, ") >> htmlfile; if (normalized) printf("normalized to 100%% representing sum of the top companies, ") >> htmlfile; printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same\n", nowyear) >> htmlfile; printf("publication rates as has happened during first part of the year (now %.2f %% complete).\n", 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

    \n") >> htmlfile; closehtml(htmlfile); } function genericmultifunctionreport(base,htmlfile,startx,endx,islog,isnormalized,mavgalpha,title,xlabel,ylabel) { pngfile = base ".png"; datafilebase = "-" base ".dat"; gplfile = base".txt"; html_file_css_section("Statistics",htmlfile); printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); for (track in multifunctiontrack) { trackfile = track datafilebase; printf("\n") > trackfile; close(trackfile); } # # Calculate the per-x sums, if normalization needed # delete multifunctionsum; for (i = startx; i <= endx; i++) { sum = 0; e = multifunctiontab[i]; split(e,multifunctionint,"%"); for (vi in multifunctionint) { v = multifunctionint[vi]; split(v,multifunctionint2,":"); y = multifunctionint2[2]; sum += y; } multifunctionsum[i] = sum; } # # Output the main per-track plotting data # maxy = 0; for (i = startx; i <= endx; i++) { e = multifunctiontab[i]; split(e,multifunctionint,"%"); for (vi in multifunctionint) { v = multifunctionint[vi]; split(v,multifunctionint2,":"); track = multifunctionint2[1]; y = multifunctionint2[2]; if (!(track in multifunctiontrack)) { printf("authorstats: Error: track %s is unknown at %d for %s -- exit\n", track, i, htmlfile); printf("authorstats: e = %s\n", e); printf("authorstats: vi = %s, v = %s\n", vi, v); printf("authorstats: y = %s\n", y); exit(1); } trackfile = track datafilebase; if (isnormalized) { printf("%d %6.4f\n", i, multifunctionsum[i] == 0 ? 0.0 : (y * 100.0) / multifunctionsum[i]) >> trackfile; } else { printf("%d %d\n", i, y) >> trackfile; } if (y > maxy) maxy = y; close(trackfile); } } if (mavgalpha > 0.0) { for (track in multifunctiontrack) { trackfile = track datafilebase; origtrackfile = "orig-" trackfile; cmd = sprintf("mv %c%s%c %c%s%c", 34, trackfile, 34, 34, origtrackfile, 34); #printf("doing %s...\n", cmd); system(cmd); mavg(origtrackfile,trackfile,mavgalpha); } } epsfile = gensub(/.png/, ".eps", 1, pngfile); printf("set output %c%s%c\n", 34, epsfile, 34) > gplfile; printf("set terminal postscript eps enhanced color solid font %cTimes-Roman,12%c size 5,2.5\n", 34, 34) >> gplfile; # the following produces png straight from gnuplot, but ImageMagick has # a better anti-aliaser, producing nicer pngs # printf("set output %c%s%c\n", 34, pngfile, 34) > gplfile; # printf("set terminal png interlace truecolor enhanced crop font %cTimes New Roman,11%c size 800,400\n", 34, 34) >> gplfile; printf("set style data lines\n") >> gplfile; printf("set title %c%s%c\n", 34, title, 34) >> gplfile; printf("set xlabel %c%s%c\n", 34, xlabel, 34) >> gplfile; printf("set ylabel %c%s%c\n", 34, ylabel, 34) >> gplfile; printf("set grid ytics ls 0\n") >> gplfile; if (islog) { printf("set logscale y 2\n") >> gplfile; printf("set yrange [ 1 : ]\n") >> gplfile; printf("set ytics ( %c1%c 1, %c5%c 5, %c20%c 20, %c10%c 10, %c50%c 50, %c100%c 100, %c200%c 200 )\n", 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34) >> gplfile; } printf("set key outside right\n") >> gplfile; f = 0; printf("plot ") >> gplfile; for (track in multifunctiontrack) { trackfile = track datafilebase; if (f > 0) { printf(", ") >> gplfile; } f++; printf("%c%s%c", 34, trackfile, 34) >> gplfile; printf(" with lines lt %d lw 2", f+1) >> gplfile; printf(" title %c%s%c", 34, countrycapitalize(track), 34) >> gplfile; } printf("\n") >> gplfile; close(gplfile); system("gnuplot < " gplfile); system("convert -trim -density 250 " epsfile " " pngfile); } function mavg(fromfile,tofile,alpha) { # Read data delete mavgtab1a; delete mavgtab1b; i = 1; while ((getline frome < fromfile) == 1) { if (length(frome) > 1 && split(frome,mavgtab2,"\t") == 2) { mavgtab1a[i] = mavgtab2[1]+0.0; mavgtab1b[i] = mavgtab2[2]+0.0; i++; } } n = i; close(fromfile); # Calculate moving average delete mavgtab3; mavgtab3[1] = mavgtab1b[1]; for (i = 2; i < n; i++) { mavgtab3[i] = mavgtab3[i-1] + alpha * (mavgtab1b[i] - mavgtab3[i-1]); } # Output data printf("\n") > tofile; for (i = 1; i < n; i++) { printf("%6.2f %6.2f # orig was %6.2f\n", mavgtab1a[i], mavgtab3[i], mavgtab1b[i]) >> tofile; } close(tofile); } function reportcompanies() { base = "companydistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcompanies(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of Authors per Companies"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Company data is calculated from the first occurrence of an author.

    \n") >> htmlfile; closehtml(htmlfile); } function reportcountries(useeu) { if (useeu) { base = "countryeudistr"; } else { base = "countrydistr"; } htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcountries(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of Authors per Country"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Location data is calculated from the first occurrence of an author.

    \n") >> htmlfile; closehtml(htmlfile); } function reportcontinents_draft() { base = "d-contdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcontinents_draft(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of Number of " docnameu "s per Continent"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Wondering why the total is greater than 100%%? %ss with ", docnameu) >> htmlfile; printf("multiple authors may be counted multiple times, if the authors are from different countries.

    \n") >> htmlfile; closehtml(htmlfile); } function reportcompanies_draft() { base = "d-companydistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcompanies_draft(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of " docnameu "s According to the Affiliation of their Authors"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Wondering why the total is greater than 100%%? %ss with multiple ", docnameu) >> htmlfile; printf("authors may be counted multiple times, if the authors are from different companies.

    \n") >> htmlfile; closehtml(htmlfile); } function reportcountries_draft(useeu) { if (useeu) { base = "d-countryeudistr"; } else { base = "d-countrydistr"; } htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcountries_draft(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of " docnameu "s According to the Countries of their Authors"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("

    %ss come from these countries, %d different countries in total. \n", docnameu, 34, 34, ncountries) >> htmlfile; printf("

    \n") >> htmlfile; printf("\n") >> htmlfile; printf("

    Wondering why the total is greater than 100%%? %ss with multiple ", docnameu) >> htmlfile; printf("authors may be counted multiple times, if the authors are from different countries.

    \n") >> htmlfile; printf("

    See also the ranking of the countries per capita.

    \n", 34, base, 34) >> htmlfile; closehtml(htmlfile); reportcountries_draft_percap(useeu); } function reportcountries_draft_percap(useeu) { if (useeu) { base = "d-countryeudistrcap"; } else { base = "d-countrydistrcap"; } htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportcountries_draft_percap(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of " docnameu "s According to the Countries of their Authors, per Capita"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("

    Source of population data is Wikipedia.

    \n") >> htmlfile; closehtml(htmlfile); } function checkpopulation(co) { if (population[co] == "" || population[co] == 0) { # prevent division by zero when offline population[co] = 1; nonfatalerror("Population for " co " is unknown"); } } function reportgender_draft() { base = "d-genderdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; if (debug) printf("authorstats: debug: reportgender_draft begins\n"); printf("reportgender_draft(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of " docnameu "s According to Author Gender"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("

    %ss come from these genders, %d different genders in total. \n", docnameu, 34, 34, ngenders) >> htmlfile; printf("

    \n") >> htmlfile; printf("\n") >> htmlfile; printf("

    Wondering why the total is greater than 100%%? %ss with multiple ", docnameu) >> htmlfile; printf("authors may be counted multiple times, if the authors have different gender.\n") >> htmlfile; printf("Genders are determined by knowing some individuals, the rest are guessed by testing the first name via genderchecker.

    \n", 34, 34) >> htmlfile; closehtml(htmlfile); if (debug) printf("authorstats: debug: reportgender_draft ends\n"); } function reporttopgenders(normalized) { if (debug) printf("authorstats: debug: reportgender begins\n"); base = "genderdistrhist"; if (normalized) base = base "_norm"; htmlfile = base ".html"; printf("reporttopgenders(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; if (normalized) openhtml(htmlfile,"Comparison of Gender over the Years (Normalized)"); else openhtml(htmlfile,"Comparison of Gender over the Years"); # # Calculate the main results # delete reporttopgenders_seengender; delete multifunctiontrack; for (gender in topgenders) { multifunctiontrack[gender] = gender; } delete multifunctiontab; for (year = startyear; year <= endyear; year++) { yeardata = ""; for (gender in topgenders) { n = 0; split(genderdraftslist[gender],compd,"%"); for (drafti in compd) { draft = compd[drafti]; if (year == draftyears[draft]) { n++; } } if (reporttopgenders_seengender[gender] != "" || n != 0) { if (reporttopgenders_seengender[gender] == "") { if (multifunctiontab[year-1] != "") { multifunctiontab[year-1] = multifunctiontab[year-1] "%"; } multifunctiontab[year-1] = multifunctiontab[year-1] gender ":0"; } if (yeardata != "") yeardata = yeardata "%"; if (year == nowyear) { if (lastyearcompletion >= minimumcompletion) { yeardata = yeardata gender ":" (n * (1/lastyearcompletion)); } } else { yeardata = yeardata gender ":" n; } reporttopgenders_seengender[gender] = "seen"; } } multifunctiontab[year] = yeardata; } # # Output the graphs # mavgalpha = 0.4; genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha, "Comparison of Genders over the Years","Year", normalized ? ("Percentage of " docnameu "s") : ("Number of " docnameu "s")); printf("

    This tracks publication of %ss with authors with a given gender. Gender is based on the authors first name.\n", docname) >> htmlfile; printf("Genders are determined by knowing some individuals, the rest are guessed by testing the first name via genderchecker.\n", 34, 34) >> htmlfile; printf("The scale is logarithmic, ") >> htmlfile; if (normalized) printf("normalized to 100%% representing all documents, ") >> htmlfile; printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile; if (lastyearcompletion >= minimumcompletion) { printf("The last year (%d) has been adjusted assuming the same publication ", nowyear) >> htmlfile; printf("rates as has happened during first part of the year (now %.2f %% complete).\n", 100 * lastyearcompletion) >> htmlfile; } else { printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile; } printf("

    \n") >> htmlfile; closehtml(htmlfile); if (debug) printf("authorstats: debug: reportgender ends\n"); } function fatalerror(s) { printf("authorstats: Fatal error: %s -- exit\n", s); exit(1); } function nonfatalerror(s) { printf("authorstats: Error: %s\n", s); } function lastnames(s) { nk = split(s,nameparts,", "); result = lastname(nameparts[1]); for (k = 2; k <= nk; k++) { result = result ", " lastname(nameparts[k]); } return(result); } function lastname(y) { nr = split(y,lnamecomps," "); res = lnamecomps[nr]; if (res == "3rd" || res == "2nd" || res == "III" || res == "II") res = lnamecomps[nr-1]; if (substr(res,1,1) == "(") res = lnamecomps[nr-1]; return(res); } function official(s) { return(s ~ /draft-ietf/ || s ~ /draft-iab/ || s ~ /draft-rfc/ || s ~ /draft-iesg/ || s ~ /draft-irtf/); } function isinarea(draft,area) { if (isarea[area] == "") return(0); wg = getofficialwg(draft); if (wg == "") wg = getrelatedwg(draft); if (wg == "") return(0); if (iswg[wg] == "") return(0); if (wgarea[wg] != area) return(0); return(1); } function findauthorsfirstdocinyear(author,theyear) { fm = 0; thedoc = ""; herdrafts = authordrafts[author]; dn = split(herdrafts,herdraftstab,"%"); for (di = 1; di <= dn; di++) { dr = herdraftstab[di]; if (draftyears[dr] == theyear) { if (fm == 0 || fm > draftmonths[dr]) { fm = draftmonths[dr]; thedoc = dr; } } } return(thedoc); } function findauthorsfirstyear(author) { fy = 0; herdrafts = authordrafts[author]; dn = split(herdrafts,herdraftstab,"%"); for (di = 1; di <= dn; di++) { dr = herdraftstab[di]; if (fy == 0 || fy > draftyears[dr]) fy = draftyears[dr]; } return(fy); } function nicedocname(z) { gsub(/[.]txt/,"",z); gsub(/rfc/,"RFC ",z); return(z); } function reportnewauthors() { base = "newauthors"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("newauthors(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"New RFC authors over the years"); html_file_css_section("Yearly Statistics",htmlfile); printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("
    \n") >> htmlfile; closehtml(htmlfile); } function reportauthoractivitiesperarea() { base = "authactareadistr"; htmlfile = base ".html"; printf("reportauthoractivitiesperarea(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Most active authors per area"); for (area in isarea) { html_file_css_section(area " Area", htmlfile); printf("\n") >> htmlfile; } closehtml(htmlfile); } function reporthindextop() { if (doctype == "all") { base = "hindextop"; htmlfile = base ".html"; printf("reporthindextop(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Author Impact Factors"); html_file_css_section("Highest h-Index Impact Factors",htmlfile); printf("\n") >> htmlfile; html_file_css_section("Description",htmlfile); printf("

    These impact factors are represented by h-index.\n", 34, 34) >> htmlfile; printf("They are currently calculated based only on RFC citations.\n") >> htmlfile; printf("Self-citations are currently not excluded.\n") >> htmlfile; printf("The h-index calculation software is courtesy of Lars Eggert.

    \n") >> htmlfile; html_file_css_section_nextcol("Most Citations",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } } function reportmostcited() { if (doctype == "all") { htmlfile = "mostcited.html"; printf("reportmostcited(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Most Frequently Cited RFCs"); html_file_css_section("Most Frequently Cited RFCs", htmlfile); printf("These RFCs are cited most frequently in other RFCs:\n") >> htmlfile; printf("\n") >> htmlfile; closehtml(htmlfile); } } function reportauthoractivities(iswg) { if (iswg) { base = "authactdistr-wg"; } else { base = "authactdistr"; } htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportauthoractivities(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of Authors According to How Many Documents They Have"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("

    Total number of distinct authors is %d, the different %ss together had a total of %d author entries.

    ", nauthors, docname, nauthoroccurrences) >> htmlfile; printf("\n") >> htmlfile; if (iswg) printf("

    (The percentages have been calculated against all authors, not just WG document authors.)

    \n") >> htmlfile; closehtml(htmlfile); if (!iswg) realmaxdrafts = maxdrafts; } function reportauthorcrosswg() { base = "authcrosswg"; htmlfile = base ".html"; #pngfile = base ".png"; #datafile = base ".dat"; #gplfile = base".txt"; printf("reportauthorcrosswg(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Author cross-WG activities"); #html_file_css_section("Statistics",htmlfile); #printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function reportauthorcrossarea() { base = "authcrossarea"; htmlfile = base ".html"; #pngfile = base ".png"; #datafile = base ".dat"; #gplfile = base".txt"; printf("reportauthorcrossarea(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Author cross-area activities"); #html_file_css_section("Statistics",htmlfile); #printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function getofficialwg(draft) { split(draft,dnamecomps,"-"); if (dnamecomps[2] == "ietf" && iswg[dnamecomps[3]]) return dnamecomps[3]; else return(""); } function getrelatedwg(draft) { split(draft,dnamecomps,"-"); if (dnamecomps[2] != "ietf" && iswg[dnamecomps[3]]) return(dnamecomps[3]); else return(""); } function reportauthors() { base = "authdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportauthors(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of the Number of Authors per " docnameu); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function reportpubyears() { base = "pubdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportpubyears(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; if (doctype == "draft") { openhtml(htmlfile,"Publication rate per month"); } else if (doctype == "rfc") { openhtml(htmlfile,"Publication rate per year"); html_file_css_section("Yearly Statistics",htmlfile); printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; printf("
    \n") >> htmlfile; } pngfile = base "2.png"; datafile = base "2.dat"; gplfile = base "2.txt"; printf("\n") > datafile; for (i = startyear; i <= endyear; i++) { for (j = 1; j <= 12; j++) { n = 0; for (draft in draftyears) if (draftyears[draft] == i && draftmonths[draft] == monthnameshort[j]) n++; printf("%4.2f %d\n", i - 0.5 + (j-1)/12.0, n) >> datafile; } } close(datafile); html_file_css_section("Monthly Statistics",htmlfile); printf("%cstatistics%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; extra = "set xrange [" startyear-0.5 " : " endyear+0.5 "]\n"; extra = extra "set yrange [ 0 : ]\n"; extra = extra "set xtics "; if (endyear - startyear > 5) { extra = extra "rotate "; } extra = extra startyear ", 1, " endyear; extra = extra "\n"; gnuplot(gplfile,pngfile,datafile,"Year and Month", "Number of " docnameu "s","Publication Rate per Month", 0,extra); closehtml(htmlfile); } function reportformats() { base = "formatdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportformats(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Document formats and features in " docname "s"); html_file_css_section("Formats", htmlfile); printf("

    These formats are used:

    \n") >> htmlfile; printf("\n") >> htmlfile; html_file_css_section_nextcol("Features", htmlfile); printf("

    These features are used within the documents:

    \n") >> htmlfile; printf("\n") >> htmlfile; printf("

    The document feature recognition is based on heuristics and is inherently unreliable.

    \n") >> htmlfile; closehtml(htmlfile); } function reportpages() { base = "pagedistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportpages(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of the Number of Pages per " docnameu); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function docgettype(t) { if (t ~ /^draft/) { return("draft"); } else { return("rfc"); } } function draftname(t) { gsub(/^draft-/,"",t); gsub(/-[0-9][0-9][.]txt/,"",t); #gsub(/^rfc/,"",t); gsub(/[.]txt/,"",t); return(t); } function reportareawgs() { base = "areawgdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportareawgs(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of WGs to Areas"); html_file_css_section("Statistics",htmlfile); # prevent division by zero when offline (for debugging) if (debug) nareas = nareas == 0 ? 1 : nareas; printf("

    There are %d WGs and %d areas, an average of %.2f WGs/area.

    \n", nwgs, nareas, (nareas == 0 ? 0 : (1.0 * nwgs) / nareas)) >> htmlfile; printf("%careas\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; printf("

    \n") >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function splittolines(inputstring) { sub(/ /,"\\n",inputstring); sub(/ /,"\\n",inputstring); return(inputstring); } function reportwgs() { base = "wgdistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportwgs(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,"Distribution of " docname "s According to WGs"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; maxcount = 0; for (draft in isdraft) { wg = getofficialwg(isdraft[draft]); if (wg != "") { if (wg in wgcounts) { wgcounts[wg] = wgcounts[wg] + 1; } else { wgcounts[wg] = 1; } if (wgcounts[wg] > maxcount) maxcount = wgcounts[wg]; } else { wg = getrelatedwg(isdraft[draft]); if (wg != "") { if (wg in wgcountsother) { wgcountsother[wg] = wgcountsother[wg] + 1; } else { wgcountsother[wg] = 1; } } } } for (i = maxcount; i >= 0; i--) { nwgswithdrafts[i] = 0; for (wg in wgcounts) { if (wgcounts[wg] == i) { nwgswithdrafts[i] = nwgswithdrafts[i] + 1; } } } printf("\n") > datafile; html_file_css_section("Distribution of " docname " amounts", htmlfile); printf("\n") >> htmlfile; html_file_css_section("List of WGs", htmlfile); printf("\n") >> htmlfile; close(datafile); gnuplot(gplfile, pngfile, datafile, "Number of " docnameu "s", "Number of WGs", "Number of " docnameu "s per WG", 1, ""); printf("

    Definition of a related %s is that it matches the pattern draft-something-wg, ", docname) >> htmlfile; printf("where something is not ietf, and wg is the name of a working group.

    \n") >> htmlfile; closehtml(htmlfile); } function reportareas() { base = "areadistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportareas(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,docnameu " Area Distribution"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; maxcount = 0; for (draft in isdraft) { wg = getofficialwg(isdraft[draft]); if (wg != "") { area = wgarea[wg]; if (area in areacounts) { areacounts[area] = areacounts[area] + 1; } else { areacounts[area] = 1; } if (areacounts[area] > maxcount) maxcount = areacounts[area]; } else { wg = getrelatedwg(isdraft[draft]); if (wg != "") { area = wgarea[wg]; if (area in areacountsother) { areacountsother[area] = areacountsother[area] + 1; } else { areacountsother[area] = 1; } } } } printf("\n") > datafile; html_file_css_section("List of areas", htmlfile); printf("\n") >> htmlfile; printf("

    Definition of a related %s is that it matches the pattern draft-something-wg, ", docname) >> htmlfile; printf("where something is not ietf, and wg is the name of a working group.

    \n") >> htmlfile; closehtml(htmlfile); } function reportversions() { base = "versiondistr"; htmlfile = base ".html"; pngfile = base ".png"; datafile = base ".dat"; gplfile = base".txt"; printf("reportversions(%s)\n",htmlfile) >> "/tmp/htmlopendebug.txt"; openhtml(htmlfile,docnameu " Version Number Distribution"); html_file_css_section("Statistics",htmlfile); printf("%cdistribution%c/\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile; html_file_css_section("Details",htmlfile); printf("\n") >> htmlfile; closehtml(htmlfile); } function gnuplot(file, pngfile, datafile, x, y, title, continuous, morecmds) { # printf("authorstats: debug: calling gnuplot with arguments %s %s %s\n", file, pngfile, datafile); epsfile = gensub(/.png/, ".eps", 1, pngfile); printf("set output %c%s%c\n", 34, epsfile, 34) > file; printf("set terminal postscript eps enhanced color solid "\ "font %cTimes-Roman,11%c size 5,2.5\n", 34, 34) >> file; # the following produces png straight from gnuplot, but ImageMagick has # a better anti-aliaser, producing nicer pngs # printf("set output %c%s%c\n", 34, pngfile, 34) > file; # printf("set terminal png interlace truecolor enhanced crop "\ # font %cTimes New Roman,11%c size 800,400\n", 34, 34) >> file; printf("set style data boxes\n") >> file; if (!continuous) printf("set boxwidth 0.8 relative\n") >> file; if (title) printf("set title %c%s%c\n", 34, title, 34) >> file; if (x) printf("set xlabel %c%s%c\n", 34, x, 34) >> file; if (y) printf("set ylabel %c%s%c\n", 34, y, 34) >> file; printf("set grid ytics ls 0\n") >> file; printf("set style fill solid 0.8 noborder\n") >> file; printf("%s", morecmds) >> file; printf("plot %c%s%c notitle\n", 34, datafile, 34) >> file; close(file); # printf("authorstats: debug: running gnuplot on %s\n", file); system("gnuplot < " file); # printf("authorstats: debug: converting %s to %s\n", epsfile, pngfile); # system("gspsconvert " epsfile " " pngfile); system("convert -trim -density 250 " epsfile " " pngfile); } function openhtml(file,title) { printf("openhtml(%s,%s)\n",file,title) >> "/tmp/htmlopendebug.txt"; if (file == "") { printf("authorstats: Warning: Empty file name %s for title %s\n", file, title); file = "null.html"; } system("rm -f " file); html_file_css_head(title, file, "Document Stats -- What is Going on in the IETF?"); } function closehtml(file) { html_file_css_section("Data Freshness and Source",file); printf("

    This is a part of a statistics report ", 34, 34) >> file; printf("generated by authorstats\n", 34, 34) >> file; printf("on %04d-%02d-%02d.

    ", nowyear, nowmonth, nowday) >> file; printf("

    All data in this report is retrieved from public sources, such as publications published at the IETF.\n") >> file; printf("The tool and the report are independent of the IETF, however.

    \n") >> file; html_file_css_section("Data Accuracy and Privacy",file); printf("

    The data or the report may contain inaccuracies.

    \n") >> file; printf("Note that we do not hold any per-person information beyond what is visible in publicly submitted documents at the IETF.\n") >> file; printf("However, if this information is about you, we do appreciate corrections which we will try to incorporate.\n") >> file; printf("Similarly, we would be happy to remove your information if you so desire. In either case, send mail to\n") >> file; printf("jari.arkko(at)piuha.net.takeaway to contact the developer.\n") >> file; printf("Please use a subject line that includes the word authorstats.\n") >> file; printf("Our privacy policy can be found in here.

    \n", 34, 34) >> file; html_file_css_end(file); }