#!/bin/bash
###
### AUTHORSTATS - Get author statistics from Internet Drafts and RFCs
###
### Version 2.5.2
###
### Written in 2005-2008 by Jari Arkko
### Donated to the public domain.
###
### 2.5.0 Cleaned up HTML
### 2.5.1 Added support for authors moving, fixed country and company capitalization
### 2.5.2 Fixed monthly publication graph style
###
### Usage:
###
### authorstats draft-foo-00.txt ...
###
###
### Initialize
###
# Defaults. The value "none" marks a data source that was not named on the
# command line; the sections further down test for it.
debug=0
doctype='draft'
authordata=none wgdata=none oldwgdata=none rfcdata=none
chairdata=none iabdata=none iaocdata=none popdata=none
topic='active I-Ds'
tmpbase="/tmp/$$-tmp"
###
### Process options
###
#
# authorstats_parse_options ARG...
#
# Consumes the leading "--option [value]" arguments and stores their values
# in the matching global variables. Parsing stops at the first argument that
# does not start with "-". The number of arguments consumed is left in
# authorstats_nopts so that the caller can shift them away.
#
# An unknown option, or an option whose value is missing, is fatal (exit 1).
#
authorstats_parse_options() {
  authorstats_nopts=0
  while [ $# -gt 0 ]
  do
    case "$1" in
      --debug)
        debug=1
        shift
        authorstats_nopts=$((authorstats_nopts + 1))
        ;;
      --doctype|--data|--wgs|--oldwgs|--rfcs|--chairs|--iab|--iaoc|--population|--topic)
        # Every one of these options takes exactly one value.
        if [ $# -lt 2 ]
        then
          echo "authorstats: Option $1 requires an argument -- exit" >&2
          exit 1
        fi
        case "$1" in
          --doctype)    doctype=$2;;
          --data)       authordata=$2;;
          --wgs)        wgdata=$2;;
          --oldwgs)     oldwgdata=$2;;
          --rfcs)       rfcdata=$2;;
          --chairs)     chairdata=$2;;
          --iab)        iabdata=$2;;
          --iaoc)       iaocdata=$2;;
          --population) popdata=$2;;
          --topic)      topic=$2;;
        esac
        shift 2
        authorstats_nopts=$((authorstats_nopts + 2))
        ;;
      -*)
        echo 'authorstats: Unrecognized option -- exit' >&2
        exit 1
        ;;
      *)
        # First document name: the rest of the arguments are left alone.
        break
        ;;
    esac
  done
}
authorstats_parse_options "$@"
# Drop the parsed options so that only the document names remain.
shift "$authorstats_nopts"
###
### Get author data from the drafts
###
# No --data file was given: extract the author data from the documents that
# remain on the command line. "$@" passes every file name through as one
# word, even when it contains blanks or glob characters.
if [ "x$authordata" = xnone ]
then
  authordata=${tmpbase}-a
  getauthors "$@" > "$authordata"
fi
###
### Test that we have author data
###
# Stop unless the author data file exists and is not empty. The expansion is
# quoted: an unquoted empty value would turn the test into "[ -s ]", which
# is always true.
if [ -s "$authordata" ]
then
  ok=ok
else
  echo "There is no author data in file $authordata -- exit" >&2
  exit 1
fi
###
### Get WG data from the IETF site
###
# Without --wgs, download the working group directory page into /tmp and
# use that copy.
case x$wgdata in
  xnone)
    (
      cd /tmp
      rm -f wg-dir.html
      wget -q http://www.ietf.org/html.charters/wg-dir.html
    )
    wgdata=/tmp/wg-dir.html
    ;;
esac
###
### Get old WG data from the IETF site
###
# Without --oldwgs, download the index of concluded working groups into
# /tmp/oldindex.html and use that copy.
case x$oldwgdata in
  xnone)
    (
      cd /tmp
      rm -f oldindex.html
      wget -q -O oldindex.html http://www.ietf.org/html.charters/OLD/index.html
    )
    oldwgdata=/tmp/oldindex.html
    ;;
esac
###
### Get RFC data from the IETF tools site
###
# Without --rfcs, download rfc_id.txt into /tmp and use that copy.
case x$rfcdata in
  xnone)
    (
      cd /tmp
      rm -f rfc_id.txt
      wget -q http://tools.ietf.org/id/rfc_id.txt
    )
    rfcdata=/tmp/rfc_id.txt
    ;;
esac
###
### Get chair/AD data from the IETF site
###
# Without --chairs, download 1wg-summary.txt into /tmp and use that copy.
if [ "x$chairdata" = xnone ]
then
  # Remove the previous copy of the file that is about to be fetched (and
  # nothing else): wget keeps an existing file and stores the new download
  # under a ".1" suffix, so a stale copy would otherwise be the one parsed.
  (cd /tmp &&
   rm -f 1wg-summary.txt &&
   wget -q http://www.ietf.org/ietf/1wg-summary.txt)
  chairdata=/tmp/1wg-summary.txt
fi
###
### Get IAB data from the IAB site
###
# Without --iab, download the IAB members page into /tmp and use that copy.
if [ "x$iabdata" = xnone ]
then
  # Remove the previous copy of the file that is about to be fetched (and
  # nothing else): wget keeps an existing file and stores the new download
  # under a ".1" suffix, so a stale copy would otherwise be the one parsed.
  (cd /tmp &&
   rm -f members.html &&
   wget -q http://www.iab.org/about/members.html)
  iabdata=/tmp/members.html
fi
###
### Get IAOC data from the temporary IAOC site
###
# Without --iaoc, fall back to /tmp/index.html. The download is commented
# out, so the subshell only deletes any existing /tmp/index.html and this
# block itself never creates the file that iaocdata ends up naming.
if [ x$iaocdata = xnone ]
then
(cd /tmp;
rm -f index.html
#wget -q http://koi.uoregon.edu/~iaoc/index.html
)
iaocdata=/tmp/index.html
fi
###
### Get country population data from the wikipedia site
###
# Without --population, download the Wikipedia population list into /tmp
# (after clearing earlier copies of it) and use that copy.
case x$popdata in
  xnone)
    (
      cd /tmp
      rm -rf List_of_countries_by_population List_of_countries_by_population.html List_of_countries_by_population.files
      wget -q http://en.wikipedia.org/wiki/List_of_countries_by_population
    )
    popdata=/tmp/List_of_countries_by_population
    ;;
esac
###
### Process WG data
###
wgs=/tmp/wgdata.txt
rm -f "$wgs"
if [ "$debug" = 1 ]
then
  echo authorstats: parsing wg data
fi
awk < "$wgdata" '
#
# Prints one "0-wgareadefinition:<area>:<wg>" line for every charter link,
# using the most recently seen area heading as the area.
#
BEGIN {
  area = "";
}
#
# Area heading: keep the text between the opening h2 tag and " Area<".
# "<h2>" is 4 characters long, hence the +4 to step past the tag.
#
/.* Area<.h2><.a>$/ {
  i1 = index($0,"<h2>");
  area = substr($0,i1+4);
  i2 = index(area," Area<");
  area = substr(area,1,i2 - 1);
  next;
}
#
# Charter link: the WG name sits between "html.charters/" and
# "-charter.html". Links seen before the first area heading are ignored.
#
/-charter.html/ {
  if (area != "") {
    i1 = index($0,"html.charters/");
    wg = substr($0,i1 + length("html.charters/"));
    i2 = index(wg,"-charter.html");
    wg = substr(wg,1,i2 - 1);
    printf("0-wgareadefinition:%s:%s\n", area, wg);
  }
}
' > "$wgs"
###
### Process old WG data
###
oldwgs=/tmp/oldindex.txt
rm -f $oldwgs
if [ $debug = 1 ]
then
echo authorstats: parsing old wg data
fi
awk < $oldwgdata '
BEGIN {
area = "";
}
/.* Area<.[hH]2>/ {
i1 = index($0,"
\n") >> file;
for (f = realmaxdrafts; f >= 1; f--) {
for (author in isauthor) {
if (isauthorcountry[author] == country && authorndrafts[author] == f) {
hisdrafts = authordrafts[author];
n = split(hisdrafts,hisdraftstab,"%");
if (n >= activeauthor) {
printf("
\n") >> file;
for (f = realmaxdrafts; f >= 1; f--) {
for (author in isauthorcompany) {
if (isauthorcompany[author] == company && authorndrafts[author] == f) {
if (f >= activeauthor) {
printf("
\n") >> file;
reportlistofdocs("The authors from " company " have", n, compd, file);
closehtml(file);
}
function reportindividualauthors(topfile) {
openhtml(topfile,activeauthor == 1 ? "All authors" : "Most active authors");
printf("
\n") >> topfile;
topfile = startsort(topfile);
for (author in isauthor) {
hisdrafts = authordrafts[author];
#printf("authorstats: debug: Reporting active %s with drafts %s...\n", author, hisdrafts);
n = split(hisdrafts,hisdraftstab,"%");
if (n >= activeauthor) {
file = tofname(author);
reportindividualauthor(author,n,hisdraftstab,file);
printf("
The working group%s where " pron " is active appear to be ",".\n",file);
}
if (authorchairs[author] != "") {
printwglist(authorchairs[author],capitalize(pron) " chairs the "," working group%s.\n",file);
}
if (authorad[author] != "") {
adstats = sprintf("(click here for statistics related to this role)",
34, capitalize(authornameparts[2]), capitalize(authornameparts[1]), 34);
printf(capitalize(pron) " is an area director for the %s area %s.\n", authorad[author], adstats) >> file;
}
if (authoriab[author] != "") {
printf(capitalize(pron) " is in the IAB.\n") >> file;
}
if (authoriaoc[author] != "") {
printf(capitalize(pron) " is in the IAOC.\n") >> file;
}
printf("
\n") >> file;
reportlistofdocs(capitalize(pron) " has", n, hisdraftstab, file);
closehtml(file);
}
function reportlistofdocs(who,n,hisdraftstab,file) {
if (doctype == "all") {
nd = 0;
nr = 0;
for (adraft in hisdraftstab) {
if (docgettype(hisdraftstab[adraft]) == "rfc") {
nr++;
} else {
nd++;
}
}
#printf("authorstats: debug: Reporting author with nr=%d and nd=%d...\n", nr, nd);
html_file_css_section("RFCs",file);
if (nr == 0) {
printf("
%s no RFCs.
\n", who) >> file;
} else {
printf("
%s the following %d RFCs:
\n", who, nr) >> file;
printf("
\n") >> file;
system("rm -f docslisttmp.txt");
system("touch docslisttmp.txt");
for (adraft in hisdraftstab) {
if (docgettype(hisdraftstab[adraft]) == "rfc") {
printf("
The distribution of %s ", docname) >> file;
if (doctype == "draft") {
printf("according to version numbers is shown here,\n",
34, 34) >> file;
reportversions();
}
printf("and according to number of authors here,\n",
34, 34) >> file;
reportauthors();
printf("%s page count distribution looks like this.\n", docnameu, 34, 34) >> file;
reportpages();
printf("The used document format types and content features are shown here.\n", 34, 34) >> file;
reportformats();
if (doctype == "rfc") {
printf("The growth of publication rate per year is shown here.\n", 34, 34) >> file;
reportpubyears();
} else if (doctype == "draft") {
printf("The publication rate per month is shown here.\n", 34, 34) >> file;
reportpubyears();
} else {
# do nothing
}
printf("
The distribution of authors according to the number of %ss\n", docname) >> file;
printf("they have is shown here.\n", 34, 34) >> file;
reportauthoractivities(0);
if (doctype == "draft" || doctype == "all") {
printf("The same distribution for IETF (or other official) %ss is shown here.\n",
docname, 34, 34) >> file;
reportauthoractivities(1);
printf("The authors work in one or multiple WGs according this distribution.\n", 34, 34) >> file;
reportauthorcrosswg();
printf("The same distribution for cross-area work is here.\n", 34, 34) >> file;
reportauthorcrossarea();
printf("Most active authors per area can be seen here.\n",
34, 34) >> file;
reportauthoractivitiesperarea();
}
printf("
\n") >> file;
if (doctype == "draft" || doctype == "all") {
html_file_css_section("Areas and WGs", file);
printf("
The distribution of WGs per areas is shown in here.\n",
34, 34) >> file;
reportareawgs();
printf("The distribution of %ss according to WGs is shown here,\n",
docname, 34, 34) >> file;
reportwgs();
printf("and according to areas is shown here.\n",
34, 34) >> file;
reportareas();
printf("
Authors come from these companies, %d different companies in total. \n",
34, 34, ncompanies) >> file;
reportcompanies();
if (endyear - startyear > 2 && topic != "active I-Ds") {
printf("Over the years, the situation has changed like this\n",
34, 34) >> file;
reporttopcompanies(0);
# Normalization: OK
printf(" (normalized).\n",
34, 34) >> file;
reporttopcompanies(1);
}
printf("
\n") >> file;
html_file_css_section_nextcol("Countries and Continents", file);
reportcountries_draft(0);
if (endyear - startyear > 2 && topic != "active I-Ds") {
printf("Over the years, the situation has developed like this",
34, 34) >> file;
reporttopcountries(0);
# Normalization: OK
printf(" (normalized)",
34, 34) >> file;
reporttopcountries(1);
printf(".\n") >> file;
}
printf("Considering EU as a country, the distribution would look like this instead.\n",
34, 34) >> file;
reportcountries_draft(1);
printf("The same distribution for continents can be found here.\n", 34, 34) >> file;
reportcontinents_draft();
if (endyear - startyear > 2 && topic != "active I-Ds") {
printf("Over the years, the situation has developed like this\n",
34, 34) >> file;
reporttopcontinents(0);
# Normalization: OK
printf(" (normalized)",
34, 34) >> file;
reporttopcontinents(1);
printf(".\n") >> file;
}
printf("
Authors come from these countries.\n",
34, 34, ncountries) >> file;
reportcountries(0);
printf("Considering EU as a country, the distribution would look like this instead.\n",
34, 34) >> file;
reportcountries(1);
printf("The same distribution for continents can be found here.\n", 34, 34) >> file;
reportcontinents();
printf("
All active authors can be seen here.\n", 34, 34) >> file;
printf("Countries can be seen here.\n", 34, 34) >> file;
printf("And active companies can be seen here.
Out of the total %d %ss, %d or %.2f%% are unparseable by this tool.\n",
nalldrafts, docname, nalldrafts - ndrafts, (100.0 * (nalldrafts - ndrafts)) / nalldrafts) >> file;
printf("See here for a list of %ss with problems.\n", 34, 34, docname) >> file;
printf("Out of a total of %d authors entries, %d or %.2f%% had limited information. ",
nauthoroccurrences, nbadauthorlines,
(100.0 * nbadauthorlines) / nauthoroccurrences) >> file;
printf("See here for a list of authors with problems. \n", 34, 34) >> file;
printf("See here for a list of %ss with incomplete or non-working e-mail addresses. \n", 34, 34, docname) >> file;
printf("Warning: See the tool description for detailed information about what can or can not be assumed about the quality of the results.
\n", 34, 34) >> file;
closehtml(file);
}
#
# reportcontinents()
#
# Writes the per-continent author report under the base name "contdistr":
# opens contdistr.html, sums countryauthors[] into continentauthors[] via
# continentof[], and passes the collected tic labels and axis ranges to
# basicgnuplotsettings() and plottojpg().
#
# NOTE(review): the body of the "for (continent in continentauthors)" loop
# looks truncated in this copy -- the printf that starts inside it is never
# followed by the closing braces of the if and the for. Verify against a
# pristine copy of the script.
#
function reportcontinents() {
base = "contdistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of authors on continents");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
# Seed a zero entry for Antarctica (presumably so that it is always listed).
continentauthors["antarctica"] = 0;
for (country in countryauthors) {
continent = continentof[country];
# Countries missing from continentof[] are reported and counted as OTHER.
if (continent == "") {
printf("authorstats: Warning - Country %s unrecognized (authors %s)\n", country, countryauthors[country]);
continent = "OTHER";
}
if (continentauthors[continent] == "") continentauthors[continent] = 0;
continentauthors[continent] = continentauthors[continent] + countryauthors[country];
}
nth = 1;
maxn = 0;
printf("\n") > datafile;
# extra collects additional gnuplot commands, starting with the x tic labels.
extra = "set xtics rotate (";
for (continent in continentauthors) {
if (continentauthors[continent] > 0 || continent != "OTHER") {
if (maxn < continentauthors[continent]) maxn = continentauthors[continent];
printf("
\n",
nunknowncountry, (100.0 * nunknowncountry) / nauthors) >> htmlfile;
printf("%d %d\n", nth, nunknowncountry) >> datafile;
extra = extra quote "(unknown)" quote " " nth ")\n";
#extra = extra " font " quote "Times-Roman,40" quote "\n";
extra = extra "set xrange [0 : " nth + 1 "]\n";
extra = extra "set yrange [0 : " roundup(maxn) "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"Continent","# of Authors","Number of authors per continent",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Location data is calculated from the first occurrence of an author.
\n") >> htmlfile;
closehtml(htmlfile);
}
#
# reporttopcompanies(normalized)
#
# Produces the "companydistrhist" page ("_norm" is appended to the base name
# when normalized is set). For every year from startyear to endyear it
# counts the documents of each company in topcompanies[] and stores the
# result in multifunctiontab[], which genericmultifunctionreport() plots.
#
function reporttopcompanies(normalized) {
  base = "companydistrhist" (normalized ? "_norm" : "");
  htmlfile = base ".html";
  openhtml(htmlfile,"Comparison of companies over the years");
  delete reporttopcompanies_seencompany;
  delete multifunctiontrack;
  delete multifunctiontab;
  for (company in topcompanies) multifunctiontrack[company] = company;
  year = startyear;
  while (year <= endyear) {
    yeardata = "";
    for (company in topcompanies) {
      # Count the documents of this company that carry this year.
      split(companydraftslist[company],compd,"%");
      n = 0;
      for (drafti in compd) {
        draft = compd[drafti];
        if (draftyears[draft] == year) n++;
      }
      # A company is left out until its first year with documents.
      if (reporttopcompanies_seencompany[company] == "" && n == 0) continue;
      if (reporttopcompanies_seencompany[company] == "") {
        # First appearance: add a ":0" item for the preceding year.
        multifunctiontab[year-1] = (multifunctiontab[year-1] == "" ? "" : multifunctiontab[year-1] "%") company ":0";
      }
      yeardata = (yeardata == "" ? "" : yeardata "%");
      if (year != nowyear) {
        yeardata = yeardata company ":" n;
      } else if (lastyearcompletion >= minimumcompletion) {
        # Scale the running year up to a full year.
        yeardata = yeardata company ":" (n * (1/lastyearcompletion));
      }
      reporttopcompanies_seencompany[company] = "seen";
    }
    multifunctiontab[year] = yeardata;
    year++;
  }
  mavgalpha = 0.4;
  genericmultifunctionreport(base,htmlfile,startyear,endyear,0,normalized,mavgalpha,
    "Comparison of companies over the years","Year",
    (normalized ? "%" : "#") " of " docnameu "s");
  printf("
This tracks publication of %ss with authors from most active companies per year. Company data is calculated from the first occurrence of an author. ISI has been excluded from this graph, to make it clearer.\n", docname) >> htmlfile;
  if (normalized) printf("The graph is normalized to 100%% representing sum of the top companies.\n") >> htmlfile;
  printf("The data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile;
  if (lastyearcompletion < minimumcompletion) {
    printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile;
  } else {
    printf("The last year (%d) has been adjusted assuming the same publication rates as has happened during first part of the year (now %.2f %% complete).\n", nowyear, 100 * lastyearcompletion) >> htmlfile;
  }
  printf("
\n") >> htmlfile;
  closehtml(htmlfile);
}
#
# reporttopcountries(normalized)
#
# Writes the "countrydistrhist" page ("countrydistrhist_norm" when
# normalized is set). For every year from startyear to endyear it counts the
# documents of each country in topcountries[], stores the result in
# multifunctiontab[] and plots it on a logarithmic scale through
# genericmultifunctionreport().
#
function reporttopcountries(normalized) {
  base = "countrydistrhist";
  if (normalized) base = base "_norm";
  htmlfile = base ".html";
  # Only the normalized page carries the "(normalized)" suffix in its title.
  if (normalized)
    openhtml(htmlfile,"Comparison of countries over the years (normalized)");
  else
    openhtml(htmlfile,"Comparison of countries over the years");
  #
  # Calculate the main results
  #
  delete reporttopcountries_seencountry;
  delete multifunctiontrack;
  for (country in topcountries) {
    multifunctiontrack[country] = country;
  }
  delete multifunctiontab;
  for (year = startyear; year <= endyear; year++) {
    yeardata = "";
    for (country in topcountries) {
      # Count the documents of this country that carry this year.
      n = 0;
      split(countrydraftslist[country],compd,"%");
      for (drafti in compd) {
        draft = compd[drafti];
        if (year == draftyears[draft]) {
          n++;
        }
      }
      # A country is left out until its first year with documents.
      if (reporttopcountries_seencountry[country] != "" ||
          n != 0) {
        if (reporttopcountries_seencountry[country] == "") {
          # First appearance: add a ":0" item for the preceding year.
          if (multifunctiontab[year-1] != "") {
            multifunctiontab[year-1] = multifunctiontab[year-1] "%";
          }
          multifunctiontab[year-1] = multifunctiontab[year-1] country ":0";
        }
        if (yeardata != "") yeardata = yeardata "%";
        if (year == nowyear) {
          # The running year is scaled up to a full year, or left out when
          # lastyearcompletion is below minimumcompletion.
          if (lastyearcompletion >= minimumcompletion) {
            yeardata = yeardata country ":" (n * (1/lastyearcompletion));
          }
        } else {
          yeardata = yeardata country ":" n;
        }
        reporttopcountries_seencountry[country] = "seen";
      }
    }
    multifunctiontab[year] = yeardata;
  }
  #
  # Output the graphs
  #
  mavgalpha = 0.4;
  genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha,
    "Comparison of countries over the years","Year",
    normalized ? ("% of " docnameu "s") : ("# of " docnameu "s"));
  printf("
This tracks publication of %ss with authors from a given country. Country data is calculated from the first occurrence of an author.\n",
    docname) >> htmlfile;
  printf("The scale is logarithmic, ") >> htmlfile;
  if (normalized) printf("normalized to 100%% representing sum of the top countries, ") >> htmlfile;
  printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile;
  if (lastyearcompletion >= minimumcompletion) {
    printf("The last year (%d) has been adjusted assuming the same publication rates as has happened during first part of the year (now %.2f %% complete).\n", nowyear, 100 * lastyearcompletion) >> htmlfile;
  } else {
    printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile;
  }
  printf("
\n") >> htmlfile;
  closehtml(htmlfile);
}
#
# reporttopcontinents(normalized)
#
# Writes the "d-contdistrhist" page ("d-contdistrhist_norm" when normalized
# is set). For every year from startyear to endyear it counts the documents
# of each continent in continentdrafts[] (using continentdraftslist[]),
# stores the result in multifunctiontab[] and plots it on a logarithmic
# scale through genericmultifunctionreport().
#
function reporttopcontinents(normalized) {
  base = "d-contdistrhist";
  if (normalized) base = base "_norm";
  htmlfile = base ".html";
  openhtml(htmlfile,"Comparison of continents over the years");
  delete reporttopcontinents_seencontinent;
  delete multifunctiontrack;
  for (continent in continentdrafts) {
    multifunctiontrack[continent] = continent;
  }
  delete multifunctiontab;
  for (year = startyear; year <= endyear; year++) {
    yeardata = "";
    for (continent in continentdrafts) {
      # Count the documents of this continent that carry this year.
      n = 0;
      split(continentdraftslist[continent],compd,"%");
      for (drafti in compd) {
        draft = compd[drafti];
        if (year == draftyears[draft]) {
          n++;
        }
      }
      # A continent is left out until its first year with documents.
      if (reporttopcontinents_seencontinent[continent] != "" ||
          n != 0) {
        if (reporttopcontinents_seencontinent[continent] == "") {
          # First appearance: add a ":0" item for the preceding year.
          if (multifunctiontab[year-1] != "") {
            multifunctiontab[year-1] = multifunctiontab[year-1] "%";
          }
          multifunctiontab[year-1] = multifunctiontab[year-1] continent ":0";
        }
        if (yeardata != "") yeardata = yeardata "%";
        if (year == nowyear) {
          # The running year is scaled up to a full year, or left out when
          # lastyearcompletion is below minimumcompletion.
          if (lastyearcompletion >= minimumcompletion) {
            yeardata = yeardata continent ":" (n * (1/lastyearcompletion));
          }
        } else {
          yeardata = yeardata continent ":" n;
        }
        reporttopcontinents_seencontinent[continent] = "seen";
      }
    }
    multifunctiontab[year] = yeardata;
  }
  mavgalpha = 0.4;
  genericmultifunctionreport(base,htmlfile,startyear,endyear,1,normalized,mavgalpha,
    "Comparison of continents over the years","Year",
    normalized ? "% of " docnameu "s" : "# of " docnameu "s");
  printf("
This tracks publication of %ss with authors from a given continent.\n",
    docname) >> htmlfile;
  printf("Location data is calculated from the first occurrence of an author.\n") >> htmlfile;
  printf("The scale is logarithmic, ") >> htmlfile;
  # Every continent is plotted here, so the sum is over the continents (the
  # text used to say "top companies", carried over from the company report).
  if (normalized) printf("normalized to 100%% representing sum of all continents, ") >> htmlfile;
  printf("and data has been smoothed using an exponential moving average with alpha = %.2f.\n", mavgalpha) >> htmlfile;
  if (lastyearcompletion >= minimumcompletion) {
    printf("The last year (%d) has been adjusted assuming the same\n",
      nowyear) >> htmlfile;
    printf("publication rates as has happened during first part of the year (now %.2f %% complete).\n",
      100 * lastyearcompletion) >> htmlfile;
  } else {
    printf("The last year (%d) has been excluded because of lack of sufficiently reliable data.\n", nowyear) >> htmlfile;
  }
  printf("
\n") >> htmlfile;
  closehtml(htmlfile);
}
#
# genericmultifunctionreport(base,htmlfile,startx,endx,islog,isnormalized,
#                            mavgalpha,title,xlabel,ylabel)
#
# Plots one line per entry of multifunctiontrack[] over x = startx..endx.
#
# Inputs taken from globals:
#   multifunctiontrack[t] - the set of track names
#   multifunctiontab[x]   - "%"-separated list of "track:value" items for x
#
# Parameters:
#   base         - base name of the .png, .eps and .txt (gnuplot) files and
#                  of the per-track "<track>-<base>.dat" data files
#   htmlfile     - page that the section markup is appended to
#   startx,endx  - inclusive x range
#   islog        - non-zero selects a base 2 logarithmic y axis
#   isnormalized - non-zero writes each value as a percentage of the sum of
#                  all values at the same x
#   mavgalpha    - when > 0.0 every data file is passed through mavg() with
#                  this alpha
#   title,xlabel,ylabel - gnuplot title and axis labels
#
# Exits with status 1 if multifunctiontab[] names a track that is not in
# multifunctiontrack[].
#
function genericmultifunctionreport(base,htmlfile,startx,endx,islog,isnormalized,mavgalpha,title,xlabel,ylabel) {
pngfile = base ".png";
epsfile = base ".eps";
datafilebase = "-" base ".dat";
gplfile = base".txt";
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, pngfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
# Start every per-track data file afresh.
for (track in multifunctiontrack) {
trackfile = track datafilebase;
printf("\n") > trackfile;
}
#
# Calculate the per-x sums, if normalization needed
#
delete multifunctionsum;
for (i = startx; i <= endx; i++) {
sum = 0;
e = multifunctiontab[i];
split(e,multifunctionint,"%");
for (vi in multifunctionint) {
v = multifunctionint[vi];
split(v,multifunctionint2,":");
y = multifunctionint2[2];
sum += y;
}
multifunctionsum[i] = sum;
}
#
# Output the main per-track plotting data
#
maxy = 0;
for (i = startx; i <= endx; i++) {
e = multifunctiontab[i];
split(e,multifunctionint,"%");
for (vi in multifunctionint) {
v = multifunctionint[vi];
split(v,multifunctionint2,":");
track = multifunctionint2[1];
y = multifunctionint2[2];
if (!(track in multifunctiontrack)) {
printf("authorstats: Error: track %s is unknown at %d for %s -- exit\n", track, i, htmlfile);
exit(1);
}
trackfile = track datafilebase;
if (isnormalized) {
printf("%d %6.4f\n",
i,
multifunctionsum[i] == 0 ? 0.0 : (y * 100.0) / multifunctionsum[i]) >> trackfile;
} else {
# %d drops the fractional part of y.
printf("%d %d\n", i, y) >> trackfile;
}
# maxy follows the raw values; it is only used for the non-normalized
# logarithmic y range further down.
if (y > maxy) maxy = y;
}
}
for (track in multifunctiontrack) {
trackfile = track datafilebase;
close(trackfile);
}
if (mavgalpha > 0.0) {
# Keep the raw data as "orig-<file>" and let mavg() write the smoothed
# data under the original file name.
for (track in multifunctiontrack) {
trackfile = track datafilebase;
origtrackfile = "orig-" trackfile;
cmd = sprintf("mv %c%s%c %c%s%c", 34, trackfile, 34, 34, origtrackfile, 34);
#printf("doing %s...\n", cmd);
system(cmd);
mavg(origtrackfile,trackfile,mavgalpha);
}
}
# Write the gnuplot command file; 34 is the double quote character.
printf("set output %c%s%c\n", 34, epsfile, 34) > gplfile;
#printf("set terminal png\n") >> gplfile;
printf("set terminal postscript eps enhanced %cTimes-Roman%c 28 color solid\n", 34, 34) >> gplfile;
printf("set data style lines\n") >> gplfile;
#printf("set data style linespoints\n") >> gplfile;
#printf("set style linespoints linewidth 4\n") >> gplfile;
printf("set title %c%s%c\n", 34, title, 34) >> gplfile;
printf("set xlabel %c%s%c\n", 34, xlabel, 34) >> gplfile;
printf("set ylabel %c%s%c\n", 34, ylabel, 34) >> gplfile;
printf("set size 2.7,2.5\n") >> gplfile;
#printf("set size 1.7,1.5\n") >> gplfile;
if (islog) {
printf("set logscale y 2\n") >> gplfile;
if (isnormalized) {
printf("set yrange [ 0.5 : 100]\n") >> gplfile;
} else {
printf("set yrange [ 1 : %d]\n", roundup(maxy)) >> gplfile;
}
printf("set ytics ( %c1%c 1, %c5%c 5, %c20%c 20, %c10%c 10, %c50%c 50, %c100%c 100, %c200%c 200 )\n",
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34) >> gplfile;
}
printf("set key top left\n") >> gplfile;
printf("set pointsize 2\n") >> gplfile;
#printf("set style fill solid\n") >> gplfile;
printf("set view ,,2\n") >> gplfile;
# One plot clause per track, separated by commas; f counts the clauses.
f = 0;
printf("plot ") >> gplfile;
for (track in multifunctiontrack) {
trackfile = track datafilebase;
if (f > 0) {
printf(", ") >> gplfile;
}
f++;
printf("%c%s%c", 34, trackfile, 34) >> gplfile;
#printf(" smooth cspline") >> gplfile;
# f was already incremented, so the line types start at 2.
printf(" with lines lt %d lw 4", f+1) >> gplfile;
printf(" title %c%s%c", 34, countrycapitalize(track), 34) >> gplfile;
}
printf("\n") >> gplfile;
plottopng(gplfile,epsfile,pngfile);
}
#
# mavg(fromfile,tofile,alpha)
#
# Smooths a two-column data file with an exponential moving average:
#
#   s[1] = y[1]
#   s[i] = s[i-1] + alpha * (y[i] - s[i-1])
#
# fromfile - input; only lines longer than one character that consist of
#            exactly two whitespace-separated fields are used
# tofile   - output; overwritten. It starts with an empty line, followed by
#            one "x smoothed-y # orig was y" line per input line used
# alpha    - smoothing factor
#
# Both files are closed before returning, so that the output is complete on
# disk for whatever reads it next and no file stays open per track.
#
function mavg(fromfile,tofile,alpha) {
  # Read data
  delete mavgtab1a;
  delete mavgtab1b;
  i = 1;
  while ((getline frome < fromfile) == 1) {
    # A separator of " " splits on any run of blanks and tabs, so the
    # columns are found whichever of the two the writer used.
    if (length(frome) > 1 &&
        split(frome,mavgtab2," ") == 2) {
      mavgtab1a[i] = mavgtab2[1]+0.0;
      mavgtab1b[i] = mavgtab2[2]+0.0;
      i++;
    }
  }
  close(fromfile);
  # n is one past the last index that was filled.
  n = i;
  # Calculate moving average
  delete mavgtab3;
  mavgtab3[1] = mavgtab1b[1];
  for (i = 2; i < n; i++) {
    mavgtab3[i] = mavgtab3[i-1] + alpha * (mavgtab1b[i] - mavgtab3[i-1]);
  }
  # Output data
  printf("\n") > tofile;
  for (i = 1; i < n; i++) {
    printf("%6.2f %6.2f # orig was %6.2f\n", mavgtab1a[i], mavgtab3[i], mavgtab1b[i]) >> tofile;
  }
  close(tofile);
}
#
# reportcompanies()
#
# Writes the "companydistr" report: one entry per company in
# companyauthors[], in descending order of author count, plus the data and
# tic labels passed to basicgnuplotsettings() and plottojpg().
#
# Side effect: adds to topcompanies[] every company reached while
# nth <= ntopcompanies + 1, except "isi".
#
function reportcompanies() {
base = "companydistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of authors from companies");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
maxauthors = 0;
for (company in companyauthors) if (companyauthors[company] > maxauthors) maxauthors = companyauthors[company];
nth = 1;
# extra collects additional gnuplot commands, starting with the x tic labels.
extra = "set xtics rotate (";
lasti = 0;
printf("\n") > datafile;
# Walk the counts from the highest down, so companies come out ordered by
# their number of authors.
for (i = maxauthors; i >= 1; i--) {
for (company in companyauthors) {
if (companyauthors[company] == i) {
printf("
\n",
34, hrefprefix, tofname_comp(company), 34, companycapitalize(company),
i, (100.0 * i) / nauthors) >> htmlfile;
if (nth <= ntopcompanies + 1 && company != "isi") {
topcompanies[company] = company;
}
# Only the first 39 companies get a bar of their own; lasti remembers the
# largest author count among the remaining ones.
if (nth < 40) {
printf("%d %d\n", nth, i) >> datafile;
cot = companycapitalize(company);
sub(/ /,"\\n",cot);
extra = extra quote cot quote " " nth ", ";
nth++;
} else {
if (i > lasti) lasti = i;
}
}
}
}
# The bar that stands for all remaining companies is written with the
# fixed value 2.
# NOTE(review): that is a constant, not a count -- confirm it is intended.
if (lasti > 0) {
printf("%d %d\n", nth, 2) >> datafile;
extra = extra quote "(1-" lasti " authors)" quote " " nth++ ", ";
}
printf("
unknown author company for %d authors (%.2f%%).
\n",
nunknowncompany, (100.0 * nunknowncompany) / nauthors) >> htmlfile;
printf("%d %d\n", nth, nunknowncompany) >> datafile;
extra = extra quote "(unknown)" quote " " nth ")";
extra = extra " font " quote "Times-Roman,30" quote "\n";
#extra = extra "set xrange [0 : " nth + 1 "]\n";
extra = extra "set yrange [0 : " roundup(maxauthors) "]\n";
extra = extra "set xrange [0.5 : " nth+0.5 "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,
"Company","# of Authors","Number of authors in a company",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Company data is calculated from the first occurrence of an author.
\n") >> htmlfile;
closehtml(htmlfile);
}
#
# reportcountries(useeu)
#
# Writes the per-country author report under the base name "countrydistr",
# or "countryeudistr" when useeu is set, walking countryauthors[] from the
# highest author count down.
#
# useeu - when set, countries flagged in isineu[] are represented by a
#         single "european union" entry whose count is euauthors
#
# NOTE(review): the nested loops below look truncated in this copy -- the
# printf that starts with "." is never followed by the closing braces of
# the if and the two for loops. Verify against a pristine copy of the
# script.
#
function reportcountries(useeu) {
if (useeu) {
base = "countryeudistr";
} else {
base = "countrydistr";
}
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of authors from countries");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
# The EU total may exceed every single country, so it seeds the maximum.
if (useeu) maxauthors = euauthors; else maxauthors = 0;
for (country in countryauthors) if (countryauthors[country] > maxauthors) maxauthors = countryauthors[country];
nth = 1; printf("\n") > datafile;
seeneu = 0;
extra = "set xtics rotate (";
for (i = maxauthors; i >= 1; i--) {
for (country in countryauthors) {
na = countryauthors[country];
co = country;
# With useeu set, an EU member stands for the whole union until the union
# entry has matched the current count (seeneu); after that, EU members are
# skipped.
if (useeu && isineu[country]) {
if (seeneu) {
continue;
} else {
co = "european union";
na = euauthors;
seeneu = (na == i);
}
}
if (na == i) {
printf("
%d authors (%.2f%%) come from %s",
i, (100.0 * i) / nauthors,
34, hrefprefix, tofname(co), 34, countrycapitalize(co)) >> htmlfile;
# Entries with at most five authors also get the result of lastnames().
if (i <= 5 && co != "european union") {
printf(" (%s)", lastnames(countryauthorslist[country])) >> htmlfile;
}
printf(".
\n",
nunknowncountry, (100.0 * nunknowncountry) / nauthors) >> htmlfile;
printf("%d %d\n", nth, nunknowncountry) >> datafile;
extra = extra quote "(unknown)" quote " " nth ")";
if (useeu) {
extra = extra " font " quote "Times-Roman,30" quote "\n";
} else {
extra = extra " font " quote "Times-Roman,24" quote "\n";
}
extra = extra "set xrange [0 : " nth + 1 "]\n";
extra = extra "set yrange [0 : " maxauthors "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"Country","# of Authors","Number of authors in a country",0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Location data is calculated from the first occurrence of an author.
\n") >> htmlfile;
closehtml(htmlfile);
}
#
# reportcontinents_draft()
#
# Produces the "d-contdistr" page: document counts per continent. The
# per-country values in countrydrafts[] and countrydraftslist[] are added
# into continentdrafts[] and continentdraftslist[] through continentof[];
# countries without a continent go to "OTHER".
#
function reportcontinents_draft() {
  base = "d-contdistr";
  htmlfile = base ".html";
  epsfile = base ".eps";
  jpgfile = base ".jpg";
  datafile = base ".dat";
  gplfile = base".txt";
  openhtml(htmlfile,"Distribution of " docname "s according to the continents of their authors");
  html_file_css_section("Statistics",htmlfile);
  printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
  html_file_css_section("Details",htmlfile);
  printf("
\n") >> htmlfile;
  # NOTE(review): this seeds continentauthors[], while the rest of the
  # function works on continentdrafts[] -- confirm that is intended.
  continentauthors["antarctica"] = 0;
  for (country in countryauthors) {
    continent = continentof[country];
    if (continent == "") continent = "OTHER";
    continentdrafts[continent] += countrydrafts[country];
    if (countrydraftslist[country] == "") continue;
    # Join the document lists with "%" between them.
    continentdraftslist[continent] = (continentdraftslist[continent] == "" ? "" : continentdraftslist[continent] "%") countrydraftslist[country];
  }
  i = 0;
  maxn = 0;
  printf("\n") > datafile;
  extra = "set xtics rotate (";
  for (continent in continentdrafts) {
    # An "OTHER" bucket without documents is not shown.
    if (continent == "OTHER" && continentdrafts[continent] <= 0) continue;
    maxn = (maxn < continentdrafts[continent]) ? continentdrafts[continent] : maxn;
    printf("
%d %ss (%.2f%%) have authors from %s.
\n",
      continentdrafts[continent],
      docname,
      (100.0 * continentdrafts[continent]) / ndrafts,
      countrycapitalize(continent)) >> htmlfile;
    printf("%d %d\n", i, continentdrafts[continent]) >> datafile;
    continentt = countrycapitalize(continent);
    sub(/ /,"\\n",continentt);
    extra = extra quote continentt quote " " i ", ";
    i++;
  }
  extra = extra quote "(unknown)" quote " " i ")\n";
  #extra = extra " font " quote "Times-Roman,40" quote "\n";
  extra = extra "set xrange [-1 : " (i + 1) "]\n";
  extra = extra "set yrange [0 : " roundup(maxn) "]\n";
  basicgnuplotsettings(gplfile,epsfile,datafile,"Continent","# of " docnameu "s","Continent",0,extra);
  plottojpg(gplfile,epsfile,jpgfile);
  printf("
\n") >> htmlfile;
  printf("
Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different countries.
\n", docnameu) >> htmlfile;
  closehtml(htmlfile);
}
#
# reportcompanies_draft()
#
# Writes the "d-companydistr" report: documents per company, walking the
# counts from the highest down, plus the data passed to
# basicgnuplotsettings() and plottojpg().
#
# NOTE(review): companydrafts[company] is used in two incompatible ways
# here. The maximum is taken from the number of "%"-separated items in it,
# but the selection below compares the raw value with the number i. If the
# value is a "%"-separated list the comparison can only succeed for a
# one-item value that happens to equal i; if it is a plain count the split
# always yields 1 and maxdrafts never exceeds 1. Decide which form is
# intended and use it consistently.
#
function reportcompanies_draft() {
base = "d-companydistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of " docname "s according to the affiliation of their authors");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
# Largest number of "%"-separated items in any companydrafts[] value.
maxdrafts = 0;
for (company in companydrafts) {
n = split(companydrafts[company],compd,"%");
if (n > maxdrafts) maxdrafts = n;
}
nth = 1; printf("\n") > datafile;
for (i = maxdrafts; i >= 1; i--) {
for (company in companydrafts) {
if (companydrafts[company] == i) {
n = split(companydrafts[company],compd,"%");
printf("
\n",
34, hrefprefix, tofname_comp(company), 34, companycapitalize(company),
i, (100.0 * i) / ndrafts) >> htmlfile;
printf("%d %d\n", nth++, i) >> datafile;
}
}
}
basicgnuplotsettings(gplfile,epsfile,datafile,"Company","# of " docnameu,"Number of " docname "s by a company",0,"");
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different companies.
\n", docnameu) >> htmlfile;
closehtml(htmlfile);
}
#
# reportcountries_draft(useeu)
#
# Writes the per-country document report under the base name
# "d-countrydistr", or "d-countryeudistr" when useeu is set, walking
# countrydrafts[] from the highest count down. It finishes by calling
# reportcountries_draft_percap(useeu).
#
# useeu - when set, countries flagged in isineu[] are represented by a
#         single "european union" entry whose count is eudrafts
#
# Side effect: when useeu is not set, every entry printed while
# nth < ntopcountries is added to topcountries[].
#
function reportcountries_draft(useeu) {
if (useeu) {
base = "d-countryeudistr";
} else {
base = "d-countrydistr";
}
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of " docname "s according to the countries of their authors");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
%ss come from these countries, %d different countries in total. \n",
docnameu, 34, 34, ncountries) >> htmlfile;
printf("
\n") >> htmlfile;
printf("
\n") >> htmlfile;
# The EU total may exceed every single country, so it seeds the maximum.
if (useeu) maxdrafts = eudrafts; else maxdrafts = 0;
for (country in countryauthors) if (countrydrafts[country] > maxdrafts) maxdrafts = countrydrafts[country];
nth = 1; printf("\n") > datafile;
seeneu = 0;
# extra collects additional gnuplot commands, starting with the x tic labels.
extra = "set xtics rotate (";
for (i = maxdrafts; i >= 1; i--) {
for (country in countrydrafts) {
nd = countrydrafts[country];
co = country;
# With useeu set, an EU member stands for the whole union until the union
# entry has matched the current count (seeneu); after that, EU members are
# skipped.
if (useeu && isineu[country]) {
if (seeneu) {
continue;
} else {
co = "european union";
nd = eudrafts;
seeneu = (nd == i);
}
}
if (nd == i) {
printf("
%d %ss (%.2f%%) have authors from %s",
i, docname, (100.0 * i) / ndrafts,
34, hrefprefix, tofname(co), 34, countrycapitalize(co)) >> htmlfile;
if (nth < ntopcountries && !useeu) {
topcountries[co] = co;
}
# Entries with at most two documents also list those documents,
# separated by commas.
if (i <= 2 && co != "european union") {
v = countrydraftslist[country];
split(v,draftc,"%");
printf(" (") >> htmlfile;
commayet = 0;
for (g in draftc) {
if (commayet) {
printf(", ") >> htmlfile;
} else {
commayet = 1;
}
printf("%s", 34, docprefix, draftc[g], 34, draftc[g]) >> htmlfile;
}
printf(")", v) >> htmlfile;
}
printf(".
\n") >> htmlfile;
printf("%d %d\n", nth, i) >> datafile;
cot = countrycapitalize(coname(co));
#sub(/ /,"\\n",cot);
extra = extra quote cot quote " " nth ", ";
nth++;
}
}
}
extra = extra quote "(unknown)" quote " " nth ")";
if (useeu) {
extra = extra " font " quote "Times-Roman,30" quote "\n";
} else {
extra = extra " font " quote "Times-Roman,20" quote "\n";
}
extra = extra "set xrange [0 : " nth + 1 "]\n";
extra = extra "set yrange [0 : " maxdrafts "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"","# of " docnameu "s",
"Number of " docname "s with authors from a country",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Wondering why the total is greater than 100%%? %ss with multiple authors may be counted multiple times, if the authors are from different countries.
\n",
34, base, 34) >> htmlfile;
closehtml(htmlfile);
reportcountries_draft_percap(useeu);
}
# reportcountries_draft_percap(useeu): write the per-capita variant of the
# per-country document distribution page and the inputs for its chart.
#   useeu - when true, output files use the base name d-countryeudistrcap
#           and countries flagged in isineu[] are folded into a single
#           "european union" entry; when false the base name is
#           d-countrydistrcap.
# Densities are expressed as documents per million people, computed from
# countrydrafts[] (or eudrafts) and population[]. checkpopulation() aborts
# the run when a needed population figure is missing or zero.
# Variables not listed as parameters are global in awk, so this function
# overwrites base, htmlfile, epsfile, jpgfile, datafile, gplfile, maxdrafts,
# forthis, seeneu, nth, extra, delta, nd, co, cot, i and country.
function reportcountries_draft_percap(useeu) {
if (useeu) {
base = "d-countryeudistrcap";
} else {
base = "d-countrydistrcap";
}
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of " docname "s according to the countries of their authors, per capita");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
printf("\n") > datafile;
if (useeu) {
#printf("rcdpc eu\n");
checkpopulation("european union");
# Seed the maximum with the EU density in the same unit (documents per
# million people) that every other density below uses.
maxdrafts = 1000 * 1000 * eudrafts / (1.0 * population["european union"]);
#printf("max document density is %f for %s\n", maxdrafts, "european union");
} else {
maxdrafts = 0.0;
}
for (country in countryauthors) {
#printf("rcdpc2 %s\n", country, countryauthors[country]);
checkpopulation(country);
forthis = 1000 * 1000 * countrydrafts[country] / (1.0 * population[country]);
#printf("%s: %d at %d (%f)\n", country, countrydrafts[country], population[country], forthis);
if (forthis > maxdrafts) {
maxdrafts = forthis;
#printf("max document density is %f for %s\n", maxdrafts, country);
}
}
seeneu = 0;
nth = 1;
extra = "set xtics rotate (";
# Densities are real numbers, so the descending walk uses 500 equal-width
# buckets between the maximum and zero. The delta > 0.0 guard skips the
# loop entirely when the maximum is zero, where i would never decrease.
delta = maxdrafts/500.0;
for (i = maxdrafts; delta > 0.0 && i >= -delta; i -= delta) {
#printf("stepping at %.20f\n", i);
for (country in countrydrafts) {
#printf("testing %s\n", country);
nd = countrydrafts[country];
co = country;
# In EU mode each member country stands in for the union until the union
# has been emitted; afterwards all member countries are skipped.
if (useeu && isineu[country]) {
if (seeneu) {
#printf("already seen eu\n");
continue;
} else {
co = "european union";
nd = eudrafts;
#printf("testing now eu\n");
}
}
forthis = 1000 * 1000 * nd / (1.0 * population[co]);
# Emit the entry in the pass whose bucket [i, i + delta) holds its density.
if (forthis < i + delta && forthis >= i) {
printf("
%.2f %ss per million people from %s (%.2f million people)",
forthis, docname,
34, hrefprefix, tofname(co), 34, countrycapitalize(co),
population[co] / (1000 * 1000)) >> htmlfile;
printf(".
\n") >> htmlfile;
# One data row per entry: x position and density.
printf("%d %f\n", nth, forthis) >> datafile;
cot = countrycapitalize(coname(co));
#sub(/ /,"\\n",cot);
extra = extra quote cot quote " " nth ", ";
nth++;
if (useeu && co == "european union") seeneu = 1;
}
}
}
# The tick list is closed with an "(unknown)" label one position past the
# last emitted entry.
extra = extra quote "(unknown)" quote " " nth ")";
if (useeu) {
extra = extra " font " quote "Times-Roman,30" quote "\n";
} else {
extra = extra " font " quote "Times-Roman,20" quote "\n";
}
# Addition binds tighter than concatenation in awk, so nth + 1 is computed
# before it is joined into the string.
extra = extra "set xrange [0 : " nth + 1 "]\n";
extra = extra "set yrange [0 : " maxdrafts "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"",
"# of " docnameu "s per million capita",
"Number of " docname "s with authors from a country, per capita",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf("
Source of population data is Wikipedia.
\n") >> htmlfile;
closehtml(htmlfile);
}
# checkpopulation(co): make sure a usable population figure exists for co.
# Returns quietly when population[co] is neither empty nor zero; otherwise
# the run is aborted through fatalerror().
function checkpopulation(co) {
  if (population[co] != "" && population[co] != 0) {
    return;
  }
  fatalerror("Population for " co " is unknown");
}
# fatalerror(s): report an unrecoverable problem and stop the awk program
# with exit status 1.
#   s - human-readable description of what went wrong
# The message is written to standard error so that it cannot end up mixed
# into whatever consumes the regular output of the program.
function fatalerror(s) {
  printf("authorstats: Fatal error: %s -- exit\n", s) > "/dev/stderr";
  exit(1);
}
# lastnames(s): reduce a ", "-separated list of full names to the matching
# ", "-separated list of last names, using lastname() for each element.
# The working variables nk, nameparts, result and k are globals because
# they are not listed as parameters.
function lastnames(s) {
  nk = split(s,nameparts,", ");
  # The first element is handled outside the loop so that no leading
  # separator is produced.
  result = lastname(nameparts[1]);
  k = 2;
  while (k <= nk) {
    result = result ", " lastname(nameparts[k]);
    k++;
  }
  return(result);
}
# lastname(y): return the last name from a whitespace-separated full name.
# Trailing tokens that are a generational suffix (2nd, 3rd, II, III) or that
# start with an opening parenthesis are skipped, however many of them are
# stacked at the end, so a name followed by a suffix and a parenthetical
# yields the name. An empty string is returned when no other token is left.
# The working variables nr, lnamecomps and res are globals because they are
# not listed as parameters.
function lastname(y) {
  nr = split(y,lnamecomps," ");
  # Walk backwards from the final token until a real name part is found.
  while (nr >= 1) {
    res = lnamecomps[nr];
    if (res != "3rd" && res != "2nd" && res != "III" && res != "II" && substr(res,1,1) != "(") {
      return(res);
    }
    nr--;
  }
  res = "";
  return(res);
}
# official(s): return 1 when s contains one of the prefixes draft-ietf,
# draft-iab, draft-rfc, draft-iesg or draft-irtf, and 0 otherwise.
function official(s) {
  if (s ~ /draft-ietf/) return(1);
  if (s ~ /draft-iab/) return(1);
  if (s ~ /draft-rfc/) return(1);
  if (s ~ /draft-iesg/) return(1);
  if (s ~ /draft-irtf/) return(1);
  return(0);
}
# isinarea(draft,area): return 1 when draft belongs to a working group of
# the given area, 0 otherwise.
# The working group is taken from getofficialwg() and, only when that
# yields nothing, from getrelatedwg(). The result is 0 when area has no
# entry in isarea[], when no working group is found, when the group has no
# entry in iswg[], or when wgarea[] maps it to another area.
# The working variable wg is a global because it is not a parameter.
function isinarea(draft,area) {
  if (isarea[area] != "") {
    wg = getofficialwg(draft);
    if (wg == "") {
      wg = getrelatedwg(draft);
    }
    # Short-circuit evaluation keeps the lookups in the same order as a
    # chain of early returns would.
    if (wg != "" && iswg[wg] != "" && wgarea[wg] == area) {
      return(1);
    }
  }
  return(0);
}
# reportauthoractivitiesperarea(): write authactareadistr.html, which has
# one section per area in isarea[] about the most active authors there.
# For every area the authors are bucketed by how many of their documents
# fall in that area according to isinarea():
#   numbersofdrafts[n]       - number of authors with exactly n documents
#   numbersofdraftsauthor[n] - those authors, joined with "%"
# The buckets are then walked from the largest count downwards until 20
# authors have been printed.
# NOTE(review): the brace nesting of the final output loop does not balance
# in this revision; confirm against a pristine copy before restructuring.
function reportauthoractivitiesperarea() {
base = "authactareadistr";
htmlfile = base ".html";
openhtml(htmlfile,"Most active authors per area");
for (area in isarea) {
html_file_css_section(area " Area", htmlfile);
printf("
\n") >> htmlfile;
maxdrafts = 0;
delete numbersofdrafts;
delete numbersofdraftsauthor;
# Give every bucket a defined starting value. The index has to be the loop
# variable i; n is only assigned later, inside the author loop.
for (i = 0; i < 500; i++) {
numbersofdrafts[i] = 0;
numbersofdraftsauthor[i] = "";
}
for (author in authordrafts) {
# authordrafts[] holds the documents of an author joined with "%".
split(authordrafts[author],acomps,"%");
n = 0;
delete acomps2;
for (drafti in acomps) {
if (isinarea(acomps[drafti],area)) {
acomps2[++n] = acomps[drafti];
}
}
if (n > 0) {
numbersofdrafts[n] = numbersofdrafts[n] + 1;
if (numbersofdraftsauthor[n] == "") {
numbersofdraftsauthor[n] = author;
} else {
numbersofdraftsauthor[n] = numbersofdraftsauthor[n] "%" author;
}
if (n > maxdrafts) maxdrafts = n;
}
}
printedauthors = 0;
for (i = maxdrafts; i >= 0 && printedauthors < 20; i--) {
if (numbersofdrafts[i] > 0) {
split(numbersofdraftsauthor[i],guiltyauthors,"%");
for (h in guiltyauthors) {
printf("
\n") >> htmlfile;
}
closehtml(htmlfile);
}
# reportauthoractivities(iswg): start the page about how many documents
# each author has.
#   iswg - when true the output files use the base name authactdistr-wg,
#          otherwise authactdistr. Inside this function the parameter
#          hides the global array iswg[] that other functions index.
# Sets the globals base, htmlfile, epsfile, jpgfile, datafile and gplfile.
# NOTE(review): the brace nesting after the Details section does not
# balance in this revision, and the statements from the datafile printf
# onward plot the number of authors per document, which looks like the end
# of a different report. Confirm against a pristine copy before editing.
function reportauthoractivities(iswg) {
if (iswg) {
base = "authactdistr-wg";
} else {
base = "authactdistr";
}
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of authors according to how many documents they have");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
Total number of distinct authors is %d, the different %ss together had a total of %d author entries.
\n") >> htmlfile;
}
# One data row per author count i: the share of documents, in percent of
# ndrafts, that have that many authors.
printf("%d %.2f\n", i, (100.0 * numbersofauthors[i]) / ndrafts) >> datafile;
}
basicgnuplotsettings(gplfile,epsfile,datafile,"# of Authors", "% of " docname "s","Number of authors in " docname "s",
0,"set xtics 1, 1\n");
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
closehtml(htmlfile);
}
# reportpubyears(): write pubdistr.html with publication-rate statistics.
# When doctype is "rfc" a yearly section is produced first: a list of
# counts per year and a chart fed from pubdistr.dat. For every doctype a
# monthly chart fed from pubdistr2.dat follows.
# Reads the globals doctype, startyear, endyear, draftyears[],
# draftmonths[] and monthnameshort[]; overwrites base, htmlfile, epsfile,
# jpgfile, datafile, gplfile, maxn, n, extra, i, j and draft.
function reportpubyears() {
base = "pubdistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
# openhtml() is only reached for the doctypes "draft" and "rfc"; any other
# value falls through both branches without opening the page.
if (doctype == "draft") {
openhtml(htmlfile,"Publication rate per month");
} else if (doctype == "rfc") {
openhtml(htmlfile,"Publication rate per year");
html_file_css_section("Yearly Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
printf("\n") > datafile;
maxn = 0;
# Count the documents of each year. Years without documents are left out
# of the HTML list but still get a data row so the chart has no gaps.
for (i = startyear; i <= endyear; i++) {
n = 0;
for (draft in draftyears) if (draftyears[draft] == i) n++;
if (n > maxn) maxn = n;
if (n > 0) {
printf("
%d %ss in %d.", n, docname, i) >> htmlfile;
printf("
\n") >> htmlfile;
}
printf("%d %d\n", i, n) >> datafile;
}
# Subtraction and addition bind tighter than concatenation in awk, so the
# range bounds are computed before being joined into the string.
extra = "set xrange [" startyear-1 " : " endyear+1 "]\n";
extra = extra "set yrange [ 0 : " roundup(maxn) "]\n";
extra = extra "set xtics ";
# Tick labels are rotated for spans of more than 5 years, and the font
# shrinks further for spans of more than 20 years.
if (endyear - startyear > 5) {
extra = extra "rotate ";
}
extra = extra startyear ", 1, " endyear;
if (endyear - startyear > 20) {
extra = extra " font " quote "Times-Roman,20" quote "\n";
} else if (endyear - startyear > 5) {
extra = extra " font " quote "Times-Roman,30" quote "\n";
} else {
extra = extra "\n";
}
basicgnuplotsettings(gplfile,epsfile,datafile,"Year", "# of " docnameu "s","Publication rate per year",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
printf(" \n") >> htmlfile;
# The yearly output names are closed before the variables are pointed at
# the second set of files below.
close(epsfile);close(jpgfile);close(datafile);close(gplfile);
}
epsfile = base "2.eps";
jpgfile = base "2.jpg";
datafile = base "2.dat";
gplfile = base "2.txt";
printf("\n") > datafile;
maxn = 0;
# Monthly counts: one data row per month of every year in range. The x
# value is year - 0.5 + (month - 1) / 12, which matches the half-year
# offset of the x range set below.
for (i = startyear; i <= endyear; i++) {
for (j = 1; j <= 12; j++) {
n = 0;
for (draft in draftyears) if (draftyears[draft] == i && draftmonths[draft] == monthnameshort[j]) n++;
if (n > maxn) maxn = n;
printf("%4.2f %d\n", i - 0.5 + (j-1)/12.0, n) >> datafile;
}
}
html_file_css_section("Monthly Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
extra = "set xrange [" startyear-0.5 " : " endyear+0.5 "]\n";
extra = extra "set yrange [ 0 : " roundup(maxn) "]\n";
extra = extra "set xtics ";
if (endyear - startyear > 5) {
extra = extra "rotate ";
}
extra = extra startyear ", 1, " endyear;
if (endyear - startyear > 20) {
extra = extra " font " quote "Times-Roman,20" quote "\n";
} else if (endyear - startyear > 5) {
extra = extra " font " quote "Times-Roman,30" quote "\n";
} else {
extra = extra "\n";
}
extra = extra "set mxtics 0.08\n";
extra = extra "set boxwidth 0.07\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"Year and Month", "# of " docnameu "s","Publication rate per month",
0,extra);
plottojpg(gplfile,epsfile,jpgfile);
closehtml(htmlfile);
}
# roundup(n): return an axis maximum strictly above n that sits on a round
# boundary: the next multiple of 10 for values below 50, the next multiple
# of 100 otherwise. Examples: 37 gives 40, 40 gives 50, 120 gives 200.
function roundup(n) {
  # awk division yields a floating point result, so int() is needed to
  # drop the remainder; without it the expression is simply n plus the step.
  if (n < 50) {
    return(10 * int(n / 10) + 10);
  } else {
    return(100 * int(n / 100) + 100);
  }
}
# reportformats(): write formatdistr.html, the page about document formats
# and features. In the visible code the page consists of the Formats
# section and a remark that feature recognition is heuristic.
# Sets the globals base, htmlfile, epsfile, jpgfile, datafile and gplfile;
# only htmlfile is used by the statements visible here.
function reportformats() {
base = "formatdistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Document formats and features in " docname "s");
html_file_css_section("Formats", htmlfile);
printf("
The document feature recognition is based on heuristics and is inherently unreliable.
\n") >> htmlfile;
closehtml(htmlfile);
}
# reportpages(): start pagedistr.html, the page about the distribution of
# page counts, and determine the largest page count in draftpagecounts[].
# Sets the globals base, htmlfile, epsfile, jpgfile, datafile, gplfile and
# maxpages.
# NOTE(review): the brace nesting does not balance in this revision. From
# the second reset of the data file onward the statements count working
# groups per area and plot them, which looks like the end of a different
# report. Confirm against a pristine copy before editing.
function reportpages() {
base = "pagedistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of the number of pages in " docname "s");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
html_file_css_section("Details",htmlfile);
printf("
\n") >> htmlfile;
maxpages = 0;
for (draft in draftpagecounts) {
if (draftpagecounts[draft] > maxpages) maxpages = draftpagecounts[draft];
}
printf("\n") > datafile;
# Walk the page counts from the largest one downwards.
for (i = maxpages; i >= 0; i--) {
if (pagecounts[i] > 0) {
printf("
\n") >> htmlfile;
printf("\n") > datafile;
nth = 1;
maxn = 0;
extra = "set xtics rotate (";
# For every area, count the working groups that wgarea[] assigns to it.
for (area in isarea) {
thiscount = 0;
for (wg in iswg) {
if (area == wgarea[wg]) {
thiscount++;
}
}
if (thiscount > maxn) maxn = thiscount;
printf("
%s has %d (%.2f%%) working groups.
\n",
area, thiscount, (100.0 * thiscount) / nwgs) >> htmlfile;
# One data row per area: x position and number of working groups.
printf("%d %d\n", nth, thiscount) >> datafile;
areat = splittolines(areacapitalize(area));
# Tick labels are comma-separated, so the separator goes before every
# label except the first.
if (nth > 1) extra = extra ", ";
extra = extra quote areat quote " " nth;
nth++;
}
extra = extra ")\n";
extra = extra "set xrange [-0.5 : " nth+1 "]\n";
basicgnuplotsettings(gplfile,epsfile,datafile,"Area","# of WGs","Number of WGs in an area",0,extra);
plottojpg(gplfile,epsfile,jpgfile);
printf("
\n") >> htmlfile;
closehtml(htmlfile);
}
# splittolines(inputstring): return inputstring with its first two spaces
# each replaced by the two-character sequence backslash-n. Any further
# spaces are left untouched.
function splittolines(inputstring) {
  # sub() reports how many replacements it made; when the first call finds
  # no space a second call could not find one either.
  if (sub(/ /,"\\n",inputstring)) {
    sub(/ /,"\\n",inputstring);
  }
  return(inputstring);
}
# reportwgs(): write wgdistr.html, the distribution of documents over
# working groups, together with the inputs for its chart.
# Every entry of isdraft[] is attributed to its official working group
# (getofficialwg) in wgcounts[], or failing that to a related working group
# (getrelatedwg) in wgcountsother[]. Neither array is cleared here, so the
# counts add to whatever the globals already hold.
# Sets the globals base, htmlfile, epsfile, jpgfile, datafile, gplfile,
# maxcount, wg, i and draft, and fills nwgswithdrafts[].
# NOTE(review): the brace nesting of the loop under "List of WGs" does not
# balance in this revision; confirm against a pristine copy before editing.
function reportwgs() {
base = "wgdistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,"Distribution of " docname "s according to WGs");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
maxcount = 0;
for (draft in isdraft) {
wg = getofficialwg(isdraft[draft]);
if (wg != "") {
if (wg in wgcounts) {
wgcounts[wg] = wgcounts[wg] + 1;
} else {
wgcounts[wg] = 1;
}
# maxcount only tracks official counts; related documents do not raise it.
if (wgcounts[wg] > maxcount) maxcount = wgcounts[wg];
} else {
wg = getrelatedwg(isdraft[draft]);
if (wg != "") {
if (wg in wgcountsother) {
wgcountsother[wg] = wgcountsother[wg] + 1;
} else {
wgcountsother[wg] = 1;
}
}
}
}
# Histogram: nwgswithdrafts[i] is the number of working groups that have
# exactly i official documents.
for (i = maxcount; i >= 0; i--) {
nwgswithdrafts[i] = 0;
for (wg in wgcounts) {
if (wgcounts[wg] == i) {
nwgswithdrafts[i] = nwgswithdrafts[i] + 1;
}
}
}
printf("\n") > datafile;
html_file_css_section("Distribution of " docname " amounts", htmlfile);
printf("
\n") >> htmlfile;
# Empty histogram slots are left out of the HTML list but still get a data
# row for the chart.
for (i = maxcount; i >= 0; i--) {
if (nwgswithdrafts[i] > 0) {
printf("
%d WGs have %d %ss.
\n", nwgswithdrafts[i], i, docname) >> htmlfile;
}
printf("%d %d\n", i, nwgswithdrafts[i]) >> datafile;
}
printf("
\n") >> htmlfile;
html_file_css_section("List of WGs", htmlfile);
printf("
\n") >> htmlfile;
# Working groups are visited in descending order of their official count.
for (i = maxcount; i >= 0; i--) {
for (wg in wgcounts) {
if (wgcounts[wg] == i) {
printf("
%s has %d official %ss (%.2f%% of all) and %d related drafts (together %.2f%% of all).
\n") >> htmlfile;
basicgnuplotsettings(gplfile,epsfile,datafile,"# of " docnameu "s","# of WGs","Number of " docname "s in a WG",1,"");
plottojpg(gplfile,epsfile,jpgfile);
printf("
Definition of a related %s is that it matches the pattern draft-something-wg, where something is not ietf, and wg is the name of a working group.
\n", docname) >> htmlfile;
closehtml(htmlfile);
}
function reportareas() {
base = "areadistr";
htmlfile = base ".html";
epsfile = base ".eps";
jpgfile = base ".jpg";
datafile = base ".dat";
gplfile = base".txt";
openhtml(htmlfile,docnameu " area distribution");
html_file_css_section("Statistics",htmlfile);
printf("\n", 34, jpgfile, 34, 34, 34, 34, 34) >> htmlfile;
maxcount = 0;
for (draft in isdraft) {
wg = getofficialwg(isdraft[draft]);
if (wg != "") {
area = wgarea[wg];
if (area in areacounts) {
areacounts[area] = areacounts[area] + 1;
} else {
areacounts[area] = 1;
}
if (areacounts[area] > maxcount) maxcount = areacounts[area];
} else {
wg = getrelatedwg(isdraft[draft]);
if (wg != "") {
area = wgarea[wg];
if (area in areacountsother) {
areacountsother[area] = areacountsother[area] + 1;
} else {
areacountsother[area] = 1;
}
}
}
}
printf("\n") > datafile;
html_file_css_section("List of areas", htmlfile);
printf("
\n") >> htmlfile;
nth = 1;
extra = "set xtics rotate (";
areacountsother[""] = 0;
for (i = maxcount; i >= 0; i--) {
for (area in areacounts) {
if (areacounts[area] == i) {
if (area == "") areaname = "Unknown"; else areaname = area;
printf("
%s has %d %ss (%.2f%% of all) and %d related drafts (together %.2f%% of all).