#!/bin/bash

###
### Parse arguments
###

# DOSCP selects how the results are published at the end of the script:
# 0 copies them locally with cp, 1 uploads them with scp. It is switched
# on by passing --scp as the first argument, which is then shifted away.
# The argument is quoted so that a value containing whitespace or test
# operators cannot change the meaning of the comparison.
DOSCP=0
if [ "${1:-}" = "--scp" ]
then
  DOSCP=1
  shift
fi

###
### Cleanup old stuff first
###

echo Remove old results...

# Everything below works with paths relative to /tmp, so stop right away
# if the directory cannot be entered; otherwise the removals that follow
# would run in whatever directory the script was started from.
cd /tmp || { echo "cannot cd to /tmp" >&2; exit 1; }

# Leftovers from a previous run: two scratch files and the database tree.
rm -f idres.txt timeres.txt
rm -rf ietfdb

###
### Function definitions
###

# removecomments: copy stdin to stdout, leaving out every line that
# starts with "#" and every line that is empty or consists only of
# spaces and tabs. All remaining lines are printed unchanged.
removecomments() {
  awk '{
    if ($0 ~ /^#/) next;
    if ($0 ~ /^[ 	]*$/) next;
    print;
  }'
}

###
### WGET
###

echo Fetch files...

# Inputs that wget stores under the last path component of their URL.
# Any earlier copy is deleted before each download (presumably so that
# wget does not keep the old file and save the new one under another
# name). The exit status of wget is not checked.
fetchlist=(
  http://tools.ietf.org/id/rfc_id.txt
  http://www.arkko.com/tools/admeasurements/draft-timing.txt
  http://www.arkko.com/tools/admeasurements/adwork.txt
  http://www.arkko.com/tools/admeasurements/ads.txt
  http://www.arkko.com/tools/rfcauthors.txt
  http://www.arkko.com/tools/idauthors.txt
  http://www.ietf.org/internet-drafts/all_id.txt
  http://www.ietf.org/id/all_id2.txt
  http://www.rfc-editor.org/queue.xml
)
for src in "${fetchlist[@]}"
do
  rm -f "${src##*/}"
  wget -q "$src"
done

# The discusses page URL ends in a directory, so the output file name
# is given explicitly.
rm -f discusses.html
wget -q -O discusses.html https://datatracker.ietf.org/iesg/discusses/

###
### Header
###

# Start the two generated files with a comment header that describes
# their colon-separated fields. Both files are created or truncated
# here; the join steps further down append the data lines.
# Every header line is a complete, individually quoted string, so each
# one ends up on its own line beginning with "#".
{
  echo '# Draft author entries, combined with process timing information'
  echo '# '
  echo '# Fields are separated by : and are the following:'
  echo '#   first 9 fields: as in idauthors.txt (note that e-mail field is missing!)'
  echo '#   next 2 fields: tracker state and substate'
  echo '#   next 1 field: RFC editor state'
  echo '#'
  echo ''
} > idauthors_withstates.txt

{
  echo '# RFC author entries, combined with process timing information'
  echo '# '
  echo '# Fields are separated by : and are the following:'
  echo '#   first 9 fields: as in rfcauthors.txt (note that e-mail field is missing!)'
  echo '#   next 1 field: name of draft that the RFC came from'
  echo '#   next 15 fields: as in draft-timing.txt (except first two fields, which are omitted)'
  echo '#'
  echo ''
} > rfcauthors_withids.txt

###
### Parse Discusses
###

echo 'Parse Discusses...'

# Turn discusses.html into discusses.txt, one "draft:name" line per
# recognised entry. An entry is attributed to the most recent draft
# link seen before it; entries before the first link are ignored.
awk -F/ '
  # A line holding nothing but a link to a draft document selects the
  # current draft. With "/" as field separator the draft name is the
  # third field of such a line.
  /^<a href=".doc.draft-[-a-zA-Z0-9_]+.">draft-[-a-zA-Z0-9_]+<.a>$/ {
    current = $3
  }

  # Two-word name followed by "(N days ago)" or "(N days ago for -NN)":
  # the name is the text in front of " (".
  current != "" &&
  (/^[A-Za-z]+ [A-Za-z]+ \([0-9]+ days ago\)<br.>$/ ||
   /^[A-Za-z]+ [A-Za-z]+ \([0-9]+ days ago for -[0-9]+\)<br.>$/) {
    print current ":" substr($0, 1, index($0, "(") - 2)
  }
' < discusses.html > discusses.txt

###
### Parse RFC Editor states
###

echo 'Parse RFC Editor states...'

# Turn queue.xml into rfceditorstates.txt, one "draft:state" line per
# <draft> element that is followed by a <state> element. Fields are
# split on "<" and ">", which puts the element text into field 3.
awk -F'[<>]' '
  # Remember the draft name without its "-NN.ext" version suffix.
  /<draft>draft-.*<.draft>/ {
    pending = $3
    sub(/-[0-9][0-9][.][a-z][a-z][a-z]/, "", pending)
    next
  }

  # The first state after a draft belongs to it; further states with
  # no draft in between are skipped.
  /<state>.*<.state>/ {
    if (pending != "")
      print pending ":" $3
    pending = ""
    next
  }
' < queue.xml > rfceditorstates.txt

###
### Join idauthors.txt and draft state information
###

echo Join drafts with state information...

# join_draft_states: read idauthors.txt style lines on stdin and print
# each line that starts with "draft-" with three fields appended:
# tracker state, tracker substate and RFC editor state.
#
# Reads:   /tmp/all_id.txt (tab separated, state text in column 3) and
#          rfceditorstates.txt in the current directory ("draft:state").
# Outputs: the extended lines on stdout.
#
# The looked-up values are read straight from a command pipe and start
# out empty for every input line, so a draft that is missing from a
# lookup file gets empty fields instead of the values found for the
# previous draft. No scratch files are written.
join_draft_states() {
  awk '
  BEGIN { FS = "[:]" }

  # Wrap s in single quotes so that /bin/sh passes it on verbatim; an
  # embedded single quote is spliced in through a double-quoted piece.
  function shquote(s) {
    gsub(/\047/, "\047\042\047\042\047", s);
    return "\047" s "\047";
  }

  # Run cmd and return the first line it prints, or "" if it prints
  # nothing. The pipe is closed so the same command can be run again.
  function firstline(cmd,    line) {
    line = "";
    cmd | getline line;
    close(cmd);
    return line;
  }

  /^draft-/ {
    # Draft name without the "-NN.ext" version suffix.
    id = $1;
    sub(/-[0-9][0-9][.][a-z][a-z][a-z]$/, "", id);

    # Column 3 of the matching all_id.txt line; when it contains "<",
    # only the text after the first "<" is kept.
    cmd = "grep " shquote("^" id "-[0-9][0-9]\t") " /tmp/all_id.txt";
    cmd = cmd " | cut -f3 -d" shquote("\t");
    cmd = cmd " | cut -f2 -d" shquote("<");
    state = firstline(cmd);

    # Normalise to "state:substate" (the substate may be empty).
    sub(/>/, "", state);
    sub(/::/, ":", state);
    if (state ~ /eplaced by/) state = "Replaced";
    if (index(state, ":") < 1) state = state ":";

    cmd = "grep " shquote("^" id ":") " rfceditorstates.txt | cut -f2 -d:";
    rfcedstate = firstline(cmd);

    printf("%s:%s:%s\n", $0, state, rfcedstate);
  }'
}

removecomments < idauthors.txt | join_draft_states >> idauthors_withstates.txt

###
### Join rfcauthors.txt and draft-timing.txt
###

echo Join RFCs with draft information...

# join_rfc_drafts: read rfcauthors.txt style lines on stdin and print
# each line that starts with "rfc" with the originating draft name and
# that draft's timing fields appended.
#
# Reads:   /tmp/rfc_id.txt ("rfcNNNN<TAB>draft-name-NN") and
#          /tmp/draft-timing.txt ("timing:draft-name:fields...").
# Outputs: the extended lines on stdout.
#
# The looked-up values are read straight from a command pipe and start
# out empty for every input line, so an RFC without a known draft gets
# empty fields instead of the values found for the previous RFC. No
# scratch files are written.
join_rfc_drafts() {
  awk '
  # Splitting on ":" and "." makes field 1 the bare "rfcNNNN" name.
  BEGIN { FS = "[:.]" }

  # Wrap s in single quotes so that /bin/sh passes it on verbatim; an
  # embedded single quote is spliced in through a double-quoted piece.
  function shquote(s) {
    gsub(/\047/, "\047\042\047\042\047", s);
    return "\047" s "\047";
  }

  # Run cmd and return the first line it prints, or "" if it prints
  # nothing. The pipe is closed so the same command can be run again.
  function firstline(cmd,    line) {
    line = "";
    cmd | getline line;
    close(cmd);
    return line;
  }

  /^rfc/ {
    rfcno = $1;

    # Draft the RFC came from, without its "-NN" version suffix.
    cmd = "grep -F " shquote(rfcno "\t") " /tmp/rfc_id.txt";
    cmd = cmd " | cut -f2 -d" shquote("\t");
    id = firstline(cmd);
    sub(/-[0-9][0-9]$/, "", id);

    # Timing fields of that draft, minus the leading "timing:<draft>:".
    # Without a draft name there is nothing to look up.
    timing = "";
    if (id != "") {
      cmd = "grep -F " shquote("timing:" id ":") " /tmp/draft-timing.txt";
      cmd = cmd " | cut -f3- -d:";
      timing = firstline(cmd);
    }

    printf("%s:%s:%s\n", $0, id, timing);
  }'
}

removecomments < rfcauthors.txt | join_rfc_drafts >> rfcauthors_withids.txt

###
### Ietfdb
###

echo IETF db creation...
mkdir ietfdb ietfdb/authors ietfdb/drafts ietfdb/rfcs

# File every RFC line of rfcauthors_withids.txt twice: under the author
# (ietfdb/authors/<author>/rfc.txt) and under the RFC itself
# (ietfdb/rfcs/<rfc>.txt). The author directory name is the second
# field, lower-cased and reduced to ASCII letters and digits.
removecomments < rfcauthors_withids.txt |
awk -F: '
  # Drop every character that is not an ASCII letter or digit.
  function cleanup(s) {
    gsub(/[^a-zA-Z0-9]/, "", s)
    return s
  }

  # Append one line to a file and close the file again straight away.
  function append(path, line) {
    print line >> path
    close(path)
  }

  /^rfc/ {
    # RFC name without its three-letter file extension.
    name = $1
    sub(/[.][a-z][a-z][a-z]$/, "", name)

    dir = "ietfdb/authors/" cleanup(tolower($2))
    system("mkdir -p " dir)
    append(dir "/rfc.txt", $0)
    append("ietfdb/rfcs/" name ".txt", $0)
  }
'

# store_draft_entries: read idauthors_withstates.txt style lines on
# stdin and file every line that starts with "draft" twice, relative to
# the current directory:
#   ietfdb/authors/<author>/id.txt     (appended)
#   ietfdb/drafts/<draft>/authors.txt  (appended)
# <author> is the second field, lower-cased and reduced to ASCII letters
# and digits; <draft> is the first field without its "-NN.ext" suffix.
# The draft directory name comes unmodified from the input data, so it
# is shell-quoted before being handed to mkdir.
store_draft_entries() {
  awk -F: '
  # Drop every character that is not an ASCII letter or digit.
  function cleanup(s) {
    gsub(/[^a-zA-Z0-9]/, "", s);
    return s;
  }

  # Wrap s in single quotes so that /bin/sh passes it on verbatim; an
  # embedded single quote is spliced in through a double-quoted piece.
  function shquote(s) {
    gsub(/\047/, "\047\042\047\042\047", s);
    return "\047" s "\047";
  }

  # Append one line to a file and close the file again straight away.
  function append(path, line) {
    print line >> path;
    close(path);
  }

  /^draft/ {
    draft = $1;
    sub(/-[0-9][0-9][.][a-z][a-z][a-z]/, "", draft);

    authordir = "ietfdb/authors/" cleanup(tolower($2));
    system("mkdir -p " authordir);
    append(authordir "/id.txt", $0);

    draftdir = "ietfdb/drafts/" draft;
    system("mkdir -p " shquote(draftdir));
    append(draftdir "/authors.txt", $0);
  }'
}

removecomments < idauthors_withstates.txt | store_draft_entries

# build_ad_entries: add per-AD information to the ietfdb tree in the
# current directory.
#
# Reads:  ads.txt (field 3 is "Last_First"), adwork.txt (field 6 is a
#         draft name on lines containing ":Last_First:"),
#         idauthors_withstates.txt and discusses.txt ("draft:First Last").
# Writes, for every AD:
#   ietfdb/authors/<firstlast>/ad.txt         author lines of the drafts
#                                             listed for the AD (appended)
#   ietfdb/drafts/<draft>/ad.txt              "First Last" (overwritten)
#   ietfdb/authors/<firstlast>/discusses.txt  drafts with an entry for
#                                             this AD in discusses.txt
#   ietfdb/drafts/<draft>/discusses.txt       "First Last" (appended)
# <firstlast> is the first and last name joined and lower-cased (ASCII).
build_ad_entries() {
  local ad adfirst adlast addir doc discusseddraft
  local idaddir=ietfdb/drafts

  # The command substitutions in the three "for" lists are left
  # unquoted on purpose: every whitespace-separated word is one item.
  for ad in $(cut -f3 -d: ads.txt)
  do
    adfirst=$(printf '%s\n' "$ad" | cut -f2 -d_)
    adlast=$(printf '%s\n' "$ad" | cut -f1 -d_)
    addir=ietfdb/authors/$(printf '%s%s\n' "$adfirst" "$adlast" |
      tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz')
    mkdir -p "$addir"

    for doc in $(grep -F ":$ad:" adwork.txt | cut -f6 -d:)
    do
      # Only lines for exactly "<doc>-NN.txt"; the dot is literal.
      grep "^$doc-[0-9][0-9][.]txt:" idauthors_withstates.txt >> "$addir/ad.txt"
      mkdir -p "$idaddir/$doc"
      echo "$adfirst $adlast" > "$idaddir/$doc/ad.txt"
    done

    grep ":$adfirst $adlast\$" discusses.txt | cut -f1 -d: > "$addir/discusses.txt"
    for discusseddraft in $(cat "$addir/discusses.txt")
    do
      mkdir -p "$idaddir/$discusseddraft"
      echo "$adfirst $adlast" >> "$idaddir/$discusseddraft/discusses.txt"
    done
  done
}

build_ad_entries

###
### SCP
###

echo Copying...

# Publish the two joined files and the ietfdb tree: upload them with scp
# when DOSCP is 1, otherwise copy them into the local web directory.
case "$DOSCP" in
  1)
    scp -q -rp idauthors_withstates.txt rfcauthors_withids.txt ietfdb jarkko@users.piuha.net:public_html/tools
    ;;
  *)
    cp -rp idauthors_withstates.txt rfcauthors_withids.txt ietfdb /home/jarkko/public_html/tools
    ;;
esac

