#
# usenixDB.pm
# Dan Wallach <dwallach@cs.rice.edu>
#
# This file contains all the shared mechanics used by everything else.
# This does the full-blown Perl module thing, so if you want to add
# a new symbol here, you have to hack both the body of the module and
# the awful manifest at the top.  It's the Perl Way (tm).
#
# Note that I don't explicitly export any symbols into the user's namespace.
# Instead, I force every user of this package to type usenixDB::symbol for
# every symbol they use.  It's a little more verbose but it helps catch bugs.
#
package usenixDB;

use strict;

BEGIN {
    use Exporter   ();
    use vars       qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

    # The module version is derived from the RCS/CVS revision keyword.
    $VERSION = do { my @r = (q$Revision: 1.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # must be all one line, for MakeMaker

    # Standard Exporter boilerplate with every export list left empty:
    # callers are expected to write usenixDB::symbol explicitly.
    @ISA         = qw(Exporter);
    @EXPORT      = ( );
    @EXPORT_OK   = ( );
    %EXPORT_TAGS = ( );
}
use vars      @EXPORT_OK;

#
# Package globals.  None of them is exported.
#
use vars qw(
    $rootDir $reviewDir $papersDir
    $assignmentsFile $conflictsFile $paperStatusFile $committeeFile
    $webroot $sendmailStr
    $totalAccepted
    @ids @reviewFields @sessions
    %db %reviewDB %reviewerStats
    %idToReviewers %reviewerToID
    %numReviews %numReviewsComplete
    %conflicts %committee
);

#
# Configuration: where the data lives.  The reader routines in this
# package build their paths as "$rootDir/<name>".
#
$rootDir         = ".";
$reviewDir       = "reviews";
$papersDir       = "papers";
$assignmentsFile = "assignments.txt";
$conflictsFile   = "conflicts.txt";
$paperStatusFile = "paper-status.txt";
$committeeFile   = "committee.txt";

# Output directory for make-review-forms and make-web.
$webroot = "/home/dwallach/public_html/UsenixSec2001";

# Pipe command to run in order to send an RFC822 e-mail.
$sendmailStr = "|/usr/lib/sendmail -t";

#
# Paper data.
#

# List of paper IDs (in the form "L%d").
@ids = ();

# Maps {id}{key} -> value.
%db = ();

# Running count of accepted papers.
$totalAccepted = 0;

# List of sessions; each entry is a hashtable whose valid fields are
# "Session", "Papers", "IT" and "Chair".  The chair should be a handle
# from the committee list.
@sessions = ();

#
# Review data.
#

# Names of the numeric review scores; handy for indexing into %reviewDB.
@reviewFields = ("Relevance", "Presentation", "Quality", "Overall", "Self");

# The big review database:
#   id, key, reviewer -> value
#     where key is one of the strings in @reviewFields
#   id, derived_key, key -> value
#     for the derived keys ("Average", "Stddev")
%reviewDB = ();

# Per-reviewer statistics:
#   reviewer, derived_key, key -> value
#     where key is one of the strings in @reviewFields
#     and derived_key is "Average", "Stddev", etc.
%reviewerStats = ();

# id -> list of assigned reviewers, and the inverse,
# reviewer -> list of assigned IDs.
%idToReviewers = ();
%reviewerToID  = ();

# reviewer -> number of reviews assigned
# (keys %numReviews is also a useful list of reviewers).
%numReviews = ();

# reviewer -> number of reviews completed.  This can have more keys than
# %numReviews because it also catches unsolicited reviews.
%numReviewsComplete = ();

#
# People.
#

# reviewer, id -> boolean (whether the reviewer has a conflict with the paper).
%conflicts = ();

# handle, field -> string
# (valid fields are "Name", "Affiliation", and "E-Mail").
%committee = ();

END { }

#
# comparePaperID
#
# Comparator for use with sort(): orders paper IDs numerically after
# dropping a leading "L" from each, so "L2" sorts before "L10".
# Reads the sort variables $a and $b; neither is modified.
#
sub comparePaperID {
    my ($left, $right) = map { my $n = $_; $n =~ s/^L//; $n } ($a, $b);
    return $left <=> $right;
}

#
# TODO: replace 'chomp' with something that will deal with \r\n as well
# as \n, since some of those creepy DOS files always find their way in.
#
my $hasReadReviews = 0;

#
# _parseReviewFile($path, $reviewFile)
#
# Private helper for readReviews: parses a single review file.
#   $path        full path of the file to open
#   $reviewFile  bare file name, used only in the error message
#
# Returns a hash reference with the keys "Title", "Author",
# "VerboseReviewer", "NonPC", "PaperNumber", "Relevance", "Presentation",
# "Quality", "Overall", "Self" (strings; empty when absent from the file)
# and "Public" / "Private" (array references of chomped comment lines).
# Dies if the file cannot be opened.
#
sub _parseReviewFile {
    my ($path, $reviewFile) = @_;

    # Three-argument open: the name comes from readdir, and with the
    # two-argument form a name ending in "|" would be run as a command.
    open(my $fh, "<", $path) ||
        die "Cannot read review $reviewFile: $!\n";

    my %review = (
        "Title"           => "",
        "Author"          => "",
        "VerboseReviewer" => "",
        "NonPC"           => 0,
        "PaperNumber"     => "",
        "Relevance"       => "",
        "Presentation"    => "",
        "Quality"         => "",
        "Overall"         => "",
        "Self"            => "",
        "Public"          => [],
        "Private"         => [],
    );

    # Read the header.  Note the look-ahead logic to deal with line
    # breaks -- some people's mailers insert them automatically.
    my $pending;    # line already read from the file but not yet processed
    for (;;) {
        my $line = defined($pending) ? $pending : <$fh>;
        $pending = undef;
        last unless defined $line;
        chomp $line;

        # the line of scores ends the header
        if ($line =~ /^(\d+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)/) {
            @review{qw(PaperNumber Relevance Presentation Quality Overall Self)} =
                ($1, $2, $3, $4, $5, $6);
            last;
        }

        if ($line =~ /^\#/) {
            # If the next line starts with something besides a hash or
            # a digit, then odds are it's a continuation of the current
            # line, so join them with whitespace and go around again.
            $pending = <$fh>;
            if (defined($pending) && $pending =~ /^[^\#\d]/) {
                $pending = $line . " " . $pending;
                next;
            }
        }

        if ($line =~ /^\# Title: TI\s+(.*)$/) {
            $review{"Title"} = $1;
        } elsif ($line =~ /^\# Author: AU\s+(.*)$/) {
            $review{"Author"} = $1;
        } elsif ($line =~ /^\# Reviewer:\s+(.*)$/) {
            $review{"VerboseReviewer"} = $1;
        } elsif ($line =~ /^\# IF YOU..THE REVIEWER. ARE A/) {
            $review{"NonPC"} = 1;
        }
    }

    # The body has two parts, one public and one private, separated by
    # the first line starting with "----".  The template marker lines
    # are dropped from both parts.
    my $inPrivate = 0;
    while (defined(my $line = <$fh>)) {
        if (!$inPrivate && $line =~ /^\s*----/) {
            $inPrivate = 1;
            next;
        }
        chomp $line;
        if ($inPrivate) {
            push(@{$review{"Private"}}, $line)
                unless $line =~ /<Private comments, each line indented/;
        } else {
            push(@{$review{"Public"}}, $line)
                unless $line =~ /<Public comments, each line indented/;
        }
    }
    close($fh);

    return \%review;
}

#
# _computeReviewStats()
#
# Private helper for readReviews: walks every paper in @ids and fills in
# the derived parts of the database.
#   %reviewDB:      {id}{"Incomplete"}{reviewer}, {id}{"Unexpected"}{reviewer},
#                   {id}{"NumReviews"}, and {id}{"Average"|"Variance"|"Stddev"}{key}
#   %reviewerStats: {reviewer}{"Average"|"Variance"|"Stddev"}{key}
# Variances divide by the number of reviews (population variance).
#
sub _computeReviewStats {
    my %rsum = ();   # {key}{reviewer} -> sum of that reviewer's scores
    my %rnum = ();   # reviewer -> number of submitted reviews counted

    foreach my $id (@ids) {
        # first, check that all the assigned reviews were submitted
        foreach my $r (keys %numReviews) {
            $reviewDB{$id}{"Incomplete"}{$r} = 1
                if $reviewDB{$id}{"Assigned"}{$r} &&
                    !$reviewDB{$id}{"Submitted"}{$r};
        }

        my @submitters = keys %{$reviewDB{$id}{"Submitted"}};
        my $num = scalar @submitters;
        my %sum = ();

        # this loop does all the addition needed for the averages
        foreach my $r (@submitters) {
            foreach my $key (@reviewFields) {
                $sum{$key} += $reviewDB{$id}{$key}{$r};
                $rsum{$key}{$r} += $reviewDB{$id}{$key}{$r};
            }
            $rnum{$r}++;
            $reviewDB{$id}{"Unexpected"}{$r} = 1
                if !$reviewDB{$id}{"Assigned"}{$r} &&
                    $reviewDB{$id}{"Submitted"}{$r};
        }

        $reviewDB{$id}{"NumReviews"} = $num;
        next unless $num;

        # per-ID average, then variance and stddev around it
        foreach my $key (@reviewFields) {
            $reviewDB{$id}{"Average"}{$key} = $sum{$key} / $num;
        }
        my %sqsum = ();
        foreach my $r (@submitters) {
            foreach my $key (@reviewFields) {
                my $tmp =
                    $reviewDB{$id}{"Average"}{$key} -
                        $reviewDB{$id}{$key}{$r};
                $sqsum{$key} += $tmp * $tmp;
            }
        }
        foreach my $key (@reviewFields) {
            $reviewDB{$id}{"Variance"}{$key} = $sqsum{$key} / $num;
            $reviewDB{$id}{"Stddev"}{$key} = sqrt($sqsum{$key} / $num);
        }
    }

    # A reviewer whose only reviews are for papers that are not in @ids
    # has nothing to average; skipping them avoids a fatal division by
    # zero.
    my @counted = grep { $rnum{$_} } keys %numReviewsComplete;

    # now that every review has been considered, we can compute the
    # per-reviewer average
    foreach my $r (@counted) {
        foreach my $key (@reviewFields) {
            $reviewerStats{$r}{"Average"}{$key} = $rsum{$key}{$r} / $rnum{$r};
        }
    }

    # and now we have to go back over it all again to compute the
    # per-reviewer stddev
    my %rsqsum = ();
    foreach my $id (@ids) {
        foreach my $r (keys %{$reviewDB{$id}{"Submitted"}}) {
            foreach my $key (@reviewFields) {
                my $tmp = $reviewerStats{$r}{"Average"}{$key} -
                    $reviewDB{$id}{$key}{$r};
                $rsqsum{$key}{$r} += $tmp * $tmp;
            }
        }
    }
    foreach my $r (@counted) {
        foreach my $key (@reviewFields) {
            $reviewerStats{$r}{"Variance"}{$key} = $rsqsum{$key}{$r} / $rnum{$r};
            $reviewerStats{$r}{"Stddev"}{$key} = sqrt($rsqsum{$key}{$r} / $rnum{$r});
        }
    }
    return;
}

#
# readReviews()
#
# Loads every review in "$rootDir/$reviewDir" into %reviewDB, counts the
# completed reviews in %numReviewsComplete, and then computes the
# per-paper and per-reviewer statistics.  Runs at most once; later calls
# return immediately.  Calls readAssignments and readPaperInfo first.
#
# Review files are named "<paper number>-<reviewer>.txt".  Dot files,
# names ending in "~", and anything else not matching that pattern
# exactly are ignored.  Progress dots go to STDERR.  Dies if the
# directory or one of the review files cannot be read.
#
sub readReviews {
    return if $hasReadReviews;
    $hasReadReviews = 1;

    # need this stuff in the database first
    readAssignments();
    readPaperInfo();

    my $dir = "$rootDir/$reviewDir";
    opendir(my $dh, $dir) || die "can't read reviews directory: $!\n";
    my @reviewFiles = readdir($dh);
    closedir($dh);

    # first, load in all the files and flesh out the basic database;
    # the statistics are computed afterward
    printf STDERR "Reading reviews";
    foreach my $reviewFile (sort @reviewFiles) {
        next if $reviewFile =~ /^\./ || $reviewFile =~ /~$/;
        printf STDERR ".";

        # the dot is escaped and the pattern anchored at the end so that
        # leftovers such as "7-bob.txt.bak" are not read as reviews
        next unless $reviewFile =~ /^(\d+)-(\w+)\.txt$/;
        my ($paperNumber, $reviewer) = ($1, $2);

        my $review = _parseReviewFile("$dir/$reviewFile", $reviewFile);

        # sanity checking: the number in the file name should agree with
        # the one on the scores line
        my $paperNumber2 = $review->{"PaperNumber"};
        if ($paperNumber != $paperNumber2) {
            warn "Bogosity with $reviewFile: weird paper numbers ($paperNumber,$paperNumber2)\n";
        }

        # convert from old format to new format
        my $id = sprintf("L%d", $paperNumber);

        # "7-bob.txt" and "007-bob.txt" both map to L7, so a review can
        # show up twice; only count it once
        if ($reviewDB{$id}{"Submitted"}{$reviewer}) {
            warn "Review by $reviewer for paper $id seen more than once!\n";
        } else {
            $numReviewsComplete{$reviewer}++;
        }

        $reviewDB{$id}{"Title"} = $review->{"Title"};
        $reviewDB{$id}{"Author"} = $review->{"Author"};
        $reviewDB{$id}{"Submitted"}{$reviewer} = 1;
        foreach my $key (qw(Public Private VerboseReviewer NonPC
                            Relevance Presentation Quality Overall Self)) {
            $reviewDB{$id}{$key}{$reviewer} = $review->{$key};
        }
    }
    printf STDERR "\n";

    _computeReviewStats();
    return;
}

my $hasReadPaperInfo = 0;

#
# readPaperInfo()
#
# Rebuilds @ids and %db from the per-paper directories found in
# "$rootDir/$papersDir".  Runs at most once; later calls return
# immediately.  Both globals are emptied first, so anything stored in
# %db before this call is lost.
#
# For each directory (dot entries are skipped, the rest are visited in
# comparePaperID order):
#   info.txt         first line is "ID: <id>"; every following line is
#                    "<key>: <value>" and is stored as $db{id}{key}.
#                    A "Mailstop" value is split at "City:"; the text
#                    after "City: " goes to $db{id}{"City"} (empty
#                    string when there is none).
#   txtabstract.txt  all lines joined with spaces into $db{id}{"Abstract"}.
#
# Dies if the directory or an info.txt cannot be read; a missing
# abstract only produces a warning.  Progress dots go to STDERR.
#
sub readPaperInfo {
    return if $hasReadPaperInfo;
    $hasReadPaperInfo = 1;

    my $dir = "$rootDir/$papersDir";
    opendir(my $dh, $dir) || die "can't read papers directory: $!\n";
    my @papers = readdir($dh);
    closedir($dh);

    @ids = ();  # global: list of paper IDs
    %db = ();   # global: maps {id}{key} -> value
                # (assigning {} here would insert a bogus "HASH(0x...)" key)

    printf STDERR "Reading paper info";

    foreach my $paper (sort comparePaperID @papers) {
        next if $paper =~ /^\./;
        printf STDERR ".";

        open(my $info, "<", "$dir/$paper/info.txt") ||
            die "Can't read paper $paper/info.txt: $!\n";

        my $id = <$info>;
        $id = "" unless defined $id;    # empty info.txt
        chomp $id;
        $id =~ s/^ID: //;
        push(@ids, $id);

        while (defined(my $line = <$info>)) {
            chomp $line;

            my ($key, $val) = $line =~ /([^:]*): (.*$)/;
            unless (defined $key) {
                # print, not printf: the line is data, not a format.
                # Then skip it, so that the previous key/value pair is
                # not stored a second time.
                print STDERR "($id) funny line: $line\n";
                next;
            }

            if ($key eq "Mailstop") {
                # list context is needed to get the captured text
                # rather than the match's true/false result
                my ($city) = $val =~ /City: (.*$)/;
                $val =~ s/City:.*$//;
                $db{$id}{"City"} = defined($city) ? $city : "";
            }

            $db{$id}{$key} = $val;
        }
        close($info);

        if (open(my $abs, "<", "$dir/$paper/txtabstract.txt")) {
            my $abstract = "";
            while (defined(my $line = <$abs>)) {
                chomp $line;
                $abstract .= " " . $line;
            }
            close($abs);
            $db{$id}{"Abstract"} = $abstract;
        } else {
            warn "Can't read paper $paper/txtabstract.txt: $!\n";
        }
    }
    printf STDERR "\n";
    return;
}

my $hasReadAssignments = 0;

#
# readAssignments()
#
# Reads "$rootDir/$assignmentsFile", whose lines look like
#     L12=alice,bob,carol
# and records the assignments in %idToReviewers, %reviewerToID,
# %numReviews and $reviewDB{id}{"Assigned"}{reviewer}.  Lines that do
# not match that form are ignored.  Whitespace around reviewer names is
# removed and empty names are dropped.
#
# Runs at most once; later calls return immediately.  Calls
# readCommittee first so that each reviewer can be checked against the
# committee list (a mismatch only produces a warning).  Dies if the file
# cannot be opened.
#
sub readAssignments {
    return if $hasReadAssignments;
    $hasReadAssignments = 1;

    readCommittee();

    open(my $fh, "<", "$rootDir/$assignmentsFile") ||
        die "no $assignmentsFile: $!\n";

    while (defined(my $line = <$fh>)) {
        chomp $line;
        next unless $line =~ /^\s*(L[0-9]+)=(.*)/;

        # TODO: add code to make sure ID is valid
        my ($id, $reviewerString) = ($1, $2);

        # "alice, bob" must yield "bob", not " bob"
        my @reviewers = ();
        foreach my $name (split(/,/, $reviewerString)) {
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;
            push(@reviewers, $name) if length $name;
        }

        # global: maps id -> list of reviewers
        $idToReviewers{$id} = \@reviewers;

        foreach my $reviewer (@reviewers) {
            # global: part of the big reviewer database
            #   maps id, key, reviewer -> value
            $reviewDB{$id}{"Assigned"}{$reviewer} = 1;
            warn "Review $id assigned to $reviewer, not on committee"
                unless $committee{$reviewer};

            # global: maps reviewer -> list of IDs
            push @{ $reviewerToID{$reviewer} }, $id;

            # global: maps reviewer -> number of assignments
            # (also keys %numReviews gives a useful list)
            $numReviews{$reviewer}++;
        }
    }

    close($fh);
    return;
}

my $hasReadConflicts = 0;

#
# readConflicts()
#
# Reads "$rootDir/$conflictsFile".  Each line names a reviewer followed
# by the paper IDs that reviewer has a conflict with, separated by
# colons and/or whitespace, e.g.
#     alice: L3 L17
# and sets $conflicts{reviewer}{id} = 1 for every pair.  Blank lines are
# skipped.  A reviewer who is not in %committee gets one warning and the
# line is ignored.
#
# Runs at most once; later calls return immediately.  Calls
# readCommittee first.  Dies if the file cannot be opened.
#
sub readConflicts {
    return if $hasReadConflicts;
    $hasReadConflicts = 1;

    readCommittee();

    open(my $fh, "<", "$rootDir/$conflictsFile") ||
        die "no $conflictsFile: $!\n";

    while (defined(my $line = <$fh>)) {
        chomp $line;
        # leading whitespace would otherwise produce an empty reviewer name
        $line =~ s/^\s+//;

        my ($r, @confList) = split(/[:\s]+/, $line);
        next unless defined($r) && length($r);

        unless ($committee{$r}) {
            warn "$conflictsFile mentions $r, not on committee";
            next;
        }

        foreach my $id (@confList) {
            # TODO: add code to make sure ID is valid
            $conflicts{$r}{$id} = 1;
        }
    }

    close($fh);
    return;
}

my $hasReadPaperStatus = 0;

#
# readPaperStatus()
#
# Reads "$rootDir/$paperStatusFile", a sequence of stanzas separated by
# empty lines.  Each stanza is a set of "Field: value" lines, where the
# field is one of Session, Papers, Chair, IT or Shepherd, and Papers is
# a whitespace-separated list of paper IDs.
#
#   A stanza with a Session field marks each of its papers with
#   $db{id}{Accepted} = 1 and is appended to @sessions as a hash.
#   Otherwise, a stanza with a Shepherd field sets $db{id}{Shepherd}
#   for each of its papers.
#   Any other non-empty stanza is ignored with a warning.
#
# Afterwards $totalAccepted is increased by the number of IDs in @ids
# that are marked Accepted.
#
# NOTE: this routine does not call readPaperInfo itself.  @ids is only
# filled in by readPaperInfo, and readPaperInfo empties %db, so callers
# need to run readPaperInfo before this routine for the Accepted and
# Shepherd marks to survive and for $totalAccepted to be counted.
#
# Runs at most once; later calls return immediately.  Dies if the file
# cannot be opened.
#
sub readPaperStatus {
    return if $hasReadPaperStatus;
    $hasReadPaperStatus = 1;

    open(my $fh, "<", "$rootDir/$paperStatusFile") ||
        die "no $paperStatusFile: $!\n";

    my %validFields = (Session => 1, Papers => 1,
                       Chair => 1, IT => 1, Shepherd => 1);

    my %s = ();    # fields of the stanza currently being read

    # Commit the current stanza, if any, and start a new one.  An empty
    # stanza (repeated or trailing blank lines) is not worth a warning.
    my $flush = sub {
        return unless %s;

        # split ' ' skips runs of whitespace, so a doubled space cannot
        # produce an empty paper ID
        my @papers = defined($s{Papers}) ? split(' ', $s{Papers}) : ();

        if ($s{Session}) {
            foreach my $id (@papers) {
                $db{$id}{Accepted} = 1;
            }
            push(@sessions, {%s});
        } elsif ($s{Shepherd}) {
            foreach my $id (@papers) {
                $db{$id}{Shepherd} = $s{Shepherd};
            }
        } else {
            warn "Ignoring data from $paperStatusFile: unknown type";
        }
        %s = ();
    };

    while (defined(my $line = <$fh>)) {
        chomp $line;
        if ($line =~ /^([^:]*): (.*)$/) {
            if ($validFields{$1}) {
                $s{$1} = $2;
            } else {
                warn "Unknown field type $1";
            }
        } elsif ($line =~ /^$/) {
            $flush->();
        }
    }
    $flush->();    # the last stanza need not be followed by a blank line
    close($fh);

    # count the accepted papers (indexing by the loop variable; $_ is
    # not meaningful here)
    foreach my $id (@ids) {
        $totalAccepted++ if $db{$id} && $db{$id}{Accepted};
    }
    return;
}

my $hasReadCommittee = 0;

#
# readCommittee()
#
# Reads "$rootDir/$committeeFile", a sequence of records separated by
# empty lines.  Each record is a set of "Field: value" lines, where the
# field is one of Name, Affiliation, E-Mail or Handle.  A record with a
# Handle is stored as a hash in $committee{handle}; a non-empty record
# without one is ignored with a warning.  Unknown fields and lines that
# are neither empty nor of the "Field: value" form also produce
# warnings.
#
# Runs at most once; later calls return immediately.  Dies if the file
# cannot be opened.
#
sub readCommittee {
    return if $hasReadCommittee;
    $hasReadCommittee = 1;

    my %validFields = (Name => 1, Affiliation => 1,
                       "E-Mail" => 1, Handle => 1);

    open(my $fh, "<", "$rootDir/$committeeFile") ||
        die "no $committeeFile: $!\n";

    my %s = ();    # fields of the record currently being read

    # Commit the current record, if any, and start a new one.  An empty
    # record (repeated or trailing blank lines) is not worth a warning.
    my $flush = sub {
        if ($s{Handle}) {
            $committee{$s{Handle}} = {%s};
        } elsif (%s) {
            warn "Ignoring data from $committeeFile: unknown type";
        }
        %s = ();
    };

    while (defined(my $line = <$fh>)) {
        chomp $line;
        if ($line =~ /^([^:]*): (.*)$/) {
            if ($validFields{$1}) {
                $s{$1} = $2;
            } else {
                warn "Unknown field type $1";
            }
        } elsif ($line =~ /^$/) {
            $flush->();
        } else {
            warn "Ignoring data from $committeeFile: weird line";
        }
    }
    $flush->();    # the last record need not be followed by a blank line
    close($fh);
    return;
}


#
# printSummary($IO, $r_conflict)
#
# Writes a two-part plain-text report to the file handle $IO, using the
# data already present in %reviewerStats, %reviewDB, %numReviews,
# %numReviewsComplete and %conflicts:
#
#   Reviewer summary  one line per key of %numReviewsComplete, highest
#                     average "Overall" score first, showing average and
#                     stddev for each of @reviewFields followed by
#                     completed/assigned review counts.
#   Paper summary     one line per entry of @ids, highest average
#                     "Overall" first; ties go to the lower "Overall"
#                     stddev, then to the higher average "Quality".
#
# $r_conflict is optional.  When it is true it names a reviewer: the
# report then starts with that reviewer's conflicting papers, and for
# those papers only the ID is printed in the paper summary.
#
sub printSummary {
    my ($IO, $r_conflict) = @_;

    # column titles shared by both tables
    my $fieldHeader = join("", map {sprintf("%-12s", $_);} @reviewFields);

    # formats the "average stddev" column pairs for one stats record
    my $scoreColumns = sub {
        my ($stats) = @_;
        return join("",
                    map {sprintf("%3.1f %4.2f   ",
                                 $stats->{Average}{$_},
                                 $stats->{Stddev}{$_});}
                    @reviewFields);
    };

    if ($r_conflict) {
        my @conflicting = sort comparePaperID keys %{$conflicts{$r_conflict}};
        printf $IO "Paper summaries for $r_conflict (conflicting papers: %s)\n\n",
          join(" ", @conflicting);
    }

    #
    # reviewers
    #
    printf $IO "--------------------------------------------------\n";
    printf $IO "Reviewer Summary (sorted by average overall score)\n";
    printf $IO "--------------------------------------------------\n";
    printf $IO "Reviewer   %s NRvws\n", $fieldHeader;

    my @reviewers = sort {
        $reviewerStats{$b}{Average}{Overall} <=> $reviewerStats{$a}{Average}{Overall}
    } keys %numReviewsComplete;

    foreach my $r (@reviewers) {
        my $columns = $scoreColumns->($reviewerStats{$r} ||= {});
        printf $IO "%-10s %s %2d/%2d\n",
          $r, $columns, $numReviewsComplete{$r}, $numReviews{$r};
    }

    #
    # papers
    #
    printf $IO "\n-------------\n";
    printf $IO "Paper Summary\n";
    printf $IO "-------------\n";
    printf $IO "ID   %sNRvw\n", $fieldHeader;

    my @ranked = sort {
        my ($avgA, $avgB) =
            ($reviewDB{$a}{Average}{Overall}, $reviewDB{$b}{Average}{Overall});
        my ($stddevA, $stddevB) =
            ($reviewDB{$a}{Stddev}{Overall}, $reviewDB{$b}{Stddev}{Overall});
        my ($qualityA, $qualityB) =
            ($reviewDB{$a}{Average}{Quality}, $reviewDB{$b}{Average}{Quality});

        $avgB <=> $avgA
            || $stddevA <=> $stddevB
            || $qualityB <=> $qualityA;
    } @ids;

    foreach my $id (@ranked) {
        if ($r_conflict && $conflicts{$r_conflict}{$id}) {
            # scores are withheld from a reviewer with a conflict
            printf $IO "%-3s\n", $id;
            next;
        }
        my $columns = $scoreColumns->($reviewDB{$id} ||= {});
        printf $IO "%-3s  %s%d\n", $id, $columns, $reviewDB{$id}{NumReviews};
    }
}

1;
