#!/usr/bin/perl5
# CountFDF for Adobe Acrobat 5/6/7 .fdf files
# by Rob Elliott, HP (elliott@hp.com)
# distributed on http://www.t10.org/tools.htm
# 
# first release 23 March 2005
# updated 19 April 2005 to fix Acrobat 6 Author parsing, support files with
# binary content, and simplify hex characters 
# 
# based in part on Ralph Weber's fdf2cmt_pgm.pl
# .fdf parsing identical to that in ParseFDF.pl
#
# inputs an .fdf file containing comments following certain
# rules, outputs a count and categorization of the comments
# 
# Usage, in Acrobat (or Acrobat Reader 7 for some .PDF files),
# Export comments to any <filename>.FDF file
# perl parsecomments.pl <filename>.fdf
#
# Outputs the results to stdout
#
# While resolving comments, add a line at the top of 
# each comment containing:
# ACCEPT - DONE (accepted comments that have been implemented)
# ACCEPT - TODO (accepted comments that have not yet been implemented)
# REJECT - TODO (rejected comments that still require some work)
# REJECT (rejected comments)
# REFER (comments that need WG discussion)
# REFER - VOTE (comments that need WG vote)
# etc.  (see the code below for the exact strings)
#
# This program counts the number of comments with each of those
# terms and reports them by chapter number and by author.
#
use Text::Wrap;	# import wrap ()

# Column at which Text::Wrap wraps output lines.
$Text::Wrap::columns = 76;

# Starting PDF page number of each chapter; the array index is the
# chapter number.  Entry 0 is "Chapter 0", used for the preliminary
# sections, and must be 0.  Fill in this list for the document being
# reviewed, e.g. for SBC-2:
#   @chapterstartpage = (0, 22, 24, 26, 31, 51, 122, 143, 147);
@chapterstartpage = (
	0,	# Chapter 0 (preliminary sections)
	41,	# Chapter 1
	43,	# Chapter 2
	46,	# Chapter 3
	74,	# Chapter 4
	132,	# Chapter 5
	200,	# Chapter 6
	251,	# Chapter 7
	363,	# Chapter 8
	383,	# Chapter 9
	437,	# Chapter 10
	505,	# Chapter 11
);


# Map a PDF page number to the label of the chapter containing it.
#   $page - PDF page number of the comment
# Returns " Chapter N", where N is the index of the last entry of
# @chapterstartpage that is less than or equal to $page, or
# " Chapter 0" when no entry qualifies.  The label starts with a space
# so that it sorts first.
sub report_chapter {
	my ($page) = @_;

	# Scan from the back: the first hit is the last matching entry
	for my $index (reverse 0 .. $#chapterstartpage) {
		return " Chapter $index" if $page >= $chapterstartpage[$index];
	}

	return " Chapter 0";
}

# Seed a zero total for every chapter so that each chapter is listed in
# the report even when it has no comments.  The chapter entries live in
# the same %authors hash as the real authors.
$init_chapter = 0;
foreach (@chapterstartpage) {
	$authors{" Chapter $init_chapter"} = 0;
	$init_chapter++;
}

# The only command line argument is the .fdf file to read
$infile = $outfile = shift (@ARGV);
if( !defined( $infile ) || length( $infile ) == 0 ) { die "No FDF file to process"; };
print "Processing \"$infile\"\n";

# Three-argument open: the name is used exactly as given, so leading or
# trailing blanks and characters such as < > | are not taken as a mode
open( IN, "<", $infile ) || die ("Error opening $infile: $!\n");
binmode IN;	# some .fdfs contain paperclips which include binary streams

# FDF files use CR as line separator
# CR is chr(13) and LF is chr(10)
$normal_crlf = $/;
$/ = chr(13);

ParseComments();
PrintStats();

# Close FDF/PDF file & restore line separator
close( IN ) || die ("Error closing $infile: $!\n");
$/ = $normal_crlf;


# Scan one piece of .fdf text for the simple annotation attributes and
# record each one that is present in its global variable:
#   endobj          sets $endobj to 1
#   /Parent         sets $skipthisobj to 1
#   /T (name)       stores name in $author
#   /Page n         stores n + 1 in $page
#   /Subtype /name  stores name in $subtype
# Globals whose attribute does not appear in the text are left alone.
sub SearchForStuff {
	my ($fragment) = @_;

	$endobj      = 1  if $fragment =~ /endobj/;
	$skipthisobj = 1  if $fragment =~ /\/Parent/;
	$author      = $1 if $fragment =~ /\/T\s*\(([^)]*)\)/;

	# in Acrobat 7, page numbers are 0-based in the .fdf file
	$page = $1 + 1 if $fragment =~ /\/Page\s+(\d+)/;

	$subtype = $1 if $fragment =~ /\/Subtype\s*\/(\w+)/;
} # SearchForStuff

# Clean up one raw line of comment text.
#   - removes CR and LF characters
#   - turns an escaped tab (\t) into a space
#   - removes a trailing backslash
#   - turns an escaped close parenthesis \) into a single @ so that it
#     is not mistaken for the end of the string (ParseComments turns
#     every @ back into a ")" afterwards, so a comment that contains a
#     @ of its own will be corrupted)
#   - simplifies fancy dashes, quotes and the multiply sign, both as raw
#     bytes and as the backslash-octal escapes used in Postscript/PDF
# Returns the cleaned-up copy; the caller's string is not modified.
sub CleanupLine {
	my ($cleaned) = @_;

	for ($cleaned) {
		s/\r//g;
		s/\n//g;
		s/\\t/ /g;	# tabs
		s/\\$//;	# trailing \

		s/\\\)/@/g;	# \) becomes @

		# raw bytes
		s/\x84/-/g;	# dash  (octal 204)
		s/\x85/-/g;	# dash  (octal 205)
		s/\x8D/"/g;	# double quote (octal 215)
		s/\x8E/"/g;	# double quote (octal 216)
		s/\x90/\'/g;	# single quote (octal 220)
		s/\xD7/x/g;	# multiply (octal 327)

		# the same characters written as backslash-octal escapes
		s/\\203//g;	# this appears next to 215/216 (quotes)
		s/\\204/-/g;	# dash
		s/\\205/-/g;	# dash
		s/\\215/\"/g;	# opening double quote
		s/\\216/\"/g;	# closing double quote
		s/\\220/\'/g;	# single quote
		s/\\327/x/g;	# multiply
	}

	return $cleaned;
} # CleanupLine

# Walk through the .fdf file open on the IN filehandle and hand every
# real comment to ParseSingleComment.
#
# For each line the simple attributes are collected by SearchForStuff.
# The /Contents string, which can be split over several lines, is
# rebuilt in the global $comment.  Once "endobj" has been seen the
# comment is counted, unless it has no author, has a /Parent, or is one
# of the "<state> set by <author>" pseudo-comments.
#
# Takes no arguments.  Works through the globals $author, $page,
# $subtype, $comment, $endobj, $skipthisobj and $number.
sub ParseComments {
	my ($line);

	# Skip ahead to (and consume) the first line that mentions "Type".
	# NOTE(review): this test was written as s/Type\s*/Annot/, which looks
	# like a mistyped match for "/Type /Annot".  Only its success was ever
	# used, so a plain match behaves the same - confirm the intent.
	while( $line = <IN> ) {
		chomp $line;
		last if ($line =~ /Type/);
	};

	# Process the comments
	$author = $page = $subtype = $text = $comment = "";
	$number = 1;
	while( $line = <IN> ) {
		chomp $line;

		# parse the simple attributes
		SearchForStuff ($line);

		# the Contents attribute can be split over multiple lines
		if( $line =~ s/\/Contents\s*\((.*)// ) {
			$line = $1;  # garbage stripped off the left but not the right
			$line = CleanupLine ($line);
			$line =~ s/\).*/\)/;  # strip off anything after any remaining )
			my $done = $line =~ /\)/;
			$comment = $line;	# start of the full string

			# pull in lines until we find a bare )
			while (!$done) {
				$line = <IN>;
				# stop at end of file only; a last line of "0" is still data
				last unless defined ($line);
				$line = CleanupLine ($line);

				# strip off anything after any remaining )
				# in Acrobat 6, the leftover contains more attributes to
				# parse; only look at it when the ) was really found, as
				# $1 otherwise still holds text from an earlier match
				if ($line =~ s/\s*\)(.*)/\)/) {
					my $restofline = $1;
					SearchForStuff ($restofline);
				}

				$line =~ s/\s*$//;	# trailing spaces
				$done = $line =~ /\)/;
				$comment .= $line;	# continue
			}

			$comment =~ s/\)$//;	# get rid of the final )
			$comment =~ s/\\\\/\\/g;# allow \
			$comment =~ s/\\r/\n/g;	# allow forced CRs
			$comment =~ s/\\n/\n/g;	# allow forced CRs
			$comment =~ s/\\\(/(/g;	# allow (
			$comment =~ s/@/)/g;	# put back the internal )s
		}; # if Contents

		$comment =~ s/\n+$//;	# get rid of trailing blank lines (allowed in the middle)

		# Acrobat 7 litters the .fdf with these pseudo-comments
		# the author names can even be obsolete
		# \Q...\E makes the author name match literally; unquoted, a name
		# containing ( or ending in \ is a fatal regular expression error
		if ($endobj && $author && !$skipthisobj &&
		    $comment !~ /(?:Accepted|Rejected|Canceled|Completed|Unmarked) set by \Q$author\E/) {

			ParseSingleComment();
			$number++;
		} # if

		# reset for next object
		if ($endobj) {
			$author = "";
			$page = "";
			$subtype = "";
			$comment = "";
			$endobj = 0;
			$skipthisobj = 0;
		}
	}; # while line
} # ParseComments

# Decide which status category a comment belongs to.
#   $text - the full comment text
# Returns the category name (which is also the name of the counter
# variables) or undef when the comment has no status yet.  The tests
# run from the most specific string to the most general; the first
# match wins.  The bare keywords must start the comment or a later line
# of it, while the "X - Y" forms of ACCEPT and REJECT may appear anywhere.
sub _CommentStatus {
	my ($text) = @_;

	return 'acceptdone' if $text =~ /ACCEPT - DONE/;
	return 'acceptlast' if ($text =~ /ACCEPT - TODO/) && ($text =~ /last edit/);
	return 'accepttodo' if $text =~ /ACCEPT - TODO/;
	return 'acceptonly' if $text =~ /(?:^|\n)ACCEPT/;
	return 'rejecttodo' if $text =~ /REJECT - TODO/;
	return 'rejectonly' if $text =~ /(?:^|\n)REJECT/;
	# accepted on a later line as well, like the other keywords, so that
	# it is not counted as a plain REFER
	return 'refervote'  if $text =~ /(?:^|\n)REFER - VOTE/;
	return 'refer'      if $text =~ /(?:^|\n)REFER/;
	return 'todoonly'   if $text =~ /(?:^|\n)TODO/;
	return;
}

# Count one comment.  Uses the global variables $author, $page and
# $comment.
# Every comment adds one to $annotation_count and to the %authors
# entries of its author and of its chapter.  A comment that carries a
# status also adds one to the total of that status and to the author
# and chapter entries of the hash with the same name.
sub ParseSingleComment {

	$annotation_count++;
	$chapter = report_chapter ($page);
	$authors{$author}++;		# count per author
	$authors{$chapter}++;		# count per chapter (chapter entries share %authors)

	# category name => [ overall total, per-author/per-chapter counts ]
	my %counters = (
		acceptdone => [ \$acceptdone, \%acceptdone ],
		acceptlast => [ \$acceptlast, \%acceptlast ],
		accepttodo => [ \$accepttodo, \%accepttodo ],
		acceptonly => [ \$acceptonly, \%acceptonly ],
		rejecttodo => [ \$rejecttodo, \%rejecttodo ],
		rejectonly => [ \$rejectonly, \%rejectonly ],
		refervote  => [ \$refervote,  \%refervote  ],
		refer      => [ \$refer,      \%refer      ],
		todoonly   => [ \$todoonly,   \%todoonly   ],
	);

	my $status = _CommentStatus ($comment);
	$foundstat = defined ($status) ? 1 : 0;
	return unless $foundstat;

	# a bare ACCEPT is reported as it is found
	if ($status eq 'acceptonly') {
		print "acceptonly: author=$author chapter=$chapter page=$page\n";
	}

	my ($total, $counts) = @{ $counters{$status} };
	$$total++;
	$counts->{$author}++;
	$counts->{$chapter}++;
} # ParseSingleComment

# Print the statistics to stdout.
# First one section per key of %authors, in sorted order.  This includes
# the per-chapter statistics since chapter headers are also stored in
# the %authors hash.  Each section lists the total, every status with a
# non-zero count, the number worked and the number left.
# Then the global totals, each with its percentage of all comments.
# All percentages are reported as 0 when there are no comments at all.
sub PrintStats {
	# label and per-author counts of every status, in print order
	my @author_rows = (
		[ "   Accept - Done", \%acceptdone ],
		[ "   Accept - Todo", \%accepttodo ],
		[ "   Accept - Last", \%acceptlast ],
		[ "   Accept (only)", \%acceptonly ],
		[ "   Reject - Todo", \%rejecttodo ],
		[ "   Reject (only)", \%rejectonly ],
		[ "    Refer - Vote", \%refervote ],
		[ "           Refer", \%refer ],
		[ "     Todo (only)", \%todoonly ],
	);

	print "Per-author statistics\n";
	foreach my $name (sort keys %authors) {
		print "$name\n";
		print "  Total comments: " . $authors{$name} . "\n";

		my $worked = 0;
		foreach my $row (@author_rows) {
			my ($label, $counts) = @$row;
			my $count = $counts->{$name} || 0;
			next unless $count;
			print "$label: $count\n";
			$worked += $count;
		}
		print "    Total worked: $worked\n";
		print "            Left: " . ($authors{$name} - $worked) . "\n";
	}


	# Print global statistics
	my $total = $annotation_count || 0;

	# share of all comments in percent; 0 for a file without comments
	# instead of a division by zero
	my $percent = sub {
		my ($count) = @_;
		return $total ? ($count / $total) * 100 : 0;
	};

	# label and overall count of every status, in print order
	my @total_rows = (
		[ " Accept - Done", $acceptdone ],
		[ " Accept - Last", $acceptlast ],
		[ " Accept - Todo", $accepttodo ],
		[ " Accept (only)", $acceptonly ],
		[ " Reject - Todo", $rejecttodo ],
		[ " Reject (only)", $rejectonly ],
		[ "         Refer", $refer ],
		[ "  Refer - Vote", $refervote ],
		[ "   Todo (only)", $todoonly ],
	);

	print "\n----------------\n";

	print "Total comments: $total\n";
	$totalworked = 0;
	foreach my $row (@total_rows) {
		my ($label, $count) = @$row;
		$count ||= 0;
		# %% is the printf conversion for a literal percent sign
		printf ("%s: %d (%2.2f%%)\n", $label, $count, $percent->($count));
		$totalworked += $count;
	}

	my $left = $total - $totalworked;
	printf ("          Left: %d (%2.2f%%)\n", $left, $percent->($left));
	printf ("  Total worked: %d (%2.2f%%)\n", $totalworked, $percent->($totalworked));
} # PrintStats
