#!/usr/pkg/bin/perl -w

# cvsreport (c) 2003 Vincent Caron <v.caron@zerodeux.net>
#
# Build commit activity reports from a CVS repository with a rich
# framework to create nicely formated text and/or HTML output.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use Getopt::Long qw(:config no_auto_abbrev no_ignore_case bundling);
use Digest::MD5 qw(md5_hex);
use POSIX qw(mktime strftime tzset);
use Fcntl qw(:flock);
use Cwd qw(getcwd);
use Net::SMTP;
use Net::Domain qw(hostfqdn);
use strict;



#
# Globals
#

my $package_name    = 'cvsreport';
my $package_version = '0.3.5';
my $package_url     = 'http://www.nongnu.org/cvsreport/';

# Can be overriden by command line
my $localconfig = 0;
my @configfiles = ();
my $cvsroot     = '';
my $all_users   = 0;
my $cset_from   = '';
my $cset_max    = 0;  # Unlimited cset number
my @execute     = ();

# %config layout :
#
#  $config{set}{'myset'}{'key'} => value (string)
#  @{%config{command}}          => array of command strings
my %config;

# Configuration defaults (this script is parsed first)
my $defaults = <<EOF;
set text {
  format   = text
  encoding = iso-8859-1
}

set html {
  format       = html
  encoding     = iso-8859-1
  title        = message
  footer       = Generated by <a href="$package_url">$package_name \$(version)</a>
  cgi_links    = no
  cgi_urlbase  = please_set_cgi_urlbase
  cgi_download = no
}

set mail {
  command     = mail
  subject     = message
# reply-to    =
# from        =
# smtp_server =
}
EOF

# Variables (subst'ed by their values in config files)
my %vars;

# Internals
my $tmp_path = "/tmp";
my $globalconfig = "/etc/cvsreport.conf";
my $ARGS;                  # Will store command line as a simple string
my $single_cset = 0;       # Optimization, see get_file_info()
my $cset_time_leap = 121;  # In second, currently >2min because 'cvs history' has minute precision
my @history = ();
my @csets = ();
my @toclean = ();



#
# Misc tools
#

# Define the global env var CVSREPORT_DEBUG and see cvsreport flooding your stderr.
# See also command line switch.
#
my $do_debug = defined $ENV{'CVSREPORT_DEBUG'} ? 1 : 0;
my $debug_file = "$tmp_path/cvsreport.log";  # When we don't have a tty to write to


# Usage: debug "message"
#
sub debug {
    print STDERR "** ". (shift) ."\n" if $do_debug;
}

# Cleanup : remove temporary files. This is automatically called by myexit(), don't
# call it directly. Note that this program ends with a myexit(0) on purpose.
#
sub cleanup {
    for my $path (@toclean) {
        debug "cleanup: $path";
        unlink $path;
    }
}

sub cleanup_add {
    push @toclean, shift;
}

# Safe exit.
#
# This program only uses myexit (and not 'exit'). This way in non-debug mode, cvsreport can't
# fail (from the user POV), and it won't drive you mad if you set it up as a commit filter.
# It also makes sure cleanup() is called before leaving. In debug mode it properly reports
# errors to its caller.
#
sub myexit {
    my $status = $do_debug ? shift : 0;

    cleanup();
    exit $status;
}

# Usage: error "message", $exit_code
#
sub error {
    print STDERR "cvsreport: error: ". (shift) ."\n";
    myexit shift;
}

# Like Perl's warn but preferred because it tells which program is talking, which
# is less confusing for the CVS client user. Same idea for error().
#
sub warning {
    print STDERR "cvsreport: warning: ". (shift) ."\n";
}

# Check if a 'human readable value' is true ('1' or 'yes').
#
sub get_bool {
    my $val = shift;
    return (defined $val and ($val eq 'yes' or $val eq '1'));
}

# Retrieve current time in preferred CVS format (UTC/GMT time)
#
# Eg. "2003-06-30 16:55:01 +0000"
#
sub get_timestamp {
    # CVS prefers GMT
    my @now = gmtime(time());
    return sprintf ("%d-%.2d-%.2d %.2d:%.2d:%.2d +0000", $now[5]+1900, $now[4]+1, $now[3], $now[2], $now[1], $now[0]);
}

# Parse a CVS formated date (year-month-day hour:minute +timezone).
# (we ignore timezone since 'cvs history' always uses UTC)
#
sub parse_utc_date {
    my ($ymd, $hm, $zone) = split / /, shift;
    my ($year, $month, $day) = split /[-\/]/, $ymd;
    my ($hour, $min) = split /:/, $hm;

    # Save TZ, wether it is defined or not.
    my $tz = defined $ENV{TZ} ? $ENV{TZ} : undef;
    # Go UTC.
    $ENV{TZ} = '';
    tzset();

    my $ctime = mktime(0, $min, $hour, $day, $month - 1, $year - 1900);

    # Restore local timezone.
    if (defined $tz) { $ENV{TZ} = $tz; } else { delete $ENV{TZ}; }
    tzset();

    return $ctime;
}

# Converts time stamp to human readable date, using local timezone.
#
sub get_local_date {
    my $ctime = shift;
    return strftime "%Y-%m-%d %H:%M %Z", localtime($ctime);
}

# Returns true if a cvsroot value is a local path
#
sub is_local {
    my $cvsroot = shift;

    # We don't have to check $ENV{CVS_RSH}
    return ($cvsroot =~ /^(:local:|\/)/);
}

# Check for existence of a process given its PID
#
sub is_alive {
    my $pid = shift;

    # Try with the /proc/<pid> interface (simple, efficient)
    if (-d '/proc/1') {         # init MUST exist or you're running some strange OS
        return -d "/proc/$pid";
    }

    error "is_alive() has no working implementation on this platform.", 3;
}

# Print a whole file content to currently select()'ed output.
#
sub print_file {
    my $src = shift;
    my $buffer;
    my $done = 0;

    open(SRC, "<$src") or return 0;
    while (my $bytes = read(SRC, $buffer, 64*1024)) {
        print $buffer;
        $done += $bytes;
    }
    close SRC;

    return $done;
}

# Setup some default variables
#
sub init_vars {
    my $now = time();

    my @lt = localtime($now);
    $vars{localtime} = sprintf ("%d-%.2d-%.2d %.2d:%.2d:%.2d", $lt[5]+1900, $lt[4]+1, $lt[3], $lt[2], $lt[1], $lt[0]);

    my @gmt = gmtime($now);
    $vars{gmtime} = sprintf ("%d-%.2d-%.2d %.2d:%.2d:%.2d", $gmt[5]+1900, $gmt[4]+1, $gmt[3], $gmt[2], $gmt[1], $gmt[0]);

    $vars{version} = $package_version;

    my $sfproject = $cvsroot;
    $sfproject =~ s/.*\///;
    $vars{sfproject} = $sfproject;
}

# Substitute variables references with their values.
#
# A variable reference looks like $(var).
#
sub subst_vars {
    my $text = shift;

    while ($text =~ /\$\(([^\)]*)\)/g) {
        my $val = defined $vars{$1} ? $vars{$1} : '';
        $text =~ s/\$\(([^\)]*)\)/$val/;
    }

    return $text;
}



#
# Core functions
#

# When invoked by CVS (eg. from CVSROOT/commitinfo filter), wait for full (recursive) end.
#
# The main CVS process (the 'driver') spawns one clone per folder (the 'command'), which
# in turn will call the filter (us!). Thus we have to :
#
#   * Detect if we are called from the first comand, we use a file lock tied to our
#     driver (ie. has its PID in its name). If the lock is already there, we simply
#     return success to the CVS command and the driver will continue happily.
#
#   * Otherwise on first call, we set the lock, and set ourselves as a background process,
#     waiting for driver PID to die, and then proceed normally.
#
sub wait_commit_completion {
    # This is the parent CVS process (the main driver, not the command fork that called us).
    my $cvs_pid = $ENV{CVS_PID};
    $cvs_pid = getpgrp(0) if not defined $cvs_pid;

    # We need a lock wich is specific to our driver ($cvs_pid will do), and can discriminate
    # simultaneous calls from the same filter invocation (a hash on parsed args will do).
    my $hash = md5_hex $ARGS;
    my $cvs_lock = "$tmp_path/cvsreport.lock.$cvs_pid.$hash";

    debug "wait_commit_completion():";
    debug "  lock: $cvs_lock";

    # Get lock handle.
    open(LOCK, ">$cvs_lock") or error("$cvs_lock: $!", 3);

    # If a lock is already there, ignore this script invocation, we already have
    # one waiting for commit completion (in the background).
    if (not flock(LOCK, LOCK_EX|LOCK_NB)) {
        debug "  lock already held by previous instance, exiting\n";
        myexit 0;
    }
    debug "  acquired lock" ;

    # Fork ourselves as a background child process, CVS will believe that this
    # filter is done and succesfull, and will go on.
    if (my $child = fork()) {
        debug "  forked child <$child> in background, exiting\n";
        myexit 0;
    } else {
        error("could not fork background commit polling process.", 3) unless defined $child;
    }

    # From here we are the longer lasting process, we'll do the cleanup.
    cleanup_add $cvs_lock;

    # If we don't close these streams, parent 'cvs server' will select() on them indefinitely.
    # We could also reopen them but we won't use them, we don't have any tty to attach to.
    close STDOUT;
    close STDERR;

    # Ahem. Although in debug mode this is handy to reopen stderr to some place.
    open(STDERR, ">>$debug_file") if $do_debug;
    debug "\n\n** $cvs_lock";

    # Poll for $cvs_pid existence at a given interval.
    debug "spawned by parent cvsreport process";
    debug "  waiting for parent $cvs_pid termination";
    sleep 2;      # Since CVS has internal second precision, make sure we sleep at least more than 1s.
    while (is_alive $cvs_pid) {
        sleep 2;  # is_alive is cheap, we can afford to poll quite often
    }
    debug "  parent dead" ;

    # Release lock. It will be unlinked by myexit().
    close LOCK;

    # Our cwd is no longer valid since it was a temporary place cleaned up
    # by our parent. Try to go to some workable place.
    chdir() or chdir($tmp_path);
}

# Compute branch name from a file revision
#
# http://www.loria.fr/~molli/cvs/doc/cvs_5.html#SEC53
#
sub get_branch_rev {
    my @rev = split /\./, shift;

    return '' if @rev < 4;  # From trunk (ie. 1.x form). 1.x.y is bogus but we report trunk.
    return join '.', (@rev[0 .. (@rev - 3)], '0', $rev[-2]);
}

# Return previous revision from a revision (revise that)
#
sub get_previous_rev {
    my @rev = split /\./, shift;

    # Even number count : branch, increment is 2
    # Odd number count : regular revision, increment is 1
    my $step = (@rev & 1) ? 2 : 1;
    $rev[-1] -= $step if $rev[-1] >= $step;

    return join '.', @rev;
}

# Run rlog on a CVS file to retrieve branch and message information.
#
# This one is pretty heavy, it spawns a CVS instance per file, this is why we have the
# $single_cset hack. We would need something like 'pipelined rlog requests' in CVS.
#
sub get_file_info_remote {
    my $file = shift;
    my $rev = shift;

    my $branch = '';
    my $message = '';
    my $branch_rev = get_branch_rev $rev;

    my $command = "cvs -d \"$cvsroot\" rlog -r$rev \"$file\"";
    open MESG, "$command 2>/dev/null |" or error "running '$command': $!", 3;

    while (<MESG>) {
        last if /^date:/;  # After this field, we have the message body (see further)

        if ($branch_rev ne '') {
            # Catch branch name among symbolic names
            chomp;
            my @sym = split /: */;
            $branch = $sym[0] if (@sym > 1) and ($sym[1] eq $branch_rev);
        }
    }
    while (<MESG>) {
        next if /^branches:/;            # This one sometimes appear after 'date:'
        $message .= $_ unless /^={40}/;  # Concatenate message lines till we found separator
    }

    close MESG;

    $branch =~ s/[ \t]+//g;  # Remove spurious whitespaces
    chomp $message;
    return ($branch, $message);
}

# This is an helper for get_file_info_local, which is a RCS parser. This part
# only parse sections delimited by '@' symbols, but in the funky way RCS likes it.
#
sub parse_rcs_block {
    my $fetch = shift;  # If true, return section content. Otherwise skip hastily.
    my $text = '';

    while (<RCS>) {
        chomp;
        return '' if $_ eq '@@';   # Very common case (empty block)
        return '' if not s/^\@//;  # If the line did not start with a '@', what are we doing here ?

        $text .= "$_\n" if $fetch;
        last;  # We only parse the first line in this while()
    }
    while (<RCS>) {
        chomp;
        last if $_ eq '@';  # Very common case of block ending

        my $line = $_;  # Save unchoped line
        if (chop eq '@' and chop ne '@') {  # A single @ at end of line
            chop $line;
            $text .= $line if $fetch;
            last;
        }

        $text .= "$line\n" if $fetch;
    }

    $text =~ s/\@\@/\@/g if $fetch and $text ne '';
    return $text;
}

# Parse RCS file directly when $cvsroot happens to be local.
#
# It is 7x times faster than spawning CVS and require a lot less CPU and memory.
# It is also quite awful and a pain in the ass to code/maintain reliably.
#
sub get_file_info_local {
    my $file = shift;
    my $rev = shift;

    my $branch = '';
    my $message = '';
    my $branch_rev = get_branch_rev $rev;
    my $fullpath = "$cvsroot/$file,v";

    if (not -e $fullpath) {
        # File might be removed, try in attic
        my $slash = rindex $fullpath, '/';  # This slash does exist since $cvsroot is absolute
        $fullpath = (substr $fullpath, 0, $slash) . "/Attic" . (substr $fullpath, $slash);

        # It might have been removed by hand, complain but continue
        if (not -e $fullpath) {
            warning "could not find RCS file for '$file'.";
            return ($branch, $message);
        }
    }

    open RCS, "<$fullpath" or error "$fullpath: $!", 3;

    # Here we have a state-oriented parser, mainly because it's much easier for input
    # error/eof handling to have a single read loop (although parse_rcs_block scans
    # <RCS> recursively), and it turned out to be also much more readable and reliable.
    #
    # RCS file structure is insane, especially for a line-oriented parser. Whoever created
    # this stuff should be beaten to death with a heavy Perl book. This code has been carefully
    # optimized for speed while trying to use still readable Perl constructs. Really.
    #
    my $state = 0;
    my $match = 0;

    while (<RCS>) {
        chomp;

        # Looking up for symbol table start
        if ($state == 0) {
            ($state = 1, next) if $_ eq 'symbols';
            ($state = 2, next) if $_ eq 'symbols;';  # No symbols, skip that part
            next;
        }

        # Looking up for symbol table end, collecting symbols
        if ($state == 1) {
            s/^\t//;
            foreach my $branchdef (split / /) {
                my @tokens = split /:/, $branchdef;
                my $rcsrev = $tokens[1];
                my $last = ($rcsrev =~ s/;$//) > 0;  # If the line ends with ';', this is the last symbol
                if ($rcsrev eq $branch_rev) {
                    $branch = $tokens[0];
                    $last = 1;  # We have our branch symbolic name, we're done
                }
                ($state = 2, last) if $last;
            }
            next;
        }

        # Looking up for desc section
        #   Cheatin': we don't parse the 'comment' block which is in the way, so if a really
        #   bad guy put the single line 'desc' into its RCS comment, we're doomed.
        if ($state == 2) {
            next if $_ ne 'desc';
            parse_rcs_block 0;
            $state = 3;
            next;
        }

        # Looking up for revision info
        if ($state == 3) {
            next if $_ eq '';  # There are two blanks lines before a revision info
            my $rcsrev = $_;
            $match = $rcsrev eq $rev ? 1 : 0;  # Note if this is the info we're looking for
            $state = 4;
            next;
        }

        # Parse log and text sections
        #   We only extract message info if $match is true, otherwise we ask
        #   parse_rcs_block to skip over blocks as fast as possible.
        if ($state == 4) {
            if ($_ eq 'log') {
                $message = parse_rcs_block $match;
                $match and last or next;  # We have the message, all done !
            }
            if ($_ eq 'text') {
                parse_rcs_block 0;
                $state = 3;  # Let's do it again for next revision
                next;
            }
        }
    }

    close RCS;

    chomp $message;
    return ($branch, $message);
}

# ($branch, $message) = get_file_info 'module/path', revision
#
# Retrieves branch symbolic name (from revision number) and log message. This
# is a frontend which selects the remote or local method (way faster) given
# $cvsroot.
#
sub get_file_info {
    if (is_local $cvsroot) {
        return get_file_info_local @_;
    } else {
        return get_file_info_remote @_;
    }
}

# Parse history items and gathers them into csets.
#
# This is the core of cvsreport, where the magic actually happens. It calls 'cvs history',
# parse each history entry, and try an heuristic to detect cset changes. This method fills
# up @history and @csets with refs to hashes.
#
sub parse_history {
    # Fetch commit events (add, remove, modify) for the current user, starting from $cset_from

    # User selection :
    # * all    : any record event, use '-a' option
    # * single : pserver style, where user (CVS_USER) is not necessarily the Unix one (USER)
    # * single : Unix style (RSH/SSH), this is CVS default
    my $usersel = $all_users ? '-a' : (defined $ENV{CVS_USER} ? "-u \"$ENV{CVS_USER}\"" : '');
    my $command = "cvs -d \"$cvsroot\" history $usersel -c -D \"$cset_from\"";
    debug "parse_history():";
    debug "  command: $command";

    # We silence stderr because we will break the pipe on purpose if we have enough parsing.
    open(HISTORY, "$command 2>/dev/null |") or error "running '$command': $!", 3;

    my $h_count = 0;
    my $cset_count = 0;
    my $branch  = '';
    my $xbranch = '';
    my $message = '';
    my $xmessage = '';
    my $user = '';
    my $date = 0;         # Holds last parsed date to compute time deltas.
    my $cset_ref;         # Undef on purpose.
    my %cset_files = ();  # Holds file list for current cset, use by cset heuristic.

    while (<HISTORY>) {
        chomp;

        # Check if we have commit events on first history item.
        # Otherwise it's an error message from CVS.
        if (not /^[AMR] /) {
            last if /^No records/;  # This is OK, empty history.
            s/^[^:]+: //;           # Remove 'cvs [info]:' header
            error $_, 3;
        }

        # This _will_ fail if module or file names have spaces, but there's nothing we can do.
        # Actually we could parse directly $CVSROOT/CVSROOT/history but that would break remote usage.
        my @tokens = split / +/;
        my ($action, $date_ymd, $date_hm, $date_zone, $xuser, $revision, $file, $path, undef, $method) = @tokens;
        my $xdate = parse_utc_date "$date_ymd $date_hm $date_zone";
        my $fullpath = "$path/$file";

        ($xbranch, $xmessage) = get_file_info($fullpath, $revision) if not ($single_cset and $h_count > 0);

        # This is an heuristic to separate csets, true if :
        #   * first history item (ie. the first cset holds it)
        #   * branch, message or user changes
        #   * time leap
        #   * file already encountered in this very cset
        if ($h_count == 0 or
            $xbranch ne $branch or
            $xmessage ne $message or
            $xuser ne $user or
            ($date and ($xdate - $date) >= $cset_time_leap) or
            defined $cset_files{$fullpath}) {

            if ($cset_max and $cset_count == $cset_max) {
                debug "  stopped by cset_max=$cset_max";
                last;
            }

            $branch = $xbranch;
            $message = $xmessage;
            $user = $xuser;

            my %cset = (user    => $user,
                        branch  => $branch,
                        message => $message,
                        h_start => $h_count,
                        h_count => 0
                       );

            $cset_ref = \%cset;
            push @csets, $cset_ref;
            $cset_count++;
            %cset_files = ();

            debug "  cset: u:$cset{user} b:'$cset{branch}' start:$cset{h_start} m:'$cset{message}'";
        }
        $date = $xdate;

        # Adjust path representation
        my @path_split = split /\//, $fullpath;
        my $module = $path_split[0];
        $file = join '/', @path_split[1 .. (@path_split - 1)];

        # Compute previous entry (bogus for 'A' action)
        my $previous = get_previous_rev $revision;

        # Create entry and store into history
        my %entry = (cset     => $cset_ref,
                     action   => $action,
                     module   => $module,
                     file     => $file,
                     date     => $date,
                     revision => $revision,
                     previous => $previous
                    );

        push @history, \%entry;
        $cset_files{$fullpath} = 1;
        $h_count++;
        $cset_ref->{'h_count'}++;

        if ($do_debug) {
            my $datestr = strftime "%Y-%m-%d %H:%M UTC", gmtime($entry{date});
            debug "    $entry{action} $entry{revision}  $datestr  $entry{module}/$entry{file}";
        }
    }

    close(HISTORY);

    if ($h_count == 0) {
        debug "  no commit events, exiting";
        myexit 0;
    }
    debug "  total: $h_count entries, $cset_count csets";
}



#
# Diff helpers
#

# Retrieve a raw diff (header stripped) from an history entry.
#
# get_diff configset, history_entry
#
# Returns a file name containing the diff (use print_file).
#
sub get_diff {
    my $config = shift;
    my $entry = shift;
    my $path = "$entry->{module}/$entry->{file}";
    my $rev1 = $entry->{previous};
    my $rev2 = $entry->{revision};

    my $opt = '';
    my $format = $config->{diff_format};
    if (defined $format) {
        $opt = '-u' if $format eq 'unified';
        $opt = '-c' if $format eq 'context';
    }

    my $mangled = $path;
    $mangled =~ s/\//_/g;
    my $cache = "$tmp_path/cvsreport.$$.$mangled-$rev1-$rev2$opt";
    return $cache if -e $cache;  # Already computed

    my $command = "cvs -d \"$cvsroot\" rdiff $opt -r$rev1 -r$rev2 \"$path\"";
    debug "get_diff():";
    debug "  command: $command";

    open(DIFF, "$command |") or error "running '$command': $!", 3;
    open(CACHE, ">$cache") or error "writing to '$cache': $!", 3;
    cleanup_add $cache;

    my $skip = 4;
    while (<DIFF>) {
        $skip ? $skip-- : print CACHE $_;
    }

    close CACHE;
    close DIFF;

    return $cache;
}

# Return wether an inline diff for a given history entry is requested.
#
# want_diff configset, history_entry
#
sub want_diff {
    my $config = shift;
    my $entry = shift;
    return 0 if $entry->{action} eq 'R';  # Don't diff on removed files

    my $diff =  $config->{diff};
    my $path = "$entry->{module}/$entry->{file}";
    return 0 if not defined $diff;

    for my $pattern (split /,/, $diff) {
        return 1 if $path =~ /$pattern/;
    }

    return 0;
}



#
# TEXT output
#


# Retrieve the first line from a cset log message
#
sub get_short_message {
    my $cset = shift;
    my @lines = split /\n/, $cset->{message};

    return (@lines > 0 ? $lines[0] : '').(@lines > 1 ? ' [...]' : '');
}

sub text_cset {
    my $cset = shift;
    my $date = get_local_date $history[$cset->{h_start}]->{date};

    # If we're not on trunk, display branch name
    my $branch = $cset->{branch};
    my $branch_text = ($branch ne '') ? " on branch $branch" : "";

    my $header = "Commit from $cset->{user}$branch_text";
    my $underline = $header;
    $underline =~ s/./-/g;

    print "$header ($date)\n$underline\n\n$cset->{message}\n\n";
}

sub text_item_width {
    my $entry = shift;
    my $field = shift;
    my $colwidth = shift;

    my $len = length $entry->{$field};
    $colwidth->{$field} = $len if $colwidth->{$field} < $len;
}


sub text_item {
    my $entry = shift;
    my $colwidth = shift;

    my $action = $entry->{action};
    my $actsym = ' ';
    $actsym = '+' if $action eq 'A';
    $actsym = '-' if $action eq 'R';

    my $module = sprintf "%- $colwidth->{module}s", $entry->{module};
    my $file   = sprintf "%- $colwidth->{file}s", $entry->{file};

    print "$actsym $module  $file  $entry->{revision}\n";
}

sub text_diff {
    my $entry = shift;
    my $diff = shift;

    my $path = "$entry->{module}/$entry->{file}";
    my $underline = $path;
    $underline =~ s/./-/g;
    my $prev = $entry->{action} eq 'A' ? "" : "$entry->{previous} -> ";

    print "\n\n$underline\n$path  ($prev$entry->{revision})\n$underline\n\n";
    print_file $diff;
}

# Text report
#
# This is (for now) a very simple and concise text-based report. The only
# luxe relies on aligned columns.
#
sub text_report {
    my $conf = shift;

    my $cset_cnt = 0;
    foreach my $cset (@csets) {
        print "\n\n" if $cset_cnt++;
        text_cset $cset;

        my $a = $cset->{h_start};
        my $b = $cset->{h_count} + $a - 1;

        # First pass : evaluate columns size, collect difflist
        my %colwidth = ( module => 0, file => 0 );
        my @diffs = ();
        foreach my $entry (@history[$a..$b]) {
            text_item_width $entry, 'module', \%colwidth;
            text_item_width $entry, 'file', \%colwidth;
            push(@diffs, $entry) if want_diff $conf, $entry;
        }

        # Second pass : display summary
        foreach my $entry (@history[$a..$b]) {
            text_item $entry, \%colwidth;
        }

        # Third pass : display inline diffs (if any)
        foreach my $entry (@diffs) {
            text_diff $entry, get_diff($conf, $entry);
        }
    }
}



#
# HTML output
#

# Munge regular text into something acceptable for HTML embedding.
#
sub html_fix {
    local $_ = shift;

    # Order is important !
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;

    return $_;
}

# Even better than html_fix, detects URLs and links them.
#
sub html_coolfix {
    local $_ = html_fix shift;

    s/([a-z]+:\/\/[^\(\) ]+)/<a href="$1">$1<\/a>/g;
    return $_;
}

sub html_begin {
    my $conf = shift;

    $conf->{modulo} = 0;  # Keep line number modulo 2 for color alternance in history
    my $title = $conf->{title};
    $title = html_fix(get_short_message($csets[0])) if $title eq 'message';

    print <<EOF;
<?xml version="1.0" encoding="$conf->{encoding}"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>$title</title>
    <style type="text/css">
      table.csetheader { color: inherit; background: #ebebeb; border-style: solid; border-color: #8d8d8d; border-width: 0px 1px 1px 0px; }
      table.history { border: solid black 1px; }
      table.diff { border: solid black 1px; }
      body       { color: black; background: white; }
      p.footer   { border-top: solid black 1px; }
      em.user    { font-style: normal; color: #000080; background: inherit; }
      em.branch  { font-style: normal; color: #800000; background: inherit; }
      b.diffA    { font-weight: normal; color: inherit; background: #e0f4e0; }
      b.diffR    { font-weight: normal; color: inherit; background: #f4e0e0; }
      tr.history { color: inherit; background-color: #c0c0c0; }
      th.history { padding-right: 3em; text-align: left; }
      th.haction { padding-right: 1em; text-align: left; }
      td.item    { padding-right: 3em; font-family: monospace; }
      td.itemact { padding: 0em 0.5em; font-family: monospace; text-align: center; }
      td.itemrev { padding-right: 1em; font-family: monospace; }
      tr.itemA0  { color: inherit; background: #e0f4e0; margin: 15px; }
      tr.itemA1  { color: inherit; background: #f0fff0; margin: 15px; }
      tr.itemM0  { color: inherit; background: #eaeaea; margin: 15px; }
      tr.itemM1  { color: inherit; background: #f4f4f4; margin: 15px; }
      tr.itemR0  { color: inherit; background: #f4e0e0; margin: 15px; }
      tr.itemR1  { color: inherit; background: #fff0f0; margin: 15px; }
      tr.diffhd  { color: inherit; background: #c0c0c0; }
      th.diffhd  { padding-left: 2em; text-align: left; font-family: monospace; }
      tr.diffblk { color: inherit; background: #ffffff; }
      td.diffblk { font-weight: bold; padding-top: 0.5em; padding-left: 1.5em; border-bottom: solid black 1px; }
      td.diffA   { color: inherit; background: #f0fff0; font-family: monospace; }
      td.diffM   { color: inherit; background: #f4f4f4; font-family: monospace; }
      td.diffR   { color: inherit; background: #fff0f0; font-family: monospace; }
    </style>
  </head>
  <body>

EOF
}

sub html_end {
    my $conf = shift;
    my $footer = $conf->{footer};

    if ($footer ne '') {
        print <<EOF;

    <p><br /></p>
    <p class="footer">$footer</p>

EOF
    }

    print "  </body>\n</html>\n";
}

sub html_cset_begin {
    my $cset = shift;
    my $date = get_local_date $history[$cset->{h_start}]->{date};

    # If we're not on trunk, display branch name
    my $branch = $cset->{branch};
    my $branch_text = ($branch ne '') ? " on branch <em class=\"branch\">$branch</em>" : "";

    # Print header with 'Commit from <user>' on left, date on right.
    print <<EOF;
    <table width="100\%" class="csetheader">
      <tr>
        <td align="left"><b>Commit from <em class="user">$cset->{user}</em>$branch_text</b></td>
        <td align="right">$date</td>
      </tr>
    </table>

EOF

    my $msg = html_coolfix $cset->{message};
    print <<EOF;
    <pre>$msg</pre>

EOF
}

sub html_cset_separator {
    print "\n    <p><br /></p>\n\n\n";
}

sub html_cset_end {
}

sub html_history_begin {
    print <<EOF;
    <table cellspacing="0" cellpadding="3" class="history">
      <tr class="history">
        <th class="haction"></th>
        <th class="history">Module</th>
        <th class="history">File name</th>
        <th class="history" colspan="3">Revision</th>
      </tr>
EOF
}

sub html_history_end {
    print <<EOF;
    </table>

EOF
}

# Retrieve a ViewCVS/CVSweb URL for a given path and revision.
# * http://viewcvs.sourceforge.net/
# * http://www.freebsd.org/projects/cvsweb.html
#
# Usage: get_viewcvs_url config 'module/path/file', rev1, rev2, action[=A,M,R,D]
#
# The 'D' action is for the diff request.
#
sub get_item_url {
    my $conf = shift;
    my $path = shift;
    my $rev1 = shift;
    my $rev2 = shift;
    my $action = shift;

    my $content = get_bool($conf->{cgi_download}) ? '' : '&amp;content-type=text/vnd.viewcvs-markup';

    if ($action eq 'R') {
        my @p = split /\//, $path;
        $path = join '/', (@p[0 .. (@p - 2)], 'Attic', $p[-1]);
    }
    return "$conf->{cgi_urlbase}/$path?rev=$rev1$content" if $action ne 'D';

    # A little different for a diff request
    return "$conf->{cgi_urlbase}/$path?r1=$rev1&amp;r2=$rev2";
}

# Usage: get_item_rev config entry version[=previous,diff,revision]
#
# Return the decorated given version of the entry. This is a simple passthru if
# we don't use ViewCVS/CVSweb. Otherwise it goes thru get_{viewcvs,cvsweb}_url
# and we set the proper hyperlink.
#
sub get_item_rev {
    my $conf = shift;
    my $entry = shift;
    my $version = shift;

    # Select text : either the revision number, either the '>>>' diff symbol
    my $rev = ($version eq 'diff') ? '>>>' : $entry->{$version};
    return "$rev" if not get_bool($conf->{cgi_links});

    # Fetch URL
    my $rev1 = ($version eq 'revision') ? $entry->{revision} : $entry->{previous};
    my $rev2 = $entry->{revision};  # Only used by diff
    my $act  = ($version eq 'diff') ? 'D' : $entry->{action};
    my $url = get_item_url $conf, "$entry->{module}/$entry->{file}", $rev1, $rev2, $act;
    return "<a href=\"$url\">$rev</a>";
}

sub html_history_item {
    my $conf = shift;
    my $entry = shift;

    # Action symbol (+, - or nothing, on leftmost column)
    my $action = $entry->{action};
    my $actsym = '';
    $actsym = '+' if $action eq 'A';
    $actsym = '-' if $action eq 'R';
    $actsym = "<b>$actsym</b>" if $actsym ne '';

    # Version info
    my $rev_pre  = ($action =~ /[MR]/) ? get_item_rev($conf, $entry, 'previous') : '';
    my $rev_diff = ($action =~ /[M]/ ) ? get_item_rev($conf, $entry, 'diff')     : '';
    my $rev_post = ($action =~ /[MA]/) ? get_item_rev($conf, $entry, 'revision') : '';

    print <<EOF;
      <tr class="item$action$conf->{modulo}">
        <td class="itemact">$actsym</td>
        <td class="item"><b>$entry->{module}</b></td>
        <td class="item">$entry->{file}</td>
        <td class="itemrev">$rev_pre</td>
        <td class="itemrev">$rev_diff</td>
        <td class="itemrev">$rev_post</td>
      </tr>
EOF
    $conf->{modulo} = $conf->{modulo} ? 0 : 1;
}

sub html_diff_begin {
    my $title = shift;

    print <<EOF;
    <p></p>

    <table cellspacing="0" cellpadding="3" class="diff" width="100%">
      <tr class="diffhd">
        <th class="diffhd">$title</th>
      </tr>
EOF
}

sub html_diff_end {
    print "        </td>\n      </tr>\n    </table>\n";
}

sub html_diff_block_begin {
    my $line = shift;

    print <<EOF;
      <tr class="diffblk">
        <td class="diffblk">Line $line</td>
      </tr>
EOF
}

sub html_diff_style_begin {
    my $style = shift;
    print "      <tr>\n        <td class=\"diff$style\">\n";
}

sub html_diff_style_end {
    my $style = shift;
    print "        </td>\n      </tr>\n" if $style ne '';
}

sub html_diff {
    my $conf = shift;
    my $entry = shift;

    $conf->{diff_format} = 'unified';  # Force unified format, we only parse that.
    if (not open(DIFF, get_diff($conf, $entry))) {
        warning "could not fetch cached diff: $!";
        return;
    }

    # Setup title (file name, versions)
    my $prev = $entry->{action} eq 'A' ? "" : "$entry->{previous} >>> ";
    my $title = "$entry->{module}/$entry->{file} &nbsp; $prev$entry->{revision}";
    html_diff_begin $title;

    # Parse diff, identify blocks
    my $block = 0;
    my $old_style = '';
    while (<DIFF>) {
        chomp;

        # Detect diff block header
        if (/^\@\@ \-([0-9]+),([0-9]+) \+.+ \@\@$/) {
            my $line = $1;
            html_diff_style_end $old_style;
            html_diff_block_begin $line;
            $block++;
            $old_style = '';
            next;
        }

        # Detect line style : ' '=identical, '+'=added, '-'=removed
        my $op = substr $_, 0, 1;
        my $style = $op;
        $style =~ tr/+\- /ARM/;
        $op =~ s/ /&nbsp;/;

        # Fix text for HTML output
        my $text = substr $_, 1;
        $text = html_coolfix $text;
        $text =~ s/ /&nbsp;/g;

        # Only change style when necessary
        if ($style ne $old_style) {
            html_diff_style_end $old_style;
            html_diff_style_begin $style;
        }
        print "          <b>$op</b> $text<br />\n";

        $old_style = $style;
    }
    # No need to call html_diff_style_end.
    # No html_diff_block_end.

    html_diff_end();
}

# HTML report
#
# Here you can see the skeleton of the document. The rest is boring and
# straight forward stuff. Okay, get_item_url+get_item_rev is actually
# tricky.
#
# Note that it's meant to output tidy and strict XHTML 1.0 + CSS1 conformant
# code, please keep it that way. Use a local install of the WDG validator or
# http://validator.w3.org.
#
sub html_report {
    my $conf = shift;

    html_begin $conf;

    my $cset_cnt = 0;
    foreach my $cset (@csets) {
        html_cset_separator if $cset_cnt++;
        html_cset_begin $cset;

        my @diffs = ();
        my $a = $cset->{h_start};
        my $b = $cset->{h_count} + $a - 1;
        html_history_begin();
        foreach my $entry (@history[$a..$b]) {
            html_history_item $conf, $entry;
            push(@diffs, $entry) if want_diff $conf, $entry;
        }
        html_history_end();

        foreach my $entry (@diffs) {
            html_diff $conf, $entry;
        }

        html_cset_end();
    }

    html_end $conf;
}



#
# Report cache
#

my %cache = ();

# get_report setname
#
# This function dispatches to different outputs and provide caching so commands can
# ask for the same report several times without performance hit. It returns the name
# of a temporary file holding the report content.
#
sub get_report {
    my $setname = shift;
    my $set = $config{set}{$setname};

    # If it's in cache, we're done.
    return $cache{$setname} if defined $cache{$setname};

    # Otherwise generate report.
    my $path = "$tmp_path/cvsreport.$$.$setname";
    open(TMP, ">$path") or error("$path: $!", 3);
    cleanup_add $path;
    select TMP;
  SWITCH:
    {
        if ($set->{format} eq 'text') { text_report $set; last SWITCH; }
        if ($set->{format} eq 'html') { html_report $set; last SWITCH; }
    }
    close TMP;
    select STDOUT;

    # Cache and return result.
    $cache{$setname} = $path;
    return $path;
}



#
# Configuration set check
#

# Check a report configuration set.
#
#  * since we're a parser helper, complain if we have actually no set as parameter
#  * check if this set is known
#  * check if it has a 'format' field
#  * check that 'format' is a supported output
#  * inherits defaults from the canonical set (depends on 'format')
#
sub check_report_set {
    my $setname = shift;

    if (not defined $setname) {
        warning "please specify a configuration set.";
        return 0;
    }

    my $set = $config{set}{$setname};
    if (not defined $set) {
        warning "'$setname': unknown configuration set";
        return 0;
    }

    if (not defined $set->{format}) {
        warning "'$setname' configuration set has no 'format' field";
        return 0;
    }

    if ($set->{format} ne 'text' and $set->{format} ne 'html') {
        warning "'$setname': '$set->{format}' format is unsupported";
        return 0;
    }

    my $setref = $config{set}{$set->{format}};
    for my $field (keys %{$setref}) {
        $set->{$field} = $setref->{$field} if not defined $set->{$field};
    }

    return 1;
}



#
# 'run' command
#

# Syntax: run args ...
#
sub command_run_check {
    my $command = shift;
    if ($command eq '') {
        warning "run command: needs an actual command to execute.";
        return 0;
    }

    return 1;
}

sub command_run_run {
    my $command = shift;
    system("$command 2>&1") and warning "running '$command': exited with error status $?";
}


#
# 'file' command
#

# Syntax: file setname [>>]file ...
#
sub command_file_check {
    return 0 if not check_report_set shift;

    if (@_ == 0) {
        warning "file command: needs file names to write to.";
        return 0;
    }

    # Check that we will be able to create/append files.
    my $error = 0;

    for my $file (@_) {
        $file =~ s/^>>//;                                              # Ignore append symbol
        my $parent = ($file =~ /^\//) ? "$file" : getcwd() ."/$file";  # Get $file absolute path
        $parent =~ s/\/[^\/]+$/\//;                                    # Remove last path component
        ($error++, warning "cannot create '$file'") if ($file ne '-' and not -w $parent);
    }

    return $error < @_;
}

# Output a report to a list of files.
#
sub command_file_run {
    my $report = get_report shift;

    for my $file (@_) {
        $file = ">$file" if not ($file =~ /^\>/);

        (warning "$file: $!", next) if not open(FILE, $file);
        select FILE;
        my $done = print_file $report;
        (warning("$report: $!"), next) if not $done;
        close FILE;
        select STDOUT;

        debug "    $file: $done bytes written";
    }
}



#
# Mail command
#

# Syntax: mail set1[+set2] address ...
#
sub command_mail_check {
    my $setname = shift;
    if (not defined $setname) {
        warning "mail command: please specify a simple or combined configuration set.";
        return 0;
    }

    # Check for multiset input
    my @sets = split /\+/, $setname;
    if (@sets > 2) {
        warning "mail command: maximum set number for multipart messages is 2.";
        return 0;
    }

    # Check for set()s existence, and check they are of different format
    my %fhash = ();
    for my $set (@sets) {
        return 0 if not check_report_set $set;

        my $format = $config{set}{$set}{format};
        if (defined $fhash{$format}) {
            warning "mail command: '$sets[0]' and '$sets[1]' sets must have a different format.";
            return 0;
        }
        $fhash{$format} = 1;
    }

    if (@_ == 0) {
        warning "mail command: needs email addresses to send to.";
        return 0;
    }

    # No check on email addresses, this is the MUA's job
    return 1;
}

# Return a MIME boundary marker suitable for multipart messages
#
sub get_mime_boundary {
    my $time= time();
    return sprintf "%X%X%X", $time, $$, int(rand($time));
}

# Return a list of MIME headers which completely describe a typed content
#
sub get_mime_headers {
    my $setname = shift;
    my $set = $config{set}{$setname};
    my @headers = ();
    my %mimetype = (text => 'text/plain', html => 'text/html');

    push @headers, "Content-Type: $mimetype{$set->{format}}";
    push @headers, "Content-Encoding: $set->{encoding}";
    push @headers, "Content-Tranfer-Encoding: 8bit";

    return @headers;
}

# Send plain or multipart e-mail to a list of addresses
#
sub command_mail_run {
    my @sets = split /\+/, shift;

    # MIME headers for mail body
    my $boundary = get_mime_boundary();
    my @mime_headers  = ("MIME-Version: 1.0");
    if (@sets > 1) {
        push @mime_headers, "Content-Type: multipart/alternative; boundary=$boundary";
    } else {
        push @mime_headers, get_mime_headers($sets[0]);
    }

    # Fetch mail option fields
    my $from    = $config{set}{mail}{'from'};
    my $replyto = $config{set}{mail}{'reply-to'};
    my $subject = $config{set}{mail}{'subject'};
    my $method  = $config{set}{mail}{'command'};

    # Set subject
    $subject = get_short_message($csets[0]) if $subject eq 'message';
    $subject =~ s/\"/\\\"/g;  # Protect against unsollicited double-quotes

    # Note : I'd really like to split the two following mail methods into two subs,
    #        but it's tied to a bunch of useful variables defined above.

    if ($method eq 'internal') {
        # Using the Net::SMTP support

        my $server = $config{set}{mail}{'smtp_server'};
        my $server_nick = defined $server ? $server : '<default>';

        # Figure out 'From' field if not explicitly given in config
        if (not defined $from) {
            my ($user, $nick);

            if (defined $ENV{CVS_USER}) {
                # pserver account, we don't have much info
                $user = 'pserver-user';
                $nick = $ENV{CVS_USER};
            } else {
                # Regular *nix account (rsh, ssh)
                $user = $ENV{USER};
                my $gcos = (getpwnam($user))[6];
                $nick = defined $gcos ? $gcos : $user;
            }
            # Make sure we have something looking like an address here
            $from = ($nick =~ /\@/) ? $nick : "$nick <$user\@". hostfqdn() .">";
        }

        # Set headers
        my @headers;
        push @headers, "From: $from";
        push(@headers ,"Reply-To: $replyto") if defined $replyto;
        push @headers, "To: ". join(', ', @_);
        push @headers, "Subject: $subject";
        push @headers, @mime_headers;

        # Start SMTP dialog
        debug "    using Net::SMTP($server_nick)";
        my $smtp;
        $smtp = Net::SMTP->new($server);
        if (not $smtp) {
            warning "mail command: internal: can't use '$server_nick' SMTP server.";
            return;
        }
        if (not $smtp->mail($from)) {
            warning "mail command: internal: '$from': invalid sender.";
            return;
        }
        my @recipients = $smtp->recipient(@_, {SkipBad=>1});
        if (@recipients == 0) {
            warning "mail command: internal: no valid recipient.";
            return;
        }

        # Send mail body
        $smtp->data();
        $smtp->datasend(join("\n", @headers). "\n\n");

        for my $set (@sets) {
            my $report = get_report($set);

            select $smtp;
            print "\n--$boundary\n". (join "\n", get_mime_headers($set)) ."\n\n" if @sets > 1;
            (warning("$report: $!"), next) if not print_file $report;
        }

        $smtp->dataend();
        $smtp->quit();
    } else {
        # Use an external mail command

        my @headers;
        push(@headers ,"From: $from") if defined $from;
        push(@headers ,"Reply-To: $replyto") if defined $replyto;
        push @headers, @mime_headers;

        # Using an external (mailx compatible) program
        my $mime_opt = join ' ', (map {"-a '$_'"} @headers);
        my $command = "$method $mime_opt -s \"$subject\" @_";
        debug "    command: $command";
        if (not open(MAIL, "|$command")) {
            warning "mail command: error running '$method'.";
            return;
        }

        # Compose mail body
        for my $set (@sets) {
            my $report = get_report($set);

            select MAIL;
            print "\n--$boundary\n". (join "\n", get_mime_headers($set)) ."\n\n" if @sets > 1;
            (warning("$report: $!"), next) if not print_file $report;
        }

        # All done
        close MAIL;
        select STDOUT;
    }
}



#
# Command dispatcher
#

# Check command syntax and make sure it can run.
#
# This check is run just before doing actual work (ie. fetching history), this way we
# can cancel the process early and report meaningful errors when it's time. The idea
# here is to only report errors as warnings. Then when all commands are parsed, if
# there's none valid, report an error.
#
sub command_check {
    my $comlist = $config{command};

    debug "command_check():";
    my @todo = ();
    for my $command (@{$comlist}) {
        my ($action, $argl) = split /[ \t]+/, $command, 2;
        $argl = '' if not defined $argl;
        my @args = split /[ \t]+/, $argl;
        my $valid = 0;

      SWITCH:
        {
            if ($action eq 'run')  { $valid = 1 if command_run_check  $argl; last SWITCH; }
            if ($action eq 'file') { $valid = 1 if command_file_check @args; last SWITCH; }
            if ($action eq 'mail') { $valid = 1 if command_mail_check @args; last SWITCH; }
            if ($action eq 'test') { $valid = 1; last SWITCH; }
            warning "$action: Unknown command.";
        }
        push(@todo, $command) if $valid;

        debug "  $action: $argl (". ($valid ? 'ok' : 'error') .")";
    }
    error("nothing to do.", 2) if @todo == 0;

    # Update command list, only keep validated commands
    $config{command} = \@todo;
}

# Execute commands
#
# This part is called after history has been retrieved, but no report has been generated
# yet. Each command can request a report via get_report() which will build and cache
# results accordingly.
#
sub command_run {
    my $comlist = $config{command};

    debug "command_run():";
    for my $command (@{$comlist}) {
        my ($action, $argl) = split /[ \t]+/, $command, 2;
        $argl = '' if not defined $argl;
        my @args = split /[ \t]+/, $argl;
        debug "  $action: $argl";

      SWITCH:
        {
            if ($action eq 'run')  { command_run_run  $argl; last SWITCH; }
            if ($action eq 'file') { command_file_run @args; last SWITCH; }
            if ($action eq 'mail') { command_mail_run @args; last SWITCH; }
        }
    }
}



#
# Configuration parsing
#

# Parse "key [= value ...]"
#
# Does $(var) substitution.
#
sub parse_assignation {
    my @tokens = split /=/, shift, 2;
    my $key = $tokens[0];
    my $val = @tokens > 1 ? $tokens[1] : '';
    $key =~ s/[ \t]*$//;  # Remove trailing blanks
    $val =~ s/[ \t]*$//;  # Idem
    $val =~ s/^[ \t]*//;  # Remove heading blanks
    $val = subst_vars $val;

    return ($key, $val);
}


sub parse_set_decl {
    my $conf = shift;

    # Check syntax
    if (@_ < 2) {
        warning "'@_': set name expected";
        return;
    }
    if (@_ > 2) {
        warning "'@_': { expected after $_[1]";
        return;
    }

    my $name = $_[1];
    my %empty = ();
    $conf->{set}{$name} = \%empty if not defined $conf->{set}{$name};
    $conf->{set_current} = $conf->{set}{$name};
    $conf->{state} = 'set_begin';
}

sub parse_set_begin {
    my $conf = shift;

    # Check syntax
    if ($_[0] ne '{') {
        warning "'@_': { expected";
        return 0;
    }
    $conf->{state} = 'set_stmt';
}

sub parse_set_stmt {
    my $conf = shift;
    my $line = shift;

    if ($line =~ /^\}/) {
        $conf->{set_current} = 0;
        $conf->{state} = '';
        return;
    }

    my ($key, $val) = parse_assignation $line;
    $conf->{set_current}{$key} = $val;
}

sub parse_my {
    my $conf = shift;
    my $line = shift;
    $line =~ s/my[ \t]*//;  # Remove the first 'my' token.

    my ($key, $val) = parse_assignation $line;
    $vars{$key} = $val;
}

sub parse_command {
    my $conf = shift;

    push @{$conf->{command}}, shift; # Store command string 'as is'
}

sub get_tokens {
    my $line = shift;

    return 0 if ($line =~ /^#/); # Ignore comments
    return split /[ \t]+/, $line;
}

# Parse configuration data.
#
# The general syntax is :
#
#   set config_set_name {
#     key = value ... [; key = value ...]
#   }
#   command [args ...] [; command ...]
#
sub parse_config {
    my $conf = shift;
    my $text = shift;

    # Perl is awful at character-oriented parsing, so we actually make sure we have
    # one statement per line and go the classical oriented-line parsing way.
    $text =~ s/{/\n{\n/g;
    $text =~ s/}/\n}\n/g;
    $text =~ s/([^\\])\;/$1\n/g;
    my @lines = split /\n/, $text;

    # Initialize parser state
    $conf->{state} = '';

    foreach my $line (@lines) {
        chomp $line;             # Remove trailing blanks
        $line =~ s/^ *//g;       # Remove heading blanks
        next if ($line =~ /^#/); # Ignore comments
        next if not my @tokens = split /[ \t]+/, $line;

        my $state = $conf->{state};
        if    ($state eq 'set_begin') { parse_set_begin $conf, @tokens }
        elsif ($state eq 'set_stmt')  { parse_set_stmt  $conf, $line   }
        elsif ($tokens[0] eq 'set')   { parse_set_decl  $conf, @tokens }
        elsif ($tokens[0] eq 'my')    { parse_my        $conf, $line }
        else                          { parse_command   $conf, $line }
    }
}

# Fetch configuration informations.
#
# There are several sources (they are all runtime parsed, from memory or files) :
#
#  * internal default values
#  * then we parse $globalconfig if $localconfig is not set and $globalconfig exists
#  * then we parse $configfiles if any
#  * then we parse command line args
#
sub get_config {
    # Setup an empty command list (easier to check if empty rather than undef later)
    my  @empty = ();
    $config{command} = \@empty;

    # Build a list of config sources to read.
    # If it starts with "#\n", it's an in memory script.
    my @files;
    push(@files, "#\n$defaults");
    push(@files, $globalconfig) if not $localconfig and -r $globalconfig;
    push(@files, @configfiles);
    for my $com (@execute) {
        push(@files, "#\n$com");
    }

    # Compute substition hash now, it will be needed in config parsing
    init_vars();

    # Parse config sources
    for my $file (@files) {
        my $text = '';

        if ($file =~ /^\#\n/) {
            $text = $file;  # In memory script
        } else {
            my $buffer;

            open(CFG, "<$file") or error "$file: $!", 2;
            $text .= $buffer while (read(CFG, $buffer, 64*1024));
            close CFG;
        }
        parse_config \%config, $text;
    }

    if ($do_debug) {
        debug "get_config():";

        for my $def (sort keys %{$config{set}}) {
            debug "  set: '$def'";
            for my $key (sort keys %{$config{set}{$def}}) {
                debug "    '$key' = '$config{set}{$def}{$key}'";
            }
        }
        debug "  command:" if @{$config{command}} > 0;
        for my $com (@{$config{command}}) {
            debug "    '$com'";
        }
    }
}



#
# Command line parsing
#

# Print short help upon bad command line usage
# Suggests to check full help, exits with error.
#
sub print_short_help {
    print STDERR "\nUsage: $package_name [options]\nTry '$package_name --help' for more information.\n";
    myexit 2;
}

# Print full help, exit
#
sub print_help {
    print STDERR <<EOF;
Usage: $package_name [options]

Options:
  -a, --all          Report commits from all users (default: current user)
  -c, --config       Read config file (may appear more than once)
  -d, --cvsroot      Specify repository path, overrides \$CVSROOT
  -D, --debug        Sends verbose info to stderr ($debug_file if no tty)
  -e, --execute      Execute a command (see below) (may appear more than once)
  -f, --from         Fetch csets from this date (CVS time format) (default: now)
  -h, --help         Display this information
  -l, --local        Local config only (skip $globalconfig)
  -n, --number       Fetch (at more) <n> csets (default: no limit)
  -v, --version      Display version number and copyright info

Command:
  file <set> [>>]file ...      Write/append report to a list of files
  mail <set1[+set2]> addr ...  Send simple/multipart email to list of addresses
  run args ...                 Run a user command (if report is not empty)
  test                         Fetch history but do not generate a report
EOF
    myexit 0;
}

# Print version and copyright info, exit
#
sub print_version {
    print "$package_name $package_version\nCopyright (C) 2003 Vincent Caron <v.caron\@zerodeux.net>\n";
    myexit 0;
}

# CVS filters have two tedious problems :
#
#  * you can't prevent them from having the path and affected files as filter argument
#    (this is why cvsreport ignores regular arguments and only relies on options)
#  * it does not support quoting
#
# This function emulates simple ' or " quoting
#
sub fix_cvs_args {
    my @args = ();
    my $state = '';
    my $val = '';

    for my $arg (@ARGV) {
        if ($state eq '') {
            if ($arg =~ /^[\'|\"]/) {
                $state = substr $arg, 0, 1;
                $val = substr $arg, 1;
                next;
            }
            push @args, $arg;
        } else {
            if ($arg =~ /$state$/) {
                push @args, $val .' '. substr($arg, 0, -1);
                $state = '';
                $val = '';
                next;
            }
            $val .= ' '. $arg;
        }
    }

    @ARGV = @args;
}

# Parse command line options with getopt, nicely straightforward
#
sub parse_opt {
    my $help = 0;
    my $version = 0;

    fix_cvs_args();

    GetOptions('all|a'       => \$all_users,
               'config|c=s'  => \@configfiles,
               'cvsroot|d=s' => \$cvsroot,
               'debug|D'     => \$do_debug,
               'execute|e=s' => \@execute,
               'from|f=s'    => \$cset_from,
               'help|h'      => \$help,
               'local|l'     => \$localconfig,
               'number|n=i'  => \$cset_max,
               'version|v'   => \$version
              ) or print_short_help();

    print_help() if $help;
    print_version() if $version;

    debug "parse_args():";
    debug "  cvsroot   : '$cvsroot'";
    debug "  cset_from : '$cset_from'";
    debug "  cset_max  : '$cset_max'";
    debug "  all_users : $all_users";
    debug "  local     : $localconfig";
    debug "  config    : @configfiles (". (scalar @configfiles) .")";
    debug "  do_debug  : $do_debug";
    debug "  execute   : ". join(', ', @execute);
    debug "  args      : @ARGV (". (scalar @ARGV) .")";

    # Save parsed args as a string for later hashing.
    $ARGS = "a:$all_users c:@configfiles d:$cvsroot D:$do_debug e:@execute f:$cset_from l:$localconfig n:$cset_max";
}



#
# Main program
#

# Get startup time stamp, the sooner the better (before CVS actually stamps any
# commit action). This will be used in 'filter mode', otherwise $cset_from is used.
my $now = get_timestamp();

# We set $cvsroot just like CVS : command line, then CVS/Root file, then $ENV{'CVSROOT'}.
# So we do the parsing in reverse, the last succesfull item taking precedence over previous :
# 1. $ENV{'CVSROOT'}
$cvsroot = $ENV{'CVSROOT'} if defined $ENV{'CVSROOT'};

# 2. parse CVS/root
if (-e 'CVS/Root' and open ROOT, '<CVS/Root') {
    $cvsroot = <ROOT>;
    chomp $cvsroot;
    close ROOT;
}

# 3. parse options, then do some sanity checking
parse_opt();

if ($cvsroot eq '') {
    error "CVSROOT unspecified.\nPlease either define the CVSROOT environment variable or use the -d option.", 2;
}

if ($all_users and $cset_from eq '') {
    warning "it is a bad idea to use -a without -f.";
}

# Figure out what to do and how to do it.
get_config();

# If no command, send default text report to stdout
if (@{$config{command}} == 0) {
    push(@{$config{command}}, 'file text -');
}

# Early command check, don't run parse_history() only to report failure
command_check();

# If we don't have the cset origin, it means we are called by CVS from a commit filter,
# we have to figure out the timestamp ourselves and wait for CVS doing the full commit.
if ($cset_from eq '') {
    # First, we know we will extract a single cset.
    $cset_max = 1;

    # And we use the $single_cset hack to spare us a bunch of get_file_info() lookups
    # in parse_history().
    $single_cset = 1;

    # $cset_from is obviously the startup time of this script (we should be called from
    # a pre-commit check filter like CVSROOT/commitinfo to be sure this timestamp comes
    # _before_ (or _at_) the date of the first commit action in history.
    $cset_from = $now;

    if (not is_local($cvsroot)) {
        error "no automatic report on remote repositories (try the -f option).", 2;
    }

    wait_commit_completion();
}

parse_history();

command_run();

myexit 0;

# That's all folks
