#!/usr/public/bin/perl
# ---------------------------------------------------------------------------
$Version = 'wwwstat-1.0';
#
# Copyright (c) 1994 Regents of the University of California.
# All rights reserved.
#
# This software has been developed by Roy Fielding as
# part of the Arcadia project at the University of California, Irvine.
# Wwwstat was originally based on a multi-server statistics program called
# fwgstat-0.035 by Jonathan Magid (jem@sunsite.unc.edu) which, in turn,
# was heavily based on xferstats (packaged with the version 17 of the
# Wuarchive FTP daemon) by Chris Myers (chris@wugate.wustl.edu).
# As such, this software and all derivations must remain in the public domain.
# See below for further licensing information.
#
# NOTE(review): this copy of the file has had its original line breaks
# collapsed in transit; the layout below (including the usage text) has
# been reflowed and should be checked against a pristine distribution.
#
# usage
#   Print the command-line synopsis on STDERR and terminate (via die).
#   Called for -h and whenever the options cannot be parsed.
sub usage
{
    die <<"EndUsage";
usage: wwwstat [-helLoOuUrvxz] [-f logfile] [-s srmfile] [-i pathname]
               [-a IP_address] [-n archive_name] [-d date] [-t time]

$Version

Process an NCSA httpd 1.x access_log file and output an HTML summary.

Display Options:
    -h  Help -- just display this message and quit.
    -e  Display all invalid log entries on STDERR. (default is to ignore them)
    -l  Do display full IP address of clients in my domain.
    -L  Don't display full IP address of clients in my domain. (default)
    -o  Do display full IP address of clients from other domains.
    -O  Don't display full IP address of clients from other domains. (default)
    -u  Do display IP address from unresolved domain names.
    -U  Don't display IP address from unresolved domain names. (default)
    -r  If IdentityCheck, display table of requests by each remote ident.
    -v  Verbose display (to STDERR) of each log entry processed.
    -x  Display all requests of nonexistant files to STDERR.

Input Options:
    -f  Read from the following access_log file instead of the default.
    -z  Use zcat to uncompress the log file while reading [requires -f].
    -s  Get the server directives from the following srm.conf file.
    -i  Include the following file (assumed to be a prior wwwstat output).

Search Options (include in summary only those log entries):
    -a  Containing the following "substring" in the IP address.
    -d  Containing the following "substring" in the date.
    -t  Containing the following "substring" in the time.
    -n  Containing the following "substring" in the archive (URL) name.
EndUsage
}
#
# The intention is that this be run by a wrapper program as a crontab
# entry just before midnight, with its output redirected to a temporary
# file which can then be moved to the site's summary file.  The temporary
# file is necessary because the old summary file will be fstat'd for its
# size during wwwstat's processing (resulting in 0 if output is redirected
# to the summary file during processing).
#
# One of the nicest things about wwwstat is that it does not make any
# changes to or write any files in the server directories.  Thus, this
# program can be safely run by any user with read access to the httpd
# server's access_log and srm.conf files.  This allows people to do
# specialized summaries of just the things they are interested in.
#
# Note that the Search Options can specify that the substring must be
# a prefix by using the caret (e.g. "^substring") and/or must be
# a suffix by using the dollar (e.g. 'substring$').  However, they may
# need to be escaped on the command line to avoid shell interpretation.
# Searches on date and time can include full Perl regular expressions,
# including ranges such as -d " [1-7] " or -d " ( [8-9]|1[0-4]) " to
# indicate the first or second week, respectively.
#
# This program could easily be modified to run as a CGI script, but that
# is not recommended for slow processors or heavily utilized servers
# unless some effort is made to keep the active log file very small.
#
# See the file Changes for known problems and version information.
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the author Roy Fielding at .
# NOTE(review): the author's e-mail address appears to have been stripped
# from the line above (angle-bracketed text lost in transit).
#
# Redistribution and use in source and binary forms are permitted,
# subject to the restriction noted below, provided that the above
# copyright notice and this paragraph and the following paragraphs are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed in part by the University of
# California, Irvine.  The name of the University may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.  THIS SOFTWARE IS PROVIDED ``AS
# IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
# LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION
# (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------
# Set the default configuration options:

# Edit the next line to specify the output document's HTML Title.
$OutputTitle = "World-Wide Web Access Statistics for www.ics.uci.edu";

# Edit the next line to specify the URL of the previous summary period
# (for use only as a hypertext link).  Set it = "" if link is unwanted.
# The three-letter abbrev for last month is substituted at any "%M"
# Note that this is the month prior to the earliest one in this summary.
$LastSummary = "http://www.ics.uci.edu/Admin/%M.wwwstats.html";

# Edit the next line to identify the server's default home page.
# Requests for "/" (or "/$DirectoryIndex") are counted under this name.
$ServerHome = "/ICShome.html";

# Edit the next line to locate the country-codes file.
# This is a file of the format:
# domain    text
# which will allow expansion from domain to country name.
$countrycodefile = "/dc/ud/www/$Version/country-codes";

# Edit the next two lines to specify the location of your server access log
# and your server configuration (srm.conf) file.
$access_log = '/dc/ud/www/httpd_1.1/logs/access_log';
$srm_conf = '/dc/ud/www/httpd_1.1/conf/srm.conf';

# Edit the next line to specify the command for displaying compressed files
$zcat = 'gunzip -c';    # specify as null string if none are available

# Estimate the size of a redirect message minus the two location URLs
$RedirEstimate = 255;   # in bytes (either 255 or 259 depending on date fmt)
$DirectoryRedirect = 1; # Does server do automatic redirect for slashless
                        # index reqs? (1 for httpd_1.1, 0 for httpd_1.0)

# Is the server running with rfc931 support (IdentityCheck on)?
$IdentityCheck = 0;     # Must = 1 if server uses rfc931 remote ident.

# NOTE: For security reasons, you should not publish to the web any report
# that lists the Remote Identifiers.  This option is intended for server
# maintenance only.  Use the -r option on the command-line instead.
$Do_Ident = 0;          # Set = 2 if IdentityCheck and Ident always desired.

# If address in log entry is one word (a local host), append what?
$AppendToLocalhost = ""; # Either "" or appropriate ".sub.dom.ain"

# Edit the next two lines to customize for your domain.
# This will allow your domain to be separated in the domain listing.
# Note that you may also want to change the domain selection logic
# (where these variables are used) if you are at a site without
# multi-level subdomains.
$mydom1 = "uci";
$mydom2 = "edu";

# Edit the next three lines to specify whether (1) or not (0) you want to
# display the IP address on reversed subdomain listings as follows:
$LocalFullAddress = 0;   # Show full address for hosts in my domain?
$OthersFullAddress = 0;  # Show full address for hosts outside my domain?
$ShowUnresolved = 0;     # Show all unresolved addresses?

$Verbose = 0;            # Display valid log entries on STDERR?
$PrintInvalids = 0;      # Display invalid log entries on STDERR?
$PrintNonexist = 0;      # Display nonexistant file requests on STDERR?
$CompressedLog = 0;      # Access log has been compressed (or gzipped)?
$IncludeFile = "";       # Prior output file to include first.
$SearchAddress = "";     # Substring to look for in IP addresses.
$SearchDate = "";        # Substring to look for in Date stamps.
$SearchTime = "";        # Substring to look for in Time stamps.
$SearchArchive = "";     # Substring to look for in Archive names.

# ==========================================================================
# Get the command-line options

require "getopts.pl";

# FIX: the original tested $@ after calling &Getopts, but &Getopts never
# sets $@ (only eval does), so invalid options were silently accepted.
# Getopts returns false on an unrecognized option; test that instead.
if (!&Getopts('helLoOuUrvxzf:s:i:a:d:t:n:') || $opt_h) { &usage; }
if ($opt_e) { $PrintInvalids = 1; }
if ($opt_l) { $LocalFullAddress = 1; }
if ($opt_L) { $LocalFullAddress = 0; }
if ($opt_o) { $OthersFullAddress = 1; }
if ($opt_O) { $OthersFullAddress = 0; }
if ($opt_u) { $ShowUnresolved = 1; }
if ($opt_U) { $ShowUnresolved = 0; }
if ($opt_r) { $Do_Ident = 2; }
if ($opt_v) { $Verbose = 1; }
if ($opt_x) { $PrintNonexist = 1; }
if ($opt_z)
{
    if ($opt_f) { $CompressedLog = 1; }   # Require logfile name if
    else        { &usage; }               # uncompression is desired
}
if ($opt_f) { $access_log = $opt_f; }
if ($opt_s) { $srm_conf = $opt_s; }
if ($opt_i) { $IncludeFile = $opt_i; }
if ($opt_a)
{
    $SearchAddress = $opt_a;
    $SearchAddress =~ s/(\W)/\\\1/g;  # Needed for later pattern match
    $SearchAddress =~ s/^\\\^/\^/;    # but restore "starts with"
    $SearchAddress =~ s/\\\$$/\$/;    # and "ends with"
}
if ($opt_d) { $SearchDate = $opt_d; }
if ($opt_t) { $SearchTime = $opt_t; }
if ($opt_n)
{
    $SearchArchive = $opt_n;
    $SearchArchive =~ s/(\W)/\\\1/g;  # Needed for later pattern match
    $SearchArchive =~ s/^\\\^/\^/;    # but restore "starts with"
    $SearchArchive =~ s/\\\$$/\$/;    # and "ends with"
}
if ($CompressedLog && !$zcat)
{
    die "No zcat decompression command has been defined, stopped";
}
if (!$IdentityCheck) { $Do_Ident = 0; }   # No users allowed if no ident

# ==========================================================================
# Get the other needed configuration items from the srm.conf file

open (SRM,$srm_conf) || die "Error opening config file: $srm_conf\n";
$UserDir = "public_html";   # Start with NCSA defaults
$DirectoryIndex = "index.html";
$DocumentRoot = "/usr/local/etc/httpd/htdocs";
while (<SRM>)   # FIX: readline operator <SRM> was lost in this copy
{
    next if ( ($_ eq "\n") || /^\#/ );   # Ignore blank and comment lines
    if    (/^DocumentRoot (.+)\s/)   { $DocumentRoot = $1; }
    elsif (/^UserDir (.+)\s/)        { $UserDir = $1; }
    elsif (/^DirectoryIndex (.+)\s/) { $DirectoryIndex = $1; }
    elsif (/^Redirect\s+(\S+)\s+(\S+)\s/)
    {
        $alias = $1;
        # FIX: the original used the bareword RedirEstimate here, which
        # evaluates as the string "RedirEstimate" (numerically 0) instead
        # of the configured $RedirEstimate byte estimate.
        $rname = $RedirEstimate + (2 * length($2)); # Estimate size of message
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllRedirects{$alias} = $rname;
    }
    elsif (/^Alias\s+(\S+)\s+(\S+)\s/)
    {
        $alias = $1;
        $rname = $2;
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllAliases{$alias} = $rname;
    }
    elsif ( /^ScriptAlias\s+(\S+)\s+(\S+)\s/ ||
            /^OldScriptAlias\s+(\S+)\s+(\S+)\s/ )
    {
        $alias = $1;
        $rname = $2;
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllScripts{$alias} = $rname;
    }
}
close SRM;

# ==========================================================================
# If an old output file is to be included, read it into the counters
# We assume that the old output file was created with the same options
# and that its content is disjunct from the current access_log.
# NOTE that using search options with inclusion cannot work unless the
# included file was also created with those search options.
# NOTE(review): the two tag strings below look HTML-stripped in this copy
# (both are just a literal backslash-n); the include parser and the output
# code both depend on them, so they most likely were "<PRE>\n" and
# "</PRE>\n" in the pristine distribution -- verify before relying on -i.
$startTag = "\\n";   # Parse by finding the preformatted parts.
$endTag = "\\n";     # Note that these vars are used by output code.

if ($IncludeFile)
{
    open (OLD,$IncludeFile) ||
        die "Error opening file for inclusion: $IncludeFile\n";

    # This code depends on the order of the output being similar
    # to the default distribution.  If you change the output content,
    # you must also check to ensure this code still works.
    # $sequence counts tag transitions: odd = inside a <PRE> section.
    $sequence = 0;
OLDLINE:
    while (<OLD>)   # FIX: readline operator <OLD> was lost in this copy
    {
        if (!($sequence % 2))            # Are we outside a preformatted section?
        {
            if ($_ eq $startTag)         # Yes - Start of preformat?
            {
                $sequence++;
                if ($sequence != 1)      # Unless this is first section,
                {
                    $_ = <OLD>;          # Read past the two header lines
                    $_ = <OLD>;          # (FIX: both readlines were lost)
                }
            }
            next OLDLINE;
        }
        elsif ($_ eq $endTag)            # No - Is this the end of preformat?
        {
            $sequence++;
            if ($sequence == (12 + $Do_Ident))   # Are we done yet?
            {
                last OLDLINE;
            }
            next OLDLINE;
        }
        if ($sequence == 1)              # Now at "Files Transmitted ..."
        {
            /\s(\d+)\n/;                 # get last numeric word
            $xferfiles = $1;
            $_ = <OLD>;                  # next at "Bytes Transmitted ..."
            /\s(\d+)\n/;                 # get last numeric word
            $xferbytes = $1;
            $_ = <OLD>;                  # skip the two averages
            $_ = <OLD>;                  # (FIX: both readlines were lost)
        }
        elsif ($sequence == 3)           # Now in Daily Transmission Statistics
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $daytime = $3;                       # the date after "|"
            $dayfiles{$daytime} = $2;            # the first number left of "|"
            $daybytes{$daytime} = $1;            # the second number left of "|"
        }
        elsif ($sequence == 5)           # Now in Hourly Transmission Statistics
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S+)\s/;    # Parse out:
            $hour = $3;                          # the hour after "|"
            $hourfiles{$hour} = $2;              # the first number left of "|"
            $hourbytes{$hour} = $1;              # the second number left of "|"
        }
        elsif ($sequence == 7)           # Now in Client Domain
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S+)\s/;    # Parse out:
            $domain = $3;                        # the domain abbrev after "|"
            $domainfiles{$domain} = $2;          # the first number left of "|"
            $domainbytes{$domain} = $1;          # the second number left of "|"
        }
        elsif ($sequence == 9)           # Now in Reversed Subdomain
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $subdomain = $3;                     # the subdomain after "|"
            $subdomainfiles{$subdomain} = $2;    # the first number left of "|"
            $subdomainbytes{$subdomain} = $1;    # the second number left of "|"
        }
        elsif ($sequence == 11)          # Now in Archive Section
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $pathkey = $3;                       # the pathkey after "|"
            $groupfiles{$pathkey} = $2;          # the first number left of "|"
            $groupbytes{$pathkey} = $1;          # the second number left of "|"
        }
        elsif (($sequence == 13) && $Do_Ident)   # Now in Ident Section
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $ident = $3;                         # the ident after "|"
            $identfiles{$ident} = $2;            # the first number left of "|"
            $identbytes{$ident} = $1;            # the second number left of "|"
        }
        else                             # Now in Hell (too many preformats)
        {
            print (STDERR "Warning: Something is wrong with $IncludeFile");
            last OLDLINE;
        }
    }
    close OLD;
}

# ==========================================================================
# Now read and accumulate statistics for each access logged
#
$thistime = time;                        # Get the current date-time stamp
$Updated = &wtime($thistime,'');         # Format it as local time
$UpdatedGMT = &wtime($thistime,'GMT');   # and also as GMT time

if ($CompressedLog)
{
    $access_log = "$zcat $access_log |"; # Read through a decompression pipe
}
if ($Verbose)
{
    print(STDERR "$Version: $Updated\n");
    print(STDERR "Using access log \"$access_log\"\n");
}
open (LOG,$access_log) || die "Error opening access log file: $access_log\n";

LINE:
while (<LOG>)   # FIX: readline operator <LOG> was lost in this copy
{
    $saveline = $_;
    if ($IdentityCheck)                  # Does log include IdentityCheck info?
    {
        /^(.*@\S+)\s/;
        # NOTE(review): "$_" here is the whole log line and thus always
        # true; this probably was meant to test whether the match above
        # succeeded (e.g. $1 or $&) -- confirm against original source.
        if ($_)
        {
            $ident = $1;                 # Save ident for later use
            $ident =~ s/\[.*\]/COOKIE/g; # Replace all magic cookies
            $saveline =~ s/^.*@//;       # Remove the remote ident from log
        }
        else { $ident = ""; }
        $_ = $saveline;
    }
    @line = split;
    #
    # First, we have to figure out what file or script was accessed
    #
    $fname = $line[7];
    $fname =~ s/\?.*$//;                 # Remove any trailing query information
    $fname =~ s/\#.*$//;                 # Remove any trailing anchor information
    $fname =~ s#//#/#g;                  # Remove any extra slashes
    if (($fname eq "") || ($fname eq "HTTP/1.0"))
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
FNAME:                                   # Get the document's real name
    {
        $rname = "";                     # and start with it unknown
        $fsize = 0;
        study $fname;
        if (($fname eq "/") || ($fname eq "/$DirectoryIndex"))
        {
            $fname = "$ServerHome";      # Handle top file with extra care
            $rname = "$DocumentRoot$fname";
            last FNAME;
        }
        foreach $redir (keys(%AllRedirects))  # Is it a redirected file?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$redir/ )
            {
                $fname = "Redirected Document Requests";
                $fsize = $AllRedirects{$redir};
                last FNAME;
            }
        }
        foreach $alias (keys(%AllAliases))    # Is it a file name alias?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$alias/ )
            {
                $rname = $fname;
                $rname =~ s#^$alias#$AllAliases{$alias}#;
                last FNAME;
            }
        }
        if ($fname =~ /^\/~(\w+)\// )    # Is it a /~username/...?
        {
            ($name,$passwd,$uid,$gid,$quota,$comment,$gcos,$dir,$shell)
                = getpwnam($1);
            if ($dir)
            {
                $rname = $fname;
                $rname =~ s#^/~$1#$dir/$UserDir#;
            }
            else { $fname = "All Bad Usernames"; }
            last FNAME;
        }
        if ($fname =~ /^\/~(\w+)$/ )     # Is it a /~username ?
        {
            ($name,$passwd,$uid,$gid,$quota,$comment,$gcos,$dir,$shell)
                = getpwnam($1);
            if ($dir)
            {
                $rname = $fname;
                $rname =~ s#^/~$1#$dir/$UserDir#;
                if (-e "$rname/$DirectoryIndex")
                {
                    if ($DirectoryRedirect)
                    {
                        $fsize = ($RedirEstimate + 60) + (2 * length($fname));
                        $fname = "Redirected Directory Requests";
                        $rname = "";
                        last FNAME;
                    }
                    $rname .= "/$DirectoryIndex";
                }
                $fname .= "/";           # Append a trailing slash
            }
            else { $fname = "All Bad Usernames"; }
            last FNAME;
        }
        foreach $alias ( keys(%AllScripts) )  # Is it a script directory alias?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$alias/ )
            {
                $fname = "All Scripts";  # Don't report script names
                last FNAME;              # We can't know the transmit bytes
            }
        }
        if (-d "$DocumentRoot$fname")    # Is it a directory?
        {
            $hasSlash = ($fname =~ s/\/$//);  # Remove any trailing slash
            if (-e "$DocumentRoot$fname/$DirectoryIndex")
            {
                if (!$hasSlash && $DirectoryRedirect)
                {
                    $fsize = ($RedirEstimate + 60) + (2 * length($fname));
                    $fname = "Redirected Directory Requests";
                    last FNAME;
                }
                $rname = "$DocumentRoot$fname/$DirectoryIndex";
            }
            else { $rname = "$DocumentRoot$fname"; }
            $fname .= "/";               # Append a trailing slash
            last FNAME;
        }
        $rname = "$DocumentRoot$fname";  # It must be a normal file
    } # end FNAME
    $xname = "";
    if (!$fsize && $rname)               # Get the file size
    {                                    # through use of a cache of Sizes
        ($fsize = $Sizes{$fname}) ||
        ($fsize = $Sizes{$fname} = (-s $rname)) ||
        (($xname = $fname) && ($fname = "Nonexistant Files"));
    }
    $fname =~ s#/$DirectoryIndex$#/#;    # Remove any trailing index name
    if ($SearchArchive)
    {
        next LINE unless ( $fname =~ /$SearchArchive/ );
    }
    #
    # If you want to further restrict Archive Section names, do it here.
    # For example, if you wanted to show all GIFs as a single total,
    # then you would do:
    #
    #   if ($fname =~ /\.gif$/) { $pathkey = "All GIFs"; }
    #   else                    { $pathkey = $fname; }
    #
    if    ($SearchArchive)              { $pathkey = $fname; }
    elsif ($fname =~ /^\/Icons\/\w/)    { $pathkey = "All Icons (site)"; }
    elsif ($fname =~ /^\/icons\/\w/)    { $pathkey = "All Icons (server)"; }
    elsif ($fname =~ /^\/pictures\/\w/) { $pathkey = "All Pictures"; }
    else                                { $pathkey = $fname; }
    if ($pathkey eq '')
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
    #
    # Phew!  Now we have to figure out when and by whom it was accessed.
    #
    $_ = $saveline;
    /\[(.*)\]/;                          # Date stamp is inside brackets
    if ($1 eq "")
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
    $daytime = substr($1, 4, 6) . substr($1, 19, 5);  # "Mon dd  yyyy"
    $hour = substr($1,11, 2);                         # "hh"
    if ($SearchDate) { next LINE unless ( $daytime =~ /$SearchDate/ ); }
    if ($SearchTime) { next LINE unless ( $hour =~ /$SearchTime/ ); }
    if ($Verbose) { print(STDERR "$daytime $hour $fsize $fname\n"); }
    #
    # Get IP address and determine domain and reversed subdomain.
    #
    $afield = $line[0];
    $afield =~ tr/A-Z/a-z/;
    if ($SearchAddress)
    {
        next LINE unless ( $afield =~ /$SearchAddress/ );
    }
    @address = split(/\./, $afield);
    if ( $#address < 1 )                 # Usually caused by garbage in log
    {                                    # or perhaps a strange IP setup
        if ($AppendToLocalhost)          # or perhaps perfectly normal
        {
            $domain = "$mydom1\.$mydom2";
            # NOTE(review): appending the domain suffix to $ident (as well
            # as $afield) looks suspicious -- confirm against original.
            $ident .= $AppendToLocalhost;
            $afield .= $AppendToLocalhost;
            @address = split(/\./, $afield);
            if ((!$LocalFullAddress)&&($#address > 1))
            {
                shift(@address);
            }
            $subdomain = join('.', reverse(@address));
        }
        else
        {
            print(STDERR "$.:$saveline") if $PrintInvalids;
            $domain = "localhost";
            $subdomain = $afield;
        }
    }
    elsif ( $address[$#address] =~ /^[0-9]+$/ )   # Numeric = unresolved
    {
        $domain = "unresolved";
        if ($ShowUnresolved) { $subdomain = join('.', reverse(@address)); }
        else                 { $subdomain = "Unresolved"; }
    }
    elsif ($address[$#address-1] eq "$mydom1" &&
           $address[$#address]   eq "$mydom2" )   # Host in my domain
    {
        $domain = "$mydom1\.$mydom2";
        if ((!$LocalFullAddress)&&($#address > 1))
        {   # If the address has at least 3 components
            shift(@address);             # clip off the machine name
        }
        $subdomain = join('.', reverse(@address));
    }
    else                                 # Host in some other domain
    {
        $domain = $address[$#address];
        if ((!$OthersFullAddress)&&($#address > 1))
        {   # If the address has at least 3 components
            shift(@address);             # clip off the machine name
        }
        $subdomain = join('.', reverse(@address));
    }
    if ($PrintNonexist && $xname)
    {
        print(STDERR "$daytime $hour $xname BY $afield\n");
    }
    #
    # Now that we have categorized it, add it to the corresponding counters
    #
    $xferfiles++;                        # total files sent
    $dayfiles{$daytime}++;               # per day
    $groupfiles{$pathkey}++;             # per file
    $domainfiles{$domain}++;             # per domain
    $subdomainfiles{$subdomain}++;       # per subdomain
    $xferbytes += $fsize;                # total bytes sent
    $daybytes{$daytime} += $fsize;       # bytes per day
    $groupbytes{$pathkey} += $fsize;     # per file
    $domainbytes{$domain} += $fsize;     # per domain
    $subdomainbytes{$subdomain} += $fsize; # per subdomain
    $hourfiles{$hour}++;                 # total files per hour
    $hourbytes{$hour} += $fsize;         # bytes per hour
    if ($Do_Ident)                       # Is Ident Table desired?
    {
        $identfiles{$ident}++;           # files per ident
        $identbytes{$ident} += $fsize;   # bytes per ident
    }
}
close LOG;

@dates = sort datecompare keys(%daybytes);  # FIX: % sigil was missing
if ($xferfiles == 0) {die "There was no matching data to summarize.\n";}

# ==========================================================================
# Finally, we can print out the resulting statistics as a series of forms.
#
# Note: if you have a heavily used server, you may need to increase
#       the length of the numeric output fields in the forms below.
#
# NOTE(review): the print statements below appear to have had their HTML
# markup stripped in this copy (e.g. "\\\n" where a tag once was); they
# are preserved byte-for-byte pending comparison with a pristine copy.

$DailyHeader = "Daily Transmission Statistics";
$HourlyHeader = "Hourly Transmission Statistics";
$DomainHeader = "Total Transfers by Client Domain";
$SubdomainHeader = "Total Transfers by Reversed Subdomain";
$ArchiveHeader = "Total Transfers from each Archive Section";
$IdentHeader = "Total Transfers to each Remote Identifier";

print "\\\n";
print "\$OutputTitle\\n";
print "\\\n";
print "\$OutputTitle\\n";
print "\Last updated: $Updated ($UpdatedGMT)\\n";
print "\\n";
print "\\$DailyHeader\\n";
print "\\$HourlyHeader\\n";
print "\\$DomainHeader\\n";
print "\\$SubdomainHeader\\n";
print "\\$ArchiveHeader\\n";
if ($Do_Ident) { print "\\$IdentHeader\\n"; }
if ($LastSummary)                        # Link to the prior summary period
{
    $prevmonth = &lastmonth($dates[0]);
    $LastSummary =~ s/%M/$prevmonth/g;
    print "\\Previous Full Summary Period\\n";
}
print "\\n";
print "\Totals for Summary Period: ","$dates[0]"," to ",$dates[$#dates],
      "\\n";
print $startTag;
printf ("Files Transmitted During Summary Period %14.0f\n", $xferfiles);
printf ("Bytes Transmitted During Summary Period %14.0f\n", $xferbytes);
printf ("Average Files Transmitted Daily %14.0f\n",
        $xferfiles / ($#dates + 1));
printf ("Average Bytes Transmitted Daily %14.0f\n",
        $xferbytes / ($#dates + 1));
print $endTag;

# Guard against division by zero in the percentage columns below.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

# ==========================================================================
print "\\n";
print "\\$DailyHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Date\n";
print "----- ----- ------------ -------- |------------\n";
foreach $date ( sort datecompare keys(%daybytes) )  # FIX: % sigil missing
{
    $files = $dayfiles{$date};
    $bytes = $daybytes{$date};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",$pctfiles,$pctbytes,$bytes,$files,$date);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$HourlyHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Time\n";
print "----- ----- ------------ -------- |-----\n";
foreach $hour ( sort keys(%hourbytes) )             # FIX: % sigil missing
{
    $files = $hourfiles{$hour};
    $bytes = $hourbytes{$hour};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",$pctfiles,$pctbytes,$bytes,$files,$hour);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$DomainHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Domain\n";
print "----- ----- ------------ -------- |------------------------------------\n";
%codetable=&initcountryname();
foreach $domain ( sort domnamcompare keys(%domainfiles) ) # FIX: % missing
{
    # NOTE(review): the first assignment is immediately overwritten; it
    # may have been intended as a fallback when the code table has no
    # entry (countryname returns undef) -- preserved as found.
    $country = $domain;
    $country = &countryname($domain,%codetable);
    $files = $domainfiles{$domain};
    $bytes = $domainbytes{$domain};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %-5s %s\n",
            $pctfiles,$pctbytes,$bytes,$files, $domain, $country);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$SubdomainHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Reversed Subdomain\n";
print "----- ----- ------------ -------- |------------------------------------\n";
foreach $subdomain ( sort keys(%subdomainfiles) )   # FIX: % sigil missing
{
    $files = $subdomainfiles{$subdomain};
    $bytes = $subdomainbytes{$subdomain};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",
            $pctfiles,$pctbytes,$bytes,$files, $subdomain);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$ArchiveHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Archive Section\n";
print "----- ----- ------------ -------- |------------------------------------\n";
foreach $section ( sort keys(%groupfiles) )         # FIX: % sigil missing
{   # (sort bytecompare instead to order sections by bytes transferred)
    $files = $groupfiles{$section};
    $bytes = $groupbytes{$section};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",
            $pctfiles, $pctbytes, $bytes, $files, $section);
}
print $endTag;

# ==========================================================================
if ($Do_Ident)                           # Is Ident Table desired?
{
    print "\\n";
    print "\\$IdentHeader\\\n";
    print $startTag;
    print "%Reqs %Byte Bytes Sent Requests Remote Identifier\n";
    print "----- ----- ------------ -------- |------------------------------------\n";
    foreach $ident ( sort keys(%identfiles) )       # FIX: % sigil missing
    {
        $files = $identfiles{$ident};
        $bytes = $identbytes{$ident};
        if ($files == $xferfiles) { $pctfiles = "100.0"; }
        else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
        if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
        else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
        printf ("%s %s %12d %8d | %s\n",
                $pctfiles, $pctbytes, $bytes, $files, $ident);
    }
    print $endTag;
}

# ==========================================================================
print "\\n";
print "\This summary was generated by \n";
# NOTE(review): the next statement of the original read `print "\";` --
# an unterminated string literal left behind when HTML markup (almost
# certainly an <A HREF=...> anchor) was stripped from this copy.  It has
# been removed so the file compiles; restore the full footer from a
# pristine wwwstat-1.0 distribution.
print "old$Version\\n";
print "\\n";
print "\\\n";
exit(0);

# ==========================================================================
# initcountryname
#   Read the ISO code / country name table from $countrycodefile and
#   return it as a hash mapping lowercased code -> country name.
sub initcountryname
{   # read in table of ISO codes and country names - added by jem
    open (blah, "<$countrycodefile") || die "Can't open $countrycodefile";
    while (<blah>)   # FIX: readline operator <blah> was lost in this copy
    {
        chop;
        # FIX: limit the split to two fields; a plain split(' ') kept
        # only the first word of multi-word names ("United States").
        local($iso,$name)=split(' ',$_,2);
        $iso =~ y/A-Z/a-z/;
        $code{$iso}=$name;
    }
    close blah;
    return %code;
}

# countryname
#   Return the country name for an ISO code, given the code table.
sub countryname
{
    local($iso, %codetable) = @_;
    return $codetable{$iso};
}

# datecompare
#   sort() comparator for dates formatted "Mon dd  yyyy": orders by
#   year, then month (via index into the month-name string), then day.
sub datecompare
{
    local($[) = 0;
    local($date1) = substr($a, 7, 4) * 512;
    local($date2) = substr($b, 7, 4) * 512;
    $date1 += index("JanFebMarAprMayJunJulAugSepOctNovDec",substr($a,0,3))*12;
    $date2 += index("JanFebMarAprMayJunJulAugSepOctNovDec",substr($b,0,3))*12;
    $date1 += substr($a, 4, 2);
    $date2 += substr($b, 4, 2);
    $date1 - $date2;
}

# domnamcompare
#   sort() comparator for domain abbreviations: shorter names first,
#   ties broken alphabetically.
sub domnamcompare
{
    $sdiff = length($a) - length($b);
    ($sdiff < 0) ? -1 :
    ($sdiff > 0) ?  1 :
    ($a lt $b)   ? -1 :
    ($a gt $b)   ?  1 : 0;
}

# bytecompare
#   sort() comparator for archive sections: most bytes transferred
#   first, ties broken alphabetically.  (Available as an alternative
#   ordering for the Archive Section table.)
sub bytecompare
{
    $bdiff = $groupbytes{$b} - $groupbytes{$a};
    ($bdiff < 0) ? -1 :
    ($bdiff > 0) ?  1 :
    ($a lt $b)   ? -1 :
    ($a gt $b)   ?  1 : 0;
}

# ===========================================================================
# The following subroutine should be in a package, but I'm lazy.
# This is a modified (by Roy Fielding) version of Perl 4.036's ctime.pl
# library by Waldemar Kebsch and
# Marion Hakanson.  It is distributed under the
# Artistic License (included with your Perl distribution files).
#
# wtime returns a time string in the format "Wkd, Dy Mon Year HH:MM:SS Zone"
# with no newline appended.
#
# USAGE:
#
#     wtime(time,'');     -- returns the local time with no timezone appended
#                            As in "Wed, 15 Dec 1993 23:59:59 "
#
#     wtime(time,'GMT');  -- returns GMT time
#                            As in "Wed, 16 Dec 1993 07:59:59 GMT"
#
sub wtime
{
    local($time, $TZ) = @_;
    local($[) = 0;
    local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
    local(@DoW) = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
    local(@MoY) = ('Jan','Feb','Mar','Apr','May','Jun',
                   'Jul','Aug','Sep','Oct','Nov','Dec');

    # Determine what time zone is in effect.  Use local time if
    # TZ is anything other than 'GMT'.
    # There's no portable way to find the system default timezone.
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
        ($TZ eq 'GMT') ? gmtime($time) : localtime($time);

    $year += ($year < 70) ? 2000 : 1900;
    sprintf("%s, %02d %s %4d %02d:%02d:%02d %s",
            $DoW[$wday], $mday, $MoY[$mon], $year, $hour, $min, $sec, $TZ);
}

# ===========================================================================
# lastmonth
#   Return the three-letter abbreviation for the month before the one
#   in the date passed as an argument ("Mon dd  yyyy"); 'Err' if the
#   month cannot be recognized.
sub lastmonth
{
    local($date) = @_;   # Should be in the format "Feb 01  1994"
    local($[) = 0;
    local($Mstr) = 'JanFebMarAprMayJunJulAugSepOctNovDec';
    local($midx) = index($Mstr, substr($date,0,3));

    if    ($midx <  0) { return 'Err'; }
    elsif ($midx == 0) { return 'Dec'; }   # January wraps to December
    else  { return substr($Mstr,($midx - 3),3); }
}