#!/usr/public/bin/perl
# ---------------------------------------------------------------------------
$Version = 'wwwstat-1.0';
#
# Copyright (c) 1994 Regents of the University of California.
# All rights reserved.
#
# This software has been developed by Roy Fielding as
# part of the Arcadia project at the University of California, Irvine.
# Wwwstat was originally based on a multi-server statistics program called
# fwgstat-0.035 by Jonathan Magid (jem@sunsite.unc.edu) which, in turn,
# was heavily based on xferstats (packaged with the version 17 of the
# Wuarchive FTP daemon) by Chris Myers (chris@wugate.wustl.edu).
# As such, this software and all derivations must remain in the public domain.
# See below for further licensing information.
#
# NOTE(review): this copy of the file has had its original line breaks
# collapsed in transit; the layout below (including the usage text) has
# been reflowed and should be checked against a pristine distribution.
#
# usage
#   Print the command-line synopsis on STDERR and terminate (via die).
#   Called for -h and whenever the options cannot be parsed.
sub usage
{
    die <<"EndUsage";
usage: wwwstat [-helLoOuUrvxz] [-f logfile] [-s srmfile] [-i pathname]
               [-a IP_address] [-n archive_name] [-d date] [-t time]

$Version

Process an NCSA httpd 1.x access_log file and output an HTML summary.

Display Options:
    -h  Help -- just display this message and quit.
    -e  Display all invalid log entries on STDERR. (default is to ignore them)
    -l  Do display full IP address of clients in my domain.
    -L  Don't display full IP address of clients in my domain. (default)
    -o  Do display full IP address of clients from other domains.
    -O  Don't display full IP address of clients from other domains. (default)
    -u  Do display IP address from unresolved domain names.
    -U  Don't display IP address from unresolved domain names. (default)
    -r  If IdentityCheck, display table of requests by each remote ident.
    -v  Verbose display (to STDERR) of each log entry processed.
    -x  Display all requests of nonexistant files to STDERR.

Input Options:
    -f  Read from the following access_log file instead of the default.
    -z  Use zcat to uncompress the log file while reading [requires -f].
    -s  Get the server directives from the following srm.conf file.
    -i  Include the following file (assumed to be a prior wwwstat output).

Search Options (include in summary only those log entries):
    -a  Containing the following "substring" in the IP address.
    -d  Containing the following "substring" in the date.
    -t  Containing the following "substring" in the time.
    -n  Containing the following "substring" in the archive (URL) name.
EndUsage
}
#
# The intention is that this be run by a wrapper program as a crontab
# entry just before midnight, with its output redirected to a temporary
# file which can then be moved to the site's summary file.  The temporary
# file is necessary because the old summary file will be fstat'd for its
# size during wwwstat's processing (resulting in 0 if output is redirected
# to the summary file during processing).
#
# One of the nicest things about wwwstat is that it does not make any
# changes to or write any files in the server directories.  Thus, this
# program can be safely run by any user with read access to the httpd
# server's access_log and srm.conf files.  This allows people to do
# specialized summaries of just the things they are interested in.
#
# Note that the Search Options can specify that the substring must be
# a prefix by using the caret (e.g. "^substring") and/or must be
# a suffix by using the dollar (e.g. 'substring$').  However, they may
# need to be escaped on the command line to avoid shell interpretation.
# Searches on date and time can include full Perl regular expressions,
# including ranges such as -d " [1-7] " or -d " ( [8-9]|1[0-4]) " to
# indicate the first or second week, respectively.
#
# This program could easily be modified to run as a CGI script, but that
# is not recommended for slow processors or heavily utilized servers
# unless some effort is made to keep the active log file very small.
#
# See the file Changes for known problems and version information.
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the author Roy Fielding at .
# NOTE(review): the author's e-mail address appears to have been stripped
# from the line above (angle-bracketed text lost in transit).
#
# Redistribution and use in source and binary forms are permitted,
# subject to the restriction noted below, provided that the above
# copyright notice and this paragraph and the following paragraphs are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed in part by the University of
# California, Irvine.  The name of the University may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.  THIS SOFTWARE IS PROVIDED ``AS
# IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
# LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION
# (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------
# Set the default configuration options:

# Edit the next line to specify the output document's HTML Title.
$OutputTitle = "World-Wide Web Access Statistics for www.ics.uci.edu";

# Edit the next line to specify the URL of the previous summary period
# (for use only as a hypertext link).  Set it = "" if link is unwanted.
# The three-letter abbrev for last month is substituted at any "%M"
# Note that this is the month prior to the earliest one in this summary.
$LastSummary = "http://www.ics.uci.edu/Admin/%M.wwwstats.html";

# Edit the next line to identify the server's default home page.
# Requests for "/" (or "/$DirectoryIndex") are counted under this name.
$ServerHome = "/ICShome.html";

# Edit the next line to locate the country-codes file.
# This is a file of the format:
# domain    text
# which will allow expansion from domain to country name.
$countrycodefile = "/dc/ud/www/$Version/country-codes";

# Edit the next two lines to specify the location of your server access log
# and your server configuration (srm.conf) file.
$access_log = '/dc/ud/www/httpd_1.1/logs/access_log';
$srm_conf = '/dc/ud/www/httpd_1.1/conf/srm.conf';

# Edit the next line to specify the command for displaying compressed files
$zcat = 'gunzip -c';    # specify as null string if none are available

# Estimate the size of a redirect message minus the two location URLs
$RedirEstimate = 255;   # in bytes (either 255 or 259 depending on date fmt)
$DirectoryRedirect = 1; # Does server do automatic redirect for slashless
                        # index reqs? (1 for httpd_1.1, 0 for httpd_1.0)

# Is the server running with rfc931 support (IdentityCheck on)?
$IdentityCheck = 0;     # Must = 1 if server uses rfc931 remote ident.

# NOTE: For security reasons, you should not publish to the web any report
# that lists the Remote Identifiers.  This option is intended for server
# maintenance only.  Use the -r option on the command-line instead.
$Do_Ident = 0;          # Set = 2 if IdentityCheck and Ident always desired.

# If address in log entry is one word (a local host), append what?
$AppendToLocalhost = ""; # Either "" or appropriate ".sub.dom.ain"

# Edit the next two lines to customize for your domain.
# This will allow your domain to be separated in the domain listing.
# Note that you may also want to change the domain selection logic
# (where these variables are used) if you are at a site without
# multi-level subdomains.
$mydom1 = "uci";
$mydom2 = "edu";

# Edit the next three lines to specify whether (1) or not (0) you want to
# display the IP address on reversed subdomain listings as follows:
$LocalFullAddress = 0;   # Show full address for hosts in my domain?
$OthersFullAddress = 0;  # Show full address for hosts outside my domain?
$ShowUnresolved = 0;     # Show all unresolved addresses?

$Verbose = 0;            # Display valid log entries on STDERR?
$PrintInvalids = 0;      # Display invalid log entries on STDERR?
$PrintNonexist = 0;      # Display nonexistant file requests on STDERR?
$CompressedLog = 0;      # Access log has been compressed (or gzipped)?
$IncludeFile = "";       # Prior output file to include first.
$SearchAddress = "";     # Substring to look for in IP addresses.
$SearchDate = "";        # Substring to look for in Date stamps.
$SearchTime = "";        # Substring to look for in Time stamps.
$SearchArchive = "";     # Substring to look for in Archive names.

# ==========================================================================
# Get the command-line options

require "getopts.pl";

# FIX: the original tested $@ after calling &Getopts, but &Getopts never
# sets $@ (only eval does), so invalid options were silently accepted.
# Getopts returns false on an unrecognized option; test that instead.
if (!&Getopts('helLoOuUrvxzf:s:i:a:d:t:n:') || $opt_h) { &usage; }
if ($opt_e) { $PrintInvalids = 1; }
if ($opt_l) { $LocalFullAddress = 1; }
if ($opt_L) { $LocalFullAddress = 0; }
if ($opt_o) { $OthersFullAddress = 1; }
if ($opt_O) { $OthersFullAddress = 0; }
if ($opt_u) { $ShowUnresolved = 1; }
if ($opt_U) { $ShowUnresolved = 0; }
if ($opt_r) { $Do_Ident = 2; }
if ($opt_v) { $Verbose = 1; }
if ($opt_x) { $PrintNonexist = 1; }
if ($opt_z)
{
    if ($opt_f) { $CompressedLog = 1; }   # Require logfile name if
    else        { &usage; }               # uncompression is desired
}
if ($opt_f) { $access_log = $opt_f; }
if ($opt_s) { $srm_conf = $opt_s; }
if ($opt_i) { $IncludeFile = $opt_i; }
if ($opt_a)
{
    $SearchAddress = $opt_a;
    $SearchAddress =~ s/(\W)/\\\1/g;  # Needed for later pattern match
    $SearchAddress =~ s/^\\\^/\^/;    # but restore "starts with"
    $SearchAddress =~ s/\\\$$/\$/;    # and "ends with"
}
if ($opt_d) { $SearchDate = $opt_d; }
if ($opt_t) { $SearchTime = $opt_t; }
if ($opt_n)
{
    $SearchArchive = $opt_n;
    $SearchArchive =~ s/(\W)/\\\1/g;  # Needed for later pattern match
    $SearchArchive =~ s/^\\\^/\^/;    # but restore "starts with"
    $SearchArchive =~ s/\\\$$/\$/;    # and "ends with"
}
if ($CompressedLog && !$zcat)
{
    die "No zcat decompression command has been defined, stopped";
}
if (!$IdentityCheck) { $Do_Ident = 0; }   # No users allowed if no ident

# ==========================================================================
# Get the other needed configuration items from the srm.conf file

open (SRM,$srm_conf) || die "Error opening config file: $srm_conf\n";
$UserDir = "public_html";   # Start with NCSA defaults
$DirectoryIndex = "index.html";
$DocumentRoot = "/usr/local/etc/httpd/htdocs";
while (<SRM>)   # FIX: readline operator <SRM> was lost in this copy
{
    next if ( ($_ eq "\n") || /^\#/ );   # Ignore blank and comment lines
    if    (/^DocumentRoot (.+)\s/)   { $DocumentRoot = $1; }
    elsif (/^UserDir (.+)\s/)        { $UserDir = $1; }
    elsif (/^DirectoryIndex (.+)\s/) { $DirectoryIndex = $1; }
    elsif (/^Redirect\s+(\S+)\s+(\S+)\s/)
    {
        $alias = $1;
        # FIX: the original used the bareword RedirEstimate here, which
        # evaluates as the string "RedirEstimate" (numerically 0) instead
        # of the configured $RedirEstimate byte estimate.
        $rname = $RedirEstimate + (2 * length($2)); # Estimate size of message
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllRedirects{$alias} = $rname;
    }
    elsif (/^Alias\s+(\S+)\s+(\S+)\s/)
    {
        $alias = $1;
        $rname = $2;
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllAliases{$alias} = $rname;
    }
    elsif ( /^ScriptAlias\s+(\S+)\s+(\S+)\s/ ||
            /^OldScriptAlias\s+(\S+)\s+(\S+)\s/ )
    {
        $alias = $1;
        $rname = $2;
        $alias =~ s/(\W)/\\\1/g;                    # Needed for later pattern match
        $AllScripts{$alias} = $rname;
    }
}
close SRM;

# ==========================================================================
# If an old output file is to be included, read it into the counters
# We assume that the old output file was created with the same options
# and that its content is disjunct from the current access_log.
# NOTE that using search options with inclusion cannot work unless the
# included file was also created with those search options.
# NOTE(review): the two tag strings below look HTML-stripped in this copy
# (both are just a literal backslash-n); the include parser and the output
# code both depend on them, so they most likely were "<PRE>\n" and
# "</PRE>\n" in the pristine distribution -- verify before relying on -i.
$startTag = "\\n";   # Parse by finding the preformatted parts.
$endTag = "\\n";     # Note that these vars are used by output code.

if ($IncludeFile)
{
    open (OLD,$IncludeFile) ||
        die "Error opening file for inclusion: $IncludeFile\n";

    # This code depends on the order of the output being similar
    # to the default distribution.  If you change the output content,
    # you must also check to ensure this code still works.
    # $sequence counts tag transitions: odd = inside a <PRE> section.
    $sequence = 0;
OLDLINE:
    while (<OLD>)   # FIX: readline operator <OLD> was lost in this copy
    {
        if (!($sequence % 2))            # Are we outside a preformatted section?
        {
            if ($_ eq $startTag)         # Yes - Start of preformat?
            {
                $sequence++;
                if ($sequence != 1)      # Unless this is first section,
                {
                    $_ = <OLD>;          # Read past the two header lines
                    $_ = <OLD>;          # (FIX: both readlines were lost)
                }
            }
            next OLDLINE;
        }
        elsif ($_ eq $endTag)            # No - Is this the end of preformat?
        {
            $sequence++;
            if ($sequence == (12 + $Do_Ident))   # Are we done yet?
            {
                last OLDLINE;
            }
            next OLDLINE;
        }
        if ($sequence == 1)              # Now at "Files Transmitted ..."
        {
            /\s(\d+)\n/;                 # get last numeric word
            $xferfiles = $1;
            $_ = <OLD>;                  # next at "Bytes Transmitted ..."
            /\s(\d+)\n/;                 # get last numeric word
            $xferbytes = $1;
            $_ = <OLD>;                  # skip the two averages
            $_ = <OLD>;                  # (FIX: both readlines were lost)
        }
        elsif ($sequence == 3)           # Now in Daily Transmission Statistics
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $daytime = $3;                       # the date after "|"
            $dayfiles{$daytime} = $2;            # the first number left of "|"
            $daybytes{$daytime} = $1;            # the second number left of "|"
        }
        elsif ($sequence == 5)           # Now in Hourly Transmission Statistics
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S+)\s/;    # Parse out:
            $hour = $3;                          # the hour after "|"
            $hourfiles{$hour} = $2;              # the first number left of "|"
            $hourbytes{$hour} = $1;              # the second number left of "|"
        }
        elsif ($sequence == 7)           # Now in Client Domain
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S+)\s/;    # Parse out:
            $domain = $3;                        # the domain abbrev after "|"
            $domainfiles{$domain} = $2;          # the first number left of "|"
            $domainbytes{$domain} = $1;          # the second number left of "|"
        }
        elsif ($sequence == 9)           # Now in Reversed Subdomain
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $subdomain = $3;                     # the subdomain after "|"
            $subdomainfiles{$subdomain} = $2;    # the first number left of "|"
            $subdomainbytes{$subdomain} = $1;    # the second number left of "|"
        }
        elsif ($sequence == 11)          # Now in Archive Section
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $pathkey = $3;                       # the pathkey after "|"
            $groupfiles{$pathkey} = $2;          # the first number left of "|"
            $groupbytes{$pathkey} = $1;          # the second number left of "|"
        }
        elsif (($sequence == 13) && $Do_Ident)   # Now in Ident Section
        {
            /\s(\d+)\s+(\d+)\s+\|\s+(\S.*)\n/;   # Parse out:
            $ident = $3;                         # the ident after "|"
            $identfiles{$ident} = $2;            # the first number left of "|"
            $identbytes{$ident} = $1;            # the second number left of "|"
        }
        else                             # Now in Hell (too many preformats)
        {
            print (STDERR "Warning: Something is wrong with $IncludeFile");
            last OLDLINE;
        }
    }
    close OLD;
}

# ==========================================================================
# Now read and accumulate statistics for each access logged
#
$thistime = time;                        # Get the current date-time stamp
$Updated = &wtime($thistime,'');         # Format it as local time
$UpdatedGMT = &wtime($thistime,'GMT');   # and also as GMT time

if ($CompressedLog)
{
    $access_log = "$zcat $access_log |"; # Read through a decompression pipe
}
if ($Verbose)
{
    print(STDERR "$Version: $Updated\n");
    print(STDERR "Using access log \"$access_log\"\n");
}
open (LOG,$access_log) || die "Error opening access log file: $access_log\n";

LINE:
while (<LOG>)   # FIX: readline operator <LOG> was lost in this copy
{
    $saveline = $_;
    if ($IdentityCheck)                  # Does log include IdentityCheck info?
    {
        /^(.*@\S+)\s/;
        # NOTE(review): "$_" here is the whole log line and thus always
        # true; this probably was meant to test whether the match above
        # succeeded (e.g. $1 or $&) -- confirm against original source.
        if ($_)
        {
            $ident = $1;                 # Save ident for later use
            $ident =~ s/\[.*\]/COOKIE/g; # Replace all magic cookies
            $saveline =~ s/^.*@//;       # Remove the remote ident from log
        }
        else { $ident = ""; }
        $_ = $saveline;
    }
    @line = split;
    #
    # First, we have to figure out what file or script was accessed
    #
    $fname = $line[7];
    $fname =~ s/\?.*$//;                 # Remove any trailing query information
    $fname =~ s/\#.*$//;                 # Remove any trailing anchor information
    $fname =~ s#//#/#g;                  # Remove any extra slashes
    if (($fname eq "") || ($fname eq "HTTP/1.0"))
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
FNAME:                                   # Get the document's real name
    {
        $rname = "";                     # and start with it unknown
        $fsize = 0;
        study $fname;
        if (($fname eq "/") || ($fname eq "/$DirectoryIndex"))
        {
            $fname = "$ServerHome";      # Handle top file with extra care
            $rname = "$DocumentRoot$fname";
            last FNAME;
        }
        foreach $redir (keys(%AllRedirects))  # Is it a redirected file?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$redir/ )
            {
                $fname = "Redirected Document Requests";
                $fsize = $AllRedirects{$redir};
                last FNAME;
            }
        }
        foreach $alias (keys(%AllAliases))    # Is it a file name alias?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$alias/ )
            {
                $rname = $fname;
                $rname =~ s#^$alias#$AllAliases{$alias}#;
                last FNAME;
            }
        }
        if ($fname =~ /^\/~(\w+)\// )    # Is it a /~username/...?
        {
            ($name,$passwd,$uid,$gid,$quota,$comment,$gcos,$dir,$shell)
                = getpwnam($1);
            if ($dir)
            {
                $rname = $fname;
                $rname =~ s#^/~$1#$dir/$UserDir#;
            }
            else { $fname = "All Bad Usernames"; }
            last FNAME;
        }
        if ($fname =~ /^\/~(\w+)$/ )     # Is it a /~username ?
        {
            ($name,$passwd,$uid,$gid,$quota,$comment,$gcos,$dir,$shell)
                = getpwnam($1);
            if ($dir)
            {
                $rname = $fname;
                $rname =~ s#^/~$1#$dir/$UserDir#;
                if (-e "$rname/$DirectoryIndex")
                {
                    if ($DirectoryRedirect)
                    {
                        $fsize = ($RedirEstimate + 60) + (2 * length($fname));
                        $fname = "Redirected Directory Requests";
                        $rname = "";
                        last FNAME;
                    }
                    $rname .= "/$DirectoryIndex";
                }
                $fname .= "/";           # Append a trailing slash
            }
            else { $fname = "All Bad Usernames"; }
            last FNAME;
        }
        foreach $alias ( keys(%AllScripts) )  # Is it a script directory alias?
        {                                     # (FIX: % sigil was missing)
            if ( $fname =~ /^$alias/ )
            {
                $fname = "All Scripts";  # Don't report script names
                last FNAME;              # We can't know the transmit bytes
            }
        }
        if (-d "$DocumentRoot$fname")    # Is it a directory?
        {
            $hasSlash = ($fname =~ s/\/$//);  # Remove any trailing slash
            if (-e "$DocumentRoot$fname/$DirectoryIndex")
            {
                if (!$hasSlash && $DirectoryRedirect)
                {
                    $fsize = ($RedirEstimate + 60) + (2 * length($fname));
                    $fname = "Redirected Directory Requests";
                    last FNAME;
                }
                $rname = "$DocumentRoot$fname/$DirectoryIndex";
            }
            else { $rname = "$DocumentRoot$fname"; }
            $fname .= "/";               # Append a trailing slash
            last FNAME;
        }
        $rname = "$DocumentRoot$fname";  # It must be a normal file
    } # end FNAME
    $xname = "";
    if (!$fsize && $rname)               # Get the file size
    {                                    # through use of a cache of Sizes
        ($fsize = $Sizes{$fname}) ||
        ($fsize = $Sizes{$fname} = (-s $rname)) ||
        (($xname = $fname) && ($fname = "Nonexistant Files"));
    }
    $fname =~ s#/$DirectoryIndex$#/#;    # Remove any trailing index name
    if ($SearchArchive)
    {
        next LINE unless ( $fname =~ /$SearchArchive/ );
    }
    #
    # If you want to further restrict Archive Section names, do it here.
    # For example, if you wanted to show all GIFs as a single total,
    # then you would do:
    #
    #   if ($fname =~ /\.gif$/) { $pathkey = "All GIFs"; }
    #   else                    { $pathkey = $fname; }
    #
    if    ($SearchArchive)              { $pathkey = $fname; }
    elsif ($fname =~ /^\/Icons\/\w/)    { $pathkey = "All Icons (site)"; }
    elsif ($fname =~ /^\/icons\/\w/)    { $pathkey = "All Icons (server)"; }
    elsif ($fname =~ /^\/pictures\/\w/) { $pathkey = "All Pictures"; }
    else                                { $pathkey = $fname; }
    if ($pathkey eq '')
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
    #
    # Phew!  Now we have to figure out when and by whom it was accessed.
    #
    $_ = $saveline;
    /\[(.*)\]/;                          # Date stamp is inside brackets
    if ($1 eq "")
    {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;                       # Ignore garbage
    }
    $daytime = substr($1, 4, 6) . substr($1, 19, 5);  # "Mon dd  yyyy"
    $hour = substr($1,11, 2);                         # "hh"
    if ($SearchDate) { next LINE unless ( $daytime =~ /$SearchDate/ ); }
    if ($SearchTime) { next LINE unless ( $hour =~ /$SearchTime/ ); }
    if ($Verbose) { print(STDERR "$daytime $hour $fsize $fname\n"); }
    #
    # Get IP address and determine domain and reversed subdomain.
    #
    $afield = $line[0];
    $afield =~ tr/A-Z/a-z/;
    if ($SearchAddress)
    {
        next LINE unless ( $afield =~ /$SearchAddress/ );
    }
    @address = split(/\./, $afield);
    if ( $#address < 1 )                 # Usually caused by garbage in log
    {                                    # or perhaps a strange IP setup
        if ($AppendToLocalhost)          # or perhaps perfectly normal
        {
            $domain = "$mydom1\.$mydom2";
            # NOTE(review): appending the domain suffix to $ident (as well
            # as $afield) looks suspicious -- confirm against original.
            $ident .= $AppendToLocalhost;
            $afield .= $AppendToLocalhost;
            @address = split(/\./, $afield);
            if ((!$LocalFullAddress)&&($#address > 1))
            {
                shift(@address);
            }
            $subdomain = join('.', reverse(@address));
        }
        else
        {
            print(STDERR "$.:$saveline") if $PrintInvalids;
            $domain = "localhost";
            $subdomain = $afield;
        }
    }
    elsif ( $address[$#address] =~ /^[0-9]+$/ )   # Numeric = unresolved
    {
        $domain = "unresolved";
        if ($ShowUnresolved) { $subdomain = join('.', reverse(@address)); }
        else                 { $subdomain = "Unresolved"; }
    }
    elsif ($address[$#address-1] eq "$mydom1" &&
           $address[$#address]   eq "$mydom2" )   # Host in my domain
    {
        $domain = "$mydom1\.$mydom2";
        if ((!$LocalFullAddress)&&($#address > 1))
        {   # If the address has at least 3 components
            shift(@address);             # clip off the machine name
        }
        $subdomain = join('.', reverse(@address));
    }
    else                                 # Host in some other domain
    {
        $domain = $address[$#address];
        if ((!$OthersFullAddress)&&($#address > 1))
        {   # If the address has at least 3 components
            shift(@address);             # clip off the machine name
        }
        $subdomain = join('.', reverse(@address));
    }
    if ($PrintNonexist && $xname)
    {
        print(STDERR "$daytime $hour $xname BY $afield\n");
    }
    #
    # Now that we have categorized it, add it to the corresponding counters
    #
    $xferfiles++;                        # total files sent
    $dayfiles{$daytime}++;               # per day
    $groupfiles{$pathkey}++;             # per file
    $domainfiles{$domain}++;             # per domain
    $subdomainfiles{$subdomain}++;       # per subdomain
    $xferbytes += $fsize;                # total bytes sent
    $daybytes{$daytime} += $fsize;       # bytes per day
    $groupbytes{$pathkey} += $fsize;     # per file
    $domainbytes{$domain} += $fsize;     # per domain
    $subdomainbytes{$subdomain} += $fsize; # per subdomain
    $hourfiles{$hour}++;                 # total files per hour
    $hourbytes{$hour} += $fsize;         # bytes per hour
    if ($Do_Ident)                       # Is Ident Table desired?
    {
        $identfiles{$ident}++;           # files per ident
        $identbytes{$ident} += $fsize;   # bytes per ident
    }
}
close LOG;

@dates = sort datecompare keys(%daybytes);  # FIX: % sigil was missing
if ($xferfiles == 0) {die "There was no matching data to summarize.\n";}

# ==========================================================================
# Finally, we can print out the resulting statistics as a series of forms.
#
# Note: if you have a heavily used server, you may need to increase
#       the length of the numeric output fields in the forms below.
#
# NOTE(review): the print statements below appear to have had their HTML
# markup stripped in this copy (e.g. "\\\n" where a tag once was); they
# are preserved byte-for-byte pending comparison with a pristine copy.

$DailyHeader = "Daily Transmission Statistics";
$HourlyHeader = "Hourly Transmission Statistics";
$DomainHeader = "Total Transfers by Client Domain";
$SubdomainHeader = "Total Transfers by Reversed Subdomain";
$ArchiveHeader = "Total Transfers from each Archive Section";
$IdentHeader = "Total Transfers to each Remote Identifier";

print "\\\n";
print "\$OutputTitle\\n";
print "\\\n";
print "\$OutputTitle\\n";
print "\Last updated: $Updated ($UpdatedGMT)\\n";
print "\\n";
print "\\$DailyHeader\\n";
print "\\$HourlyHeader\\n";
print "\\$DomainHeader\\n";
print "\\$SubdomainHeader\\n";
print "\\$ArchiveHeader\\n";
if ($Do_Ident) { print "\\$IdentHeader\\n"; }
if ($LastSummary)                        # Link to the prior summary period
{
    $prevmonth = &lastmonth($dates[0]);
    $LastSummary =~ s/%M/$prevmonth/g;
    print "\\Previous Full Summary Period\\n";
}
print "\\n";
print "\Totals for Summary Period: ","$dates[0]"," to ",$dates[$#dates],
      "\\n";
print $startTag;
printf ("Files Transmitted During Summary Period %14.0f\n", $xferfiles);
printf ("Bytes Transmitted During Summary Period %14.0f\n", $xferbytes);
printf ("Average Files Transmitted Daily %14.0f\n",
        $xferfiles / ($#dates + 1));
printf ("Average Bytes Transmitted Daily %14.0f\n",
        $xferbytes / ($#dates + 1));
print $endTag;

# Guard against division by zero in the percentage columns below.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

# ==========================================================================
print "\\n";
print "\\$DailyHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Date\n";
print "----- ----- ------------ -------- |------------\n";
foreach $date ( sort datecompare keys(%daybytes) )  # FIX: % sigil missing
{
    $files = $dayfiles{$date};
    $bytes = $daybytes{$date};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",$pctfiles,$pctbytes,$bytes,$files,$date);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$HourlyHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Time\n";
print "----- ----- ------------ -------- |-----\n";
foreach $hour ( sort keys(%hourbytes) )             # FIX: % sigil missing
{
    $files = $hourfiles{$hour};
    $bytes = $hourbytes{$hour};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",$pctfiles,$pctbytes,$bytes,$files,$hour);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$DomainHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Domain\n";
print "----- ----- ------------ -------- |------------------------------------\n";
%codetable=&initcountryname();
foreach $domain ( sort domnamcompare keys(%domainfiles) ) # FIX: % missing
{
    # NOTE(review): the first assignment is immediately overwritten; it
    # may have been intended as a fallback when the code table has no
    # entry (countryname returns undef) -- preserved as found.
    $country = $domain;
    $country = &countryname($domain,%codetable);
    $files = $domainfiles{$domain};
    $bytes = $domainbytes{$domain};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %-5s %s\n",
            $pctfiles,$pctbytes,$bytes,$files, $domain, $country);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$SubdomainHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Reversed Subdomain\n";
print "----- ----- ------------ -------- |------------------------------------\n";
foreach $subdomain ( sort keys(%subdomainfiles) )   # FIX: % sigil missing
{
    $files = $subdomainfiles{$subdomain};
    $bytes = $subdomainbytes{$subdomain};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",
            $pctfiles,$pctbytes,$bytes,$files, $subdomain);
}
print $endTag;

# ==========================================================================
print "\\n";
print "\\$ArchiveHeader\\\n";
print $startTag;
print "%Reqs %Byte Bytes Sent Requests Archive Section\n";
print "----- ----- ------------ -------- |------------------------------------\n";
foreach $section ( sort keys(%groupfiles) )         # FIX: % sigil missing
{   # (sort bytecompare instead to order sections by bytes transferred)
    $files = $groupfiles{$section};
    $bytes = $groupbytes{$section};
    if ($files == $xferfiles) { $pctfiles = "100.0"; }
    else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
    if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
    else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
    printf ("%s %s %12d %8d | %s\n",
            $pctfiles, $pctbytes, $bytes, $files, $section);
}
print $endTag;

# ==========================================================================
if ($Do_Ident)                           # Is Ident Table desired?
{
    print "\\n";
    print "\\$IdentHeader\\\n";
    print $startTag;
    print "%Reqs %Byte Bytes Sent Requests Remote Identifier\n";
    print "----- ----- ------------ -------- |------------------------------------\n";
    foreach $ident ( sort keys(%identfiles) )       # FIX: % sigil missing
    {
        $files = $identfiles{$ident};
        $bytes = $identbytes{$ident};
        if ($files == $xferfiles) { $pctfiles = "100.0"; }
        else { $pctfiles = sprintf("%5.2f", 100*$files/$xferfiles); }
        if ($bytes == $xferbytes) { $pctbytes = "100.0"; }
        else { $pctbytes = sprintf("%5.2f", 100*$bytes/$xferbytes); }
        printf ("%s %s %12d %8d | %s\n",
                $pctfiles, $pctbytes, $bytes, $files, $ident);
    }
    print $endTag;
}

# ==========================================================================
print "\\n";
print "\This summary was generated by \n";
# NOTE(review): the next statement of the original read `print "\";` --
# an unterminated string literal left behind when HTML markup (almost
# certainly an <A HREF=...> anchor) was stripped from this copy.  It has
# been removed so the file compiles; restore the full footer from a
# pristine wwwstat-1.0 distribution.
print "old$Version\\n";
print "\\n";
print "\\\n";
exit(0);

# ==========================================================================
# initcountryname
#   Read the ISO code / country name table from $countrycodefile and
#   return it as a hash mapping lowercased code -> country name.
sub initcountryname
{   # read in table of ISO codes and country names - added by jem
    open (blah, "<$countrycodefile") || die "Can't open $countrycodefile";
    while (<blah>)   # FIX: readline operator <blah> was lost in this copy
    {
        chop;
        # FIX: limit the split to two fields; a plain split(' ') kept
        # only the first word of multi-word names ("United States").
        local($iso,$name)=split(' ',$_,2);
        $iso =~ y/A-Z/a-z/;
        $code{$iso}=$name;
    }
    close blah;
    return %code;
}

# countryname
#   Return the country name for an ISO code, given the code table.
sub countryname
{
    local($iso, %codetable) = @_;
    return $codetable{$iso};
}

# datecompare
#   sort() comparator for dates formatted "Mon dd  yyyy": orders by
#   year, then month (via index into the month-name string), then day.
sub datecompare
{
    local($[) = 0;
    local($date1) = substr($a, 7, 4) * 512;
    local($date2) = substr($b, 7, 4) * 512;
    $date1 += index("JanFebMarAprMayJunJulAugSepOctNovDec",substr($a,0,3))*12;
    $date2 += index("JanFebMarAprMayJunJulAugSepOctNovDec",substr($b,0,3))*12;
    $date1 += substr($a, 4, 2);
    $date2 += substr($b, 4, 2);
    $date1 - $date2;
}

# domnamcompare
#   sort() comparator for domain abbreviations: shorter names first,
#   ties broken alphabetically.
sub domnamcompare
{
    $sdiff = length($a) - length($b);
    ($sdiff < 0) ? -1 :
    ($sdiff > 0) ?  1 :
    ($a lt $b)   ? -1 :
    ($a gt $b)   ?  1 : 0;
}

# bytecompare
#   sort() comparator for archive sections: most bytes transferred
#   first, ties broken alphabetically.  (Available as an alternative
#   ordering for the Archive Section table.)
sub bytecompare
{
    $bdiff = $groupbytes{$b} - $groupbytes{$a};
    ($bdiff < 0) ? -1 :
    ($bdiff > 0) ?  1 :
    ($a lt $b)   ? -1 :
    ($a gt $b)   ?  1 : 0;
}

# ===========================================================================
# The following subroutine should be in a package, but I'm lazy.
# This is a modified (by Roy Fielding) version of Perl 4.036's ctime.pl
# library by Waldemar Kebsch and
# Marion Hakanson.  It is distributed under the
# Artistic License (included with your Perl distribution files).
#
# wtime returns a time string in the format "Wkd, Dy Mon Year HH:MM:SS Zone"
# with no newline appended.
#
# USAGE:
#
#     wtime(time,'');     -- returns the local time with no timezone appended
#                            As in "Wed, 15 Dec 1993 23:59:59 "
#
#     wtime(time,'GMT');  -- returns GMT time
#                            As in "Wed, 16 Dec 1993 07:59:59 GMT"
#
sub wtime
{
    local($time, $TZ) = @_;
    local($[) = 0;
    local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst);
    local(@DoW) = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat');
    local(@MoY) = ('Jan','Feb','Mar','Apr','May','Jun',
                   'Jul','Aug','Sep','Oct','Nov','Dec');

    # Determine what time zone is in effect.  Use local time if
    # TZ is anything other than 'GMT'.
    # There's no portable way to find the system default timezone.
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
        ($TZ eq 'GMT') ? gmtime($time) : localtime($time);

    $year += ($year < 70) ? 2000 : 1900;
    sprintf("%s, %02d %s %4d %02d:%02d:%02d %s",
            $DoW[$wday], $mday, $MoY[$mon], $year, $hour, $min, $sec, $TZ);
}

# ===========================================================================
# lastmonth
#   Return the three-letter abbreviation for the month before the one
#   in the date passed as an argument ("Mon dd  yyyy"); 'Err' if the
#   month cannot be recognized.
sub lastmonth
{
    local($date) = @_;   # Should be in the format "Feb 01  1994"
    local($[) = 0;
    local($Mstr) = 'JanFebMarAprMayJunJulAugSepOctNovDec';
    local($midx) = index($Mstr, substr($date,0,3));

    if    ($midx <  0) { return 'Err'; }
    elsif ($midx == 0) { return 'Dec'; }   # January wraps to December
    else  { return substr($Mstr,($midx - 3),3); }
}