#!/usr/public/bin/perl
# ---------------------------------------------------------------------------
$Version = 'oldlog2new-1.0';
#
# Copyright (c) 1994 Regents of the University of California.
# All rights reserved.
#
# This software has been developed by Roy Fielding as
# part of the Arcadia project at the University of California, Irvine.
# It is based on the wwwstat log analyzer.  All this program does is read
# in the old log, figure out what each entry points to, find the current
# file size for that entity, and output the new format including a reasonable
# approximation of the server response code.
#

# usage -- terminate the program with the command-line summary.
#
# Takes no arguments.  Never returns: the help text (which embeds $Version)
# is handed to die(), so it is written to STDERR and the exit status is
# non-zero.
sub usage {
    die <<"EndUsage";
usage: oldlog2new [-hez] [-f logfile] [-s srmfile]
$Version
Convert an NCSA httpd 1.1 access_log file to a 1.2 access_log
Display Options:
     -h  Help -- just display this message and quit.
     -e  Display all invalid log entries on STDERR.
         (default is to ignore them)
Input Options:
     -f  Read from the following access_log file instead of the default.
     -z  Use zcat to uncompress the log file while reading [requires -f].
     -s  Get the server directives from the following srm.conf file.

EndUsage
}

#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the author, Roy Fielding.
#
# Redistribution and use in source and binary forms are permitted,
# subject to the restriction noted below, provided that the above
# copyright notice and this paragraph and the following paragraphs are
# duplicated in all such forms and that any documentation, advertising
# materials, and other materials related to such distribution and use
# acknowledge that the software was developed in part by the University of
# California, Irvine.  The name of the University may not be used to
# endorse or promote products derived from this software without
# specific prior written permission.  THIS SOFTWARE IS PROVIDED ``AS
# IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
# LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE.
#
# Use of this software in any way or in any form, source or binary,
# is not allowed in any country which prohibits disclaimers of any
# implied warranties of merchantability or fitness for a particular
# purpose or any disclaimers of a similar nature.
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION
# (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------
# Set the default configuration options:

# Edit the next line to specify the (+/-)HHMM offset from GMT.
# It is appended verbatim to every converted timestamp.
$GMToffset = '-0700';

# Edit the next line to identify the server's default home page.
$ServerHome = "/ICShome.html";

# Edit the next two lines to specify the location of your server access log
# and your server configuration (srm.conf) file.
$access_log = '/dc/ud/www/httpd_1.1/logs/access_log';
$srm_conf   = '/dc/ud/www/httpd_1.1/conf/srm.conf';

# Edit the next line to specify the command for displaying compressed files.
$zcat = 'gunzip -c';       # specify as null string if none are available

# Byte count reported for requests that map to a script directory; script
# output cannot be measured after the fact, so this estimate is used instead.
$ScriptEstimate = 1000;    # in bytes (must be greater than zero)

$DirectoryRedirect = 1;    # Does server do automatic redirect for slashless
                           # index reqs? (1 for httpd_1.1, 0 for httpd_1.0)

# Is the server running with rfc931 support (IdentityCheck on)?
$IdentityCheck = 0;        # Must = 1 if server uses rfc931 remote ident.

# Edit the next few lines to specify whether (1) or not (0) you want:
$PrintInvalids = 0;        # Display invalid log entries on STDERR?
$CompressedLog = 0;        # Access log has been compressed (or gzipped)?

# ==========================================================================
# Get the command-line options
#
# Getopt::Std replaces the Perl 4 "getopts.pl" library, which current perls
# no longer ship.  It fills in the same $opt_X package variables and returns
# false when an unknown switch is seen.
use Getopt::Std;

if (!getopts('hezf:s:') || $opt_h) { usage(); }

if ($opt_e) { $PrintInvalids = 1; }
if ($opt_z) {
    if ($opt_f) { $CompressedLog = 1; }    # Require logfile name if
    else        { usage(); }               # uncompression is desired
}
if ($opt_f) { $access_log = $opt_f; }
if ($opt_s) { $srm_conf   = $opt_s; }

if ($CompressedLog && !$zcat) {
    die "No zcat decompression command has been defined, stopped";
}

# ==========================================================================
# Get the other needed configuration items from the srm.conf file

open(my $srm_fh, '<', $srm_conf)
    || die "Error opening config file: $srm_conf\n";

$UserDir        = "public_html";           # Start with NCSA defaults
$DirectoryIndex = "index.html";
$DocumentRoot   = "/usr/local/etc/httpd/htdocs";

# The alias tables are keyed by the alias with every non-word character
# backslash-escaped (quotemeta), because the keys are interpolated into
# anchored pattern matches further down.
while (<$srm_fh>) {
    next if ( ($_ eq "\n") || /^\#/ );     # Ignore blank and comment lines

    if    (/^DocumentRoot (.+)\s/)   { $DocumentRoot   = $1; }
    elsif (/^UserDir (.+)\s/)        { $UserDir        = $1; }
    elsif (/^DirectoryIndex (.+)\s/) { $DirectoryIndex = $1; }
    elsif (/^Redirect\s+(\S+)\s+(\S+)\s/) {
        $AllRedirects{ quotemeta($1) } = $2;
    }
    elsif (/^Alias\s+(\S+)\s+(\S+)\s/) {
        $AllAliases{ quotemeta($1) } = $2;
    }
    elsif ( /^ScriptAlias\s+(\S+)\s+(\S+)\s/ ||
            /^OldScriptAlias\s+(\S+)\s+(\S+)\s/ ) {
        $AllScripts{ quotemeta($1) } = $2;
    }
}
close $srm_fh;

# ==========================================================================
# Now read log, figure out the response code and bytes, and output new format
#
my $log_fh;
if ($CompressedLog) {
    # List-form pipe open: the log file name is passed to the decompressor
    # as a single argument and is never interpreted by a shell.
    my @zcat_cmd = split ' ', $zcat;
    open($log_fh, '-|', @zcat_cmd, $access_log)
        || die "Error opening access log file: $zcat $access_log |: $!\n";
}
elsif ($access_log eq '-') {
    $log_fh = \*STDIN;                     # "-f -" still means standard input
}
else {
    open($log_fh, '<', $access_log)
        || die "Error opening access log file: $access_log: $!\n";
}

LINE: while (<$log_fh>) {
    my $saveline = $_;

    my $ident = "-";
    if ($IdentityCheck) {                  # Does log include IdentityCheck info?
        # Only take the ident when the line really starts with ident@host;
        # [^@]* stops at the first '@' so an '@' inside the URL is untouched.
        if (/^([^@]*)@\S+\s/) {
            $ident = $1;                   # Save ident for later use
            $saveline =~ s/^[^@]*@//;      # Remove the remote ident from log
        }
        $_ = $saveline;
    }

    # Old format:  host [Www Mmm dd hh:mm:ss yyyy] METHOD url [extra]
    my ($afield, $date, $method, $oname, $htv) =
        /^(\S+) \[(.+)\] (\S+)\s+(\S+)\s(.*)$/;

    if (!($afield && $date && $method && $oname && (length($date) == 24))) {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;
    }

    #
    # First, we have to figure out what file or script was accessed
    #
    my $fname = $oname;
    $fname =~ s/\?.*$//;                   # Remove any trailing query information
    $fname =~ s/\#.*$//;                   # Remove any trailing anchor information
    $fname =~ s#//#/#g;                    # Remove any extra slashes

    if (($fname eq "") || ($fname eq "HTTP/1.0")) {
        print(STDERR "$.:$saveline") if $PrintInvalids;
        next LINE;
    }

    my $rname = "";                        # Real (file system) name, unknown so far
    my $rcode = 200;
    my $fsize = 0;

    FNAME: {                               # Get the document's real name
        if (($fname eq "/") || ($fname eq "/$DirectoryIndex")) {
            $fname = "$ServerHome";        # Handle top file with extra care
            $rname = "$DocumentRoot$fname";
            last FNAME;
        }

        foreach my $redir (keys %AllRedirects) {    # Is it a redirected file?
            if ($fname =~ /^$redir/) {
                $rcode = 302;
                last FNAME;
            }
        }

        foreach my $alias (keys %AllAliases) {      # Is it a file name alias?
            if ($fname =~ /^$alias/) {
                $rname = $fname;
                $rname =~ s#^$alias#$AllAliases{$alias}#;
                last FNAME;
            }
        }

        if ($fname =~ /^\/~(\w+)\//) {              # Is it a /~username/...?
            my $user = $1;
            my $dir  = (getpwnam($user))[7];        # Home directory field
            if ($dir) {
                $rname = $fname;
                $rname =~ s#^/~$user#$dir/$UserDir#;
            }
            else {
                $rcode = 404;
            }
            last FNAME;
        }

        if ($fname =~ /^\/~(\w+)$/) {               # Is it a /~username ?
            my $user = $1;
            my $dir  = (getpwnam($user))[7];
            if ($dir) {
                $rname = $fname;
                $rname =~ s#^/~$user#$dir/$UserDir#;
                if (-e "$rname/$DirectoryIndex") {
                    if ($DirectoryRedirect) {
                        $rcode = 302;
                        last FNAME;
                    }
                    $rname .= "/$DirectoryIndex";
                }
                $fname .= '/';
            }
            else {
                $rcode = 404;
            }
            last FNAME;
        }

        foreach my $alias (keys %AllScripts) {      # Is it a script directory alias?
            if ($fname =~ /^$alias/) {
                $fsize = $ScriptEstimate;           # Estimate bytes from script
                last FNAME;
            }
        }

        if (-d "$DocumentRoot$fname") {             # Is it a directory?
            my $hasSlash = ($fname =~ s/\/$//);     # Remove any trailing slash
            if (-e "$DocumentRoot$fname/$DirectoryIndex") {
                if (!$hasSlash && $DirectoryRedirect) {
                    $rcode = 302;
                    last FNAME;
                }
                $rname = "$DocumentRoot$fname/$DirectoryIndex";
            }
            else {
                $rname = "$DocumentRoot$fname";
            }
            $fname .= '/';
            last FNAME;
        }

        $rname = "$DocumentRoot$fname";             # It must be a normal file
    } # end FNAME

    # Get the file size through a cache keyed by request name.  The cache
    # stores undef for a missing file and 0 for an existing empty one, so an
    # empty file is reported as "200 0" instead of being mistaken for a 404.
    if (!$fsize && ($rcode == 200) && $rname) {
        if (!exists $Sizes{$fname}) {
            $Sizes{$fname} = (-e $rname) ? ((-s _) || 0) : undef;
        }
        if (defined $Sizes{$fname}) { $fsize = $Sizes{$fname}; }
        else                        { $rcode = 404; }
    }

    # An unsupported method overrides whatever was decided above.
    if (!(($method eq 'GET') || ($method eq 'HEAD') || ($method eq 'POST'))) {
        $rcode = 400;
    }

    if    ($rcode != 200)     { $fsize = '-'; }
    elsif ($method eq 'HEAD') { $fsize = 0; }

    if ($htv) { $oname .= ' ' . $htv; }

    #
    # Phew!  Now we have to swap the date format around:
    # "Www Mmm dd hh:mm:ss yyyy"  ->  "dd/Mmm/yyyy:hh:mm:ss <offset>"
    # (the 9-character time slice includes the blank before the year).
    #
    my $newdate = substr($date,  8, 2) . '/' .
                  substr($date,  4, 3) . '/' .
                  substr($date, 20, 4) . ':' .
                  substr($date, 11, 9) . $GMToffset;
    $newdate =~ s/^ /0/;                   # Zero-pad a single-digit day

    #
    # Now that we have categorized it, print it in the new format
    #
    print($afield, ' ', $ident, ' - [', $newdate, '] "', $method, ' ', $oname,
          '" ', $rcode, ' ', $fsize, " \n");
}
close $log_fh;

exit(0);