#!/usr/bin/perl
############################################################
#
# File: addates.pl
#
# Arguments: filespec -s
#   filespec  file specification to process (if none, then assumes all)
#   -s        if specified, recurse subdirectories
#
# Description:
# Adds a date field to W3C log files that don't have them
# Assumes that all log entries are for the date specified
# in the header of the file.
#
# NOTE: This only works for log files rotated at midnight each day!
#       No longer true (see mod 2000Mar01) but assumes that there are
#       no 24hour gaps in the logfile
#
# WARNING: This changes the original files! Backup first!
#
# Created: 12.February.1999 by Jeremy Wadsack ([email protected])
#       Copyright (C) 1999 Wadsack-Allen. All rights reserved.
# Modified: 1.March.2000 by Guido Van Hoecke ([email protected])
#       No copyrights: released in the public domain
#
# This script is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
##########################################################################
# Date      Modification                                            Author
# ------------------------------------------------------------------------
# 1999Apr09 Included Expand function instead of module.                 JW
# 2000Mar01 Added NextDate function and logic.                         GVH
# 2000May31 Allows for multiple #Field lines (but note that it ignores
#           multiple #Date: lines because these are likely wrong).      JW
# 2000May31 Optimized by removing extra loop on contents.               JW
##########################################################################

use Time::Local;

# Work out what to process from the command line.
#   -s        (any position, case-insensitive) recurse subdirectories
#   filespec  first argument that is not -s; if none, then assume all files
# The flag must be exactly "-s": a filespec that merely contains "-s"
# (e.g. "my-site.log") is a file name, not a request to recurse.
my $recurse = 0;
my $filespec;
foreach my $arg ( @ARGV ) {
   if( $arg =~ /^-s$/i ) {
       $recurse = 1;
   } elsif( !defined $filespec ) {
       $filespec = $arg;
   } else {
       # Only one filespec is supported; say so rather than silently drop it
       warn "Ignoring extra argument '$arg'\n";
   } # end if
} # end foreach
$filespec = '*.*' unless defined $filespec;

my @filelist = Expand_Files( $filespec, $recurse );

# Process each file in turn: read it all, find the #Date: header, add a
# "date" column to every #Fields: line and prefix every log entry with the
# date (stepping to the next day whenever the time of day goes backwards),
# then write the result back over the original file.
foreach my $filename ( @filelist ) {
   # Open file and get contents (!WARNING this uses lot of memory)
   print "Checking file $filename. ";
   my $in;
   unless( open( $in, '<', $filename ) ) {
       # Report the real reason instead of claiming the file is empty
       print "Can't open file ($!). Skipping.\n";
       next;
   } # end unless
   my @contents = <$in>;
   close( $in );
   unless( @contents ) {
       print "File is empty. Skipping.\n";
       next;
   } # end unless

   # -- Find the date in the first pass, so we can
   #    abort if there is none. Only the first #Date: line is used;
   #    later ones are ignored because they are likely wrong.
   my $Date;
   foreach my $line ( @contents ) {
       # Require yyyy-mm-dd: that is the only form NextDate can increment
       if( $line =~ /^\#Date: (\d{4}-\d{2}-\d{2})/i ) {
           $Date = $1;
           print "Date $Date. ";
           last;
       } # end if
       # -- Jump out of loop at the first log entry (just a little OPT);
       #    blank lines do not end the header
       next unless $line =~ /\S/;
       last unless $line =~ /^\#/;
   } # end foreach
   unless( defined $Date ) {
       print "File has no date. Skipping.\n";
       next;
   } # end unless

   # -- Find and update Field specification and replace all log file lines
   my $skipFile = 0;
   print "Processing... ";
   my $previousTime = '00:00:00'; #smallest possible time
   foreach my $line ( @contents ) {      # $line aliases the array element
       if( $line =~ /^\#Fields:(.+)$/i ) {
           my $Spec = $1;
           # Look for "date" as a whole field name, so that fields which
           # merely contain those letters (x-update, cs(Date)) don't count
           if( $Spec =~ /(?:^|\s)date(?:\s|$)/i ) {
               # -- File already has a date field so skip the file
               $skipFile = 1;
               print "File has date field. {$Spec} Skipped\n";
               last;
           } else {
               $Spec =~ s/^[ \t]+//;
               $line = "#Fields: date $Spec\n";
           } # end if
       } elsif( $line =~ /^\#/ || $line !~ /\S/ ) {
           # Other directives and blank lines are left untouched. A blank
           # line must not be mistaken for an entry with an "earlier" time.
           next;
       } else {
           # Only a leading hh:mm[:ss[.fff]] field is used to detect a
           # change of day; anything else still gets the current date so
           # that every entry matches the new #Fields: line.
           if( $line =~ /^\s*(\d\d:\d\d(?::\d\d(?:\.\d+)?)?)\s/ ) {
               my $thisTime = $1;
               if( $thisTime lt $previousTime ) {
                   # date has changed
                   $Date = NextDate( $Date );
                   print "Date $Date. ";
               } #end if
               $previousTime = $thisTime;
           } # end if
           $line = "$Date $line";
       } # end if
   } # end foreach
   next if $skipFile;

   # Write new file to same name
   my $out;
   unless( open( $out, '>', $filename ) ) {
       print "Can't write file ($!). File left unchanged.\n";
       next;
   } # end unless
   my $written = print {$out} @contents;
   my $closed  = close( $out );      # buffered write errors surface here
   unless( $written && $closed ) {
       print "ERROR writing file ($!). File may be incomplete!\n";
       next;
   } # end unless
   print "done.\n";

} # end foreach



#------------------------------------
# Sub: Expand_Files
#
# Arguments: $filespec, $cascade
#   $filespec  a file name or DOS-style wildcard pattern ('*' matches any
#              run of characters, '?' any single character), optionally
#              preceded by a directory (with / or \ as the separator).
#              '*.*' means "all files", as it does on DOS.
#   $cascade   if true, also search all subdirectories, at any depth
#
# Returns: @filelist
#   The matching plain files, each prefixed with the directory it was
#   found in ('.' if $filespec names no directory), sorted by name within
#   each directory. A $filespec without wildcards and without $cascade is
#   returned as-is, without checking that the file exists.
#
# Description:
#  returns a list of filenames based on the file
#  specification and the choice to cascade/recurse
#  subdirectories. The whole file name must match the
#  pattern. Dies if a directory can't be read.
#-------------------------------------
sub Expand_Files {
   my ( $spec, $cascade ) = @_;

   # Just a single file, return it
   return( $spec ) unless $cascade || $spec =~ /[*?]/;

   # Split an optional leading directory from the file pattern
   my ( $dir, $filespec ) = ( $spec =~ /^(.*)[\\\/](.+)$/ );
   if( !defined $filespec ) {
       ( $dir, $filespec ) = ( '.', $spec );
   } elsif( $dir eq '' ) {
       $dir = '/';                    # rooted pattern such as "/*.log"
   } # end if

   # Turn the wildcard pattern into an anchored regular expression:
   # wildcards become their re equivalents, everything else is literal
   $filespec = '*' if $filespec eq '*.*';
   my $pattern = join( '', map {
         $_ eq '*' ? '.*'
       : $_ eq '?' ? '.'
       :             quotemeta( $_ )
   } split( //, $filespec ) );

   return( _Find_Files( $dir, qr/\A$pattern\z/s, $cascade ) );
} # end Expand_Files

# Lists the plain files in $dir whose name matches the compiled $pattern,
# descending into subdirectories when $cascade is true.
sub _Find_Files {
   my ( $dir, $pattern, $cascade ) = @_;
   my ( @found, @subdirs );

   # Read everything and close the handle before recursing
   opendir( my $dh, $dir ) || die "Error: Can't read directory $dir: $!";
   my @entries = sort( readdir( $dh ) );
   closedir( $dh );

   my $prefix = ( $dir =~ /[\\\/]$/ ) ? $dir : "$dir/";
   foreach my $entry ( @entries ) {
       # Never follow the self and parent links
       next if $entry eq '.' || $entry eq '..';
       my $path = $prefix . $entry;
       if( -f $path ) {
           push( @found, $path ) if $entry =~ $pattern;
       } elsif( $cascade && -d $path && !-l $path ) {
           # Symlinked directories are skipped to avoid loops
           push( @subdirs, $path );
       } # end if
   } # end foreach

   foreach my $subdir ( @subdirs ) {
       push( @found, _Find_Files( $subdir, $pattern, $cascade ) );
   } # end foreach
   return( @found );
} # end _Find_Files


#------------------------------
# Sub: NextDate
#
# Arguments: $startDate (as yyyy-mm-dd)
#
# Returns: $nextDate (as yyyy-mm-dd)
#
# Description: returns the calendar day after $startDate. Dies if
#   $startDate can't be parsed; timegm itself croaks if the day or
#   month is out of range.
#
#------------------------------
sub NextDate {
   my ( $startDate ) = @_;
   my ( $year, $mon, $mday );

   # Refuse unparsable input rather than reuse stale capture variables
   unless( defined $startDate
           && ( ( $year, $mon, $mday ) =
                $startDate =~ /^\s*(\d{4})\D+(\d{1,2})\D+(\d{1,2})/ ) ) {
       die "Error: NextDate can't parse date '"
           . ( defined $startDate ? $startDate : '' ) . "'\n";
   } # end unless

   # Pass the full four-digit year: Time::Local treats values 0..99 as
   # two-digit years in a rolling century, so "year - 1900" is ambiguous.
   # Work in GMT, where every day is exactly 86400 seconds long.
   my $time = timegm( 0, 0, 0, $mday, $mon - 1, $year ) + 86400;   # Tomorrow
   ( $mday, $mon, $year ) = ( gmtime( $time ) )[3..5];
   return sprintf( "%04d-%02d-%02d", 1900 + $year, $mon + 1, $mday );
} # end NextDate