#!/usr/bin/perl
############################################################
#
# File: addates.pl
#
# Arguments: filespec -s
# filespec file specification to process (if none, then assumes all)
# -s if specified, recurse subdirectories
#
# Description:
# Adds a date field to W3C log files that don't have them
# Assumes that all log entries are for the date specified
# in the header of the file.
#
# NOTE: This only works for log files rotated at midnight each day!
# No longer true (see mod 2000Mar01) but assumes that there are
# no 24hour gaps in the logfile
#
# WARNING: This changes the original files! Backup first!
#
# Created: 12.February.1999 by Jeremy Wadsack ([email protected])
# Copyright (C) 1999 Wadsack-Allen. All rights reserved.
# Modified: 1.March.2000 by Guido Van Hoecke ([email protected])
# No copyrights: released in the public domain
#
# This script is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
##########################################################################
# Date Modification Author
# ------------------------------------------------------------------------
# 1999Apr09 Included Expand function instead of module. JW
# 2000Mar01 Added NextDate function and logic. GVH
# 2000May31 Allows for multiple #Field lines (but note that it ignores
# multiple #Date: lines because these are likely wrong). JW
# 2000May31 Optimized by removing extra loop on contents. JW
##########################################################################
use Time::Local;
use strict;
use warnings;

# -- Parse the command line.
#    Accepted forms:  (nothing) | filespec | filespec -s | -s | -s filespec
#    The flag has to be exactly "-s" (any case); a file name that merely
#    contains "-s" (e.g. "web-server.log") must not be mistaken for it,
#    because that would silently switch the run to "all files".
my $recurse = 0;
my $filespec;
foreach my $arg ( @ARGV ) {
    if ( $arg =~ /^-s$/i ) {
        $recurse = 1;
    }
    elsif ( !defined $filespec ) {
        $filespec = $arg;    # only the first file specification is used
    }
}

# If no file specification was given, then assume all files
$filespec = '*.*' unless defined $filespec;

FILE:
foreach my $filename ( Expand_Files( $filespec, $recurse ) ) {
    # -- Read the whole file (!WARNING this uses a lot of memory)
    print "Checking file $filename. ";
    my $in;
    unless ( open( $in, '<', $filename ) ) {
        print "Can't read file: $!. Skipping.\n";
        next FILE;
    }
    my @contents = <$in>;
    close( $in );

    unless ( @contents ) {
        print "File is empty. Skipping.\n";
        next FILE;
    }

    # -- Find the date in a first pass over the header, so we can
    #    abort before touching anything if there is none
    my $date;
    foreach my $line ( @contents ) {
        if ( $line =~ /^\#Date: (.{10})/i ) {
            $date = $1;
            print "Date $date. ";
            last;
        }
        # -- The header ends at the first non-directive line
        last if $line =~ /^[^\#]/;
    }
    unless ( defined $date ) {
        print "File has no date. Skipping.\n";
        next FILE;
    }

    # -- Update every #Fields: directive and prefix every log entry
    #    with the date. $line aliases the array element, so assigning
    #    to it edits @contents in place.
    print "Processing... ";
    my $previousTime = '00:00:00';    # smallest possible time
    foreach my $line ( @contents ) {
        if ( $line =~ /^\#Fields:(.+)$/i ) {
            my $spec = $1;
            # -- Only a field called exactly "date" counts; a name that
            #    merely contains it (e.g. "x-update") does not.
            if ( $spec =~ /(?:^|\s)date(?:\s|$)/i ) {
                print "File has date field. {$spec} Skipped\n";
                next FILE;
            }
            $line = "#Fields: date$spec\n";
        }
        elsif ( $line !~ /^\#/ && $line =~ /(\S+)/ ) {
            # -- Log entry: its first word is expected to be the time.
            #    Blank lines never get here and are left untouched.
            my $thisTime = $1;
            if ( $thisTime =~ /^\d\d:\d\d:\d\d/ ) {
                if ( $thisTime lt $previousTime ) {
                    # the clock went backwards, so the date has changed
                    $date = NextDate( $date );
                    print "Date $date. ";
                }
                $previousTime = $thisTime;
            }
            $line = "$date $line";
        }
    }

    # -- Write the new contents back under the same name
    my $out;
    unless ( open( $out, '>', $filename ) ) {
        print "Can't write file: $!. Skipped.\n";
        next FILE;
    }
    # The file has been truncated by now, so a failed write means lost
    # data: stop immediately instead of carrying on with other files.
    print {$out} @contents
        or die "Error: Can't write to $filename: $!";
    close( $out )
        or die "Error: Can't close $filename: $!";
    print "done.\n";
}
#------------------------------------
# Sub: Expand_Files
#
# Arguments: $filespec, $cascade
#   $filespec  a file name or a wildcard specification, optionally
#              preceded by a directory ("/" or "\" separated).
#              "*" matches any run of characters, "?" matches exactly
#              one character; everything else is taken literally.
#              "*.*" is treated as "every file" (DOS convention).
#   $cascade   true to search all subdirectories as well
#
# Returns: @filelist
#
# Description:
#   Returns a list of filenames based on the file specification and
#   the choice to cascade/recurse subdirectories. A specification
#   without wildcards is returned as is when $cascade is false.
#   Files found in subdirectories are listed before the files of the
#   directory itself; entries of one directory are sorted by name.
#   Dies if a directory cannot be read.
#-------------------------------------
sub Expand_Files {
    my ( $spec, $cascade ) = @_;

    # Just a single file, return it
    return ( $spec ) unless $cascade || $spec =~ /[*?]/;

    # -- Split the specification into directory and file pattern
    my ( $dir, $filespec ) = $spec =~ m{^(.*)[\\/](.+)$};
    ( $dir, $filespec ) = ( '.', $spec ) unless defined $filespec;

    # An empty directory part means the root ("/name"): open "/" but keep
    # building paths from $dir so the result has no doubled separator.
    my $root = $dir eq '' ? '/' : $dir;

    # -- Turn the wildcard pattern into an anchored regex, one character
    #    at a time, so that regex metacharacters such as "." stay literal
    my $glob    = $filespec eq '*.*' ? '*' : $filespec;
    my $pattern = join '',
        map {
              $_ eq '*' ? '.*'
            : $_ eq '?' ? '.'
            :             quotemeta( $_ )
        } split //, $glob;
    my $matcher = qr/\A$pattern\z/s;

    # -- Read the directory once; "." and ".." must never be followed,
    #    otherwise the parent directory would be processed as well
    opendir( my $dh, $root )
        or die "Error: Can't read directory $dir: $!";
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir( $dh );
    closedir( $dh );

    my @filenames;

    # -- Cascade into the subdirectories if enabled
    if ( $cascade ) {
        foreach my $entry ( @entries ) {
            my $path = "$dir/$entry";
            # symbolic links are not followed, to stay clear of loops
            next unless -d $path && !-l $path;
            push @filenames, Expand_Files( "$path/$filespec", $cascade );
        }
    }

    # -- Add the matching plain files of this directory
    push @filenames,
        grep { -f $_ }
        map  { "$dir/$_" }
        grep { $_ =~ $matcher } @entries;

    return @filenames;
} # end Expand_Files
#------------------------------
# Sub: NextDate
#
# Arguments: $startDate (as yyyy-mm-dd)
#
# Returns: $nextDate (as yyyy-mm-dd)
#
# Description: returns the calendar day following $startDate.
#   Dies if $startDate does not contain a four-digit year, a month
#   and a day, or if Time::Local rejects them as out of range.
#
#------------------------------
sub NextDate {
    my ( $startDate ) = @_;

    my ( $year, $mon, $mday ) =
        defined $startDate
        ? $startDate =~ /(\d{4})\D+(\d{1,2})\D+(\d{1,2})/
        : ();
    die "Error: Can't parse date '"
        . ( defined $startDate ? $startDate : '' ) . "'\n"
        unless defined $mday;

    # The full four-digit year is handed to Time::Local on purpose: a
    # year-1900 offset below 100 would be read as a two-digit year and
    # resolved against a rolling century. Working in UTC keeps the
    # arithmetic independent of the local time zone and of DST changes.
    my $time = timegm( 0, 0, 12, $mday, $mon - 1, $year );    # Noon
    $time += 86400;                                            # Tomorrow

    ( $mday, $mon, $year ) = ( gmtime $time )[ 3 .. 5 ];
    return sprintf "%04d-%02d-%02d", 1900 + $year, $mon + 1, $mday;
}