#!/usr/local/bin/perl -w
########################################################################################################################
# anaml.pl - converts Analog's HTML-2 logfile reports into XML, mapping each report onto a HTML-style table #
# Version 1.0: 23.1.99 15:39 - 25.1.99 #
# [email protected] - mail me with bugfixes, requests, marriage proposals &c.                                            #
# #
# Do anything you want with this script, but please keep this header, and let me know if you use it for anything cool #
########################################################################################################################
use strict;
#########################################################################################################################
# #
# You'll need to set the following three variables to get things working - the rest can be adjusted according to taste #
# #
#########################################################################################################################
# --- Site-specific paths: these three must be edited before the first run ---
my $SOURCE = '/export/htdocs/myreport.html';     # The Analog HTML report to convert
my $OUTDIR = '/export/htdocs/anaml';             # Absolute path of the XML output directory (may be new, permissions allowing)
my $LOCALE = '/usr/local/analog/lang/uk.lng';    # Analog language file; use the HTML variant (*h.lng) when one exists
#########################################################################################################################
# --- Meta Report labels: localize as required ---
my ($GENERATOR, $HOSTNAME, $META) = (
    'Generator',      # Analog version/platform column
    'Hostname',       # Server name column
    'Meta Report',    # Name of the Analog "uber report"
);
# --- Output tag and attribute names: change these to roll your own markup ---
my ($TABLE, $TR, $TH, $TD) = qw(REPORT RR RH RD);    # Top-level tag, row delimiter, column heading, report element
my ($NAME, $SUMMARY)       = qw(NAME SUMMARY);       # Report name; one-line summary carried by some reports (busiest month &c.)
# $DEBUG is pasted verbatim into the top-level ($TABLE) tag, so it can carry extra
# formatting/debug attributes. It must always be defined, even if only as the empty string.
# my $DEBUG = ' BORDER=1';
my $DEBUG = q{};
#########################################################################################################################
# Open both inputs up front so a bad path is reported before any work is done.
# The three-argument form of open keeps odd characters in a path from being read as a mode.
open(SOURCE, '<', $SOURCE) or die("Can't open $SOURCE: $!");    # The file to be xmlised
open(LOCALE, '<', $LOCALE) or die("Can't open $LOCALE: $!");    # The language file (needed to correctly parse reports)

# Slurp the whole report into one string. Localizing $/ inside the do-block restores the
# normal line-by-line record separator automatically, before the language file is read.
my $slurp = do { local $/; <SOURCE> };

# Load the language file into @lingo, one entry per non-comment line, so that later code
# can address its strings by position. Comment lines (leading '#') are not counted.
my $index = 0;
my (@lingo);
while (my $line = <LOCALE>) {
    next if $line =~ /^#/;
    # Strip only the line terminator (LF or CRLF). A blind chop would eat the last real
    # character of a final line that has no trailing newline.
    $line =~ s/\r?\n\z//;
    $lingo[$index++] = $line;
}
# %names maps each verbose report name (as spelled in the language file) onto a short
# name, which is later used as the key under which that report's XML is stored.
# The table below pairs an index into @lingo with the short name for that entry.
my @short_name_at = (
     61 => 'general',        # General Summary
     62 => 'monthly',        # Monthly Report
     64 => 'weekly',         # Weekly Report
     67 => 'fulldaily',      # Daily Report
     66 => 'daily',          # Daily Summary
     69 => 'fullhourly',     # Hourly Report
     70 => 'hourly',         # Hourly Summary
     72 => 'quarter',        # Quarter-Hour Report
     74 => 'five',           # Five-Minute Report
     76 => 'host',           # Host Report
    136 => 'domain',         # Domain Report
     88 => 'request',        # Request Report
     80 => 'directory',      # Directory Report
     84 => 'filetype',       # File Type Report
    144 => 'size',           # File Size Report
     92 => 'redir',          # Redirection Report
     96 => 'failure',        # Failure Report
    100 => 'referrer',       # Referrer Report
    104 => 'refsite',        # Referring Site Report
    108 => 'redirref',       # Redirected Referrer Report
    112 => 'failref',        # Failed Referrer Report
    132 => 'fullbrowser',    # Browser Report
    128 => 'browser',        # Browser Summary
    116 => 'vhost',          # Virtual Host Report
    120 => 'user',           # User Report
    124 => 'failuser',       # Failed User Report
    140 => 'status',         # Status Code Report
);
my %names;
# Walk the table two elements at a time, in order, so that a repeated key is resolved
# exactly as it would be in a literal hash list (the later entry wins).
for (my $pair = 0; $pair < $#short_name_at; $pair += 2) {
    my ($line_no, $short) = @short_name_at[ $pair, $pair + 1 ];
    $names{ $lingo[$line_no] } = $short;
}
$names{$META} = 'meta';      # Meta Report - its name comes from $META rather than the language file
# Set up the hash containing the xml to be exported (short report name => XML text)
my %exports;

# Now grab a couple of pertinent fields from the language file. These start out holding
# the *labels*: they are used as column headings and as the literal text searched for in
# the report. Further down the first six are overwritten with the *values* found there.
my ($timestamp, $from, $to, $days, $generator, $runtime, $general)
    = (@lingo[156 .. 158], $lingo[23], @lingo[145 .. 146], $lingo[61]);

# Split the report on <hr>: the first chunk is the page header, the last is the footer.
my @reports = split(/<hr>/i, $slurp);
my $report  = $reports[0];    # The stuff at the top before the proper reports
$report = '' unless defined $report;    # An empty input file yields no chunks at all

# Pull out the page title, the hostname link (plus its link text, which is captured into
# $host but not used by the rest of this block) and whatever follows the <h1> heading.
my ($title, $hostname, $host, $stats)
    = ($report =~ /^.+?<title>(.+?)<\/title>.+?(<a href=[^>]+>([^<]+)<\/a>)<\/h1>(.+)$/is);
unless (defined $stats) {
    # Either everything matched or nothing did; fall back to empty fields with one
    # clear message rather than a stream of uninitialized-value warnings.
    warn "Can't find the title/hostname heading in $SOURCE - the $META will be incomplete\n";
    ($title, $hostname, $stats) = ('', '', '');
}

# Heading row of the Meta Report
my $xml = "<$TABLE $NAME=\"$META\" $SUMMARY=\"$title\"$DEBUG>\n";
$xml .= "<$TR>"
    . join('', map { "<$TH>$_</$TH>" } $HOSTNAME, $timestamp, $from, $to, $days, $GENERATOR, $runtime)
    . "</$TR>\n";

# The stuff at the bottom - this and the stats at the top make up the Meta Report
my $footer = $reports[-1];
$stats .= defined $footer ? $footer : '';
$stats =~ s/<\/i>.+$//s;    # Remove the tail
$stats =~ s/\n+//g;         # Death to newlines
$stats =~ s/ +/ /g;         # Double spaces -> single spaces

# Replace each label with the value that follows it in the report. The labels come from
# a data file, so they are quoted with \Q...\E: any '(', '.', '?' &c. in a translation is
# matched literally instead of being interpreted as regex syntax.
my @meta = ($stats =~ /^\Q$timestamp\E (.+?)\.<br>\Q$from\E (.+?) \Q$to\E (.+?) \((.+?) \Q$days\E\)\.<i>\Q$generator\E (<a href.+?<\/a>).+?\Q$runtime\E:<\/b> (.+?)\.$/i);
unless (@meta) {
    warn "Can't parse the run statistics in $SOURCE - is $LOCALE the right language file?\n";
    @meta = ('') x 6;
}
($timestamp, $from, $to, $days, $generator, $runtime) = @meta;

# Data row of the Meta Report
$xml .= "<$TR>"
    . join('', map { "<$TD>$_</$TD>" } $hostname, $timestamp, $from, $to, $days, $generator, $runtime)
    . "</$TR>\n";
$xml .= "</$TABLE>";
$exports{$names{$META}} = $xml;
# Convert every chunk between the header ($reports[0]) and the footer ($reports[-1]) into
# one XML table and store it in %exports under the report's short name.
#  - The General Summary is a list of "<b>label</b> value" pairs; it becomes a table with
#    one heading row and one data row.
#  - Every other report is a <pre><tt> block whose lines are split into cells on ": ".
REPORT: for ($index = 1; $index < $#reports; $index++) {
    $report = $reports[$index];

    # The report name is the text inside the element wrapped by <h2>
    my ($name) = ($report =~ m/<h2><.*?>(.+)<.*?><\/h2>/i);
    unless (defined $name) {
        warn "Skipping section $index of $SOURCE: no <h2> report name found\n";
        next REPORT;
    }

    # $general comes from the language file, so quote it before using it as a pattern
    my $is_general = ($name =~ /\Q$general\E/) ? 1 : 0;

    # Work out the short name now: without one there is nothing sensible to store the
    # result under (an undefined key would end up as a file called ".xml").
    my $short = $names{$name};
    $short = $names{$general} if !defined $short && $is_general;
    unless (defined $short) {
        warn "Skipping \"$name\": no such report name in $LOCALE\n";
        next REPORT;
    }

    if ($is_general) {    # General summary needs special treatment
        my ($dope, $lastseven);
        if (($dope) = ($report =~ /<p><b>(.+)$/si)) {    # No summary for the last seven days
            $lastseven = "";
        }
        else {
            # Extract the parenthesised summary and the stats that follow it
            ($lastseven, $dope) = ($report =~ /<p>\((?!<b>)(.+?)\)\.\n<br><b>(.+)$/is);
            unless (defined $dope) {
                warn "Can't parse the statistics in \"$name\"\n";
                ($lastseven, $dope) = ('', '');
            }
            $lastseven = " $SUMMARY=\"$lastseven\.\"";    # Format the L7 summary
        }
        $dope =~ s/\n|://g;                # Kill newlines and colons
        $dope =~ s/(<\/b>)( +)/$1/ig;      # Drop the padding between a label and its value

        # Splitting on the <b>/</b> markers yields label, value, label, value, ...
        # Remember the labels in document order so the columns come out in the same
        # order as in the report (a plain hash would shuffle them on every run).
        my @fields = split(/(?:<br>)?<\/?b>/i, $dope);
        my (%summaries, @order);
        while (@fields) {
            my ($key, $value) = splice(@fields, 0, 2);
            $value = '' unless defined $value;    # A trailing label with no value
            push @order, $key unless exists $summaries{$key};
            $summaries{$key} = $value;
        }

        my $row = "<$TR>";
        $xml = "<$TABLE $NAME=\"$general\"$lastseven$DEBUG>\n<$TR>";
        foreach my $key (@order) {
            $xml .= "<$TH>$key</$TH>";
            $row .= "<$TD>$summaries{$key}</$TD>";
        }
        $xml .= "</$TR>\n";
        $row .= "</$TR>\n</$TABLE>";
        $xml .= $row;
        $exports{$short} = $xml;
        next REPORT;
    }

    # Extract the meat and the summary (if present)
    my ($meat, $summary) = ($report =~ /<pre><tt>(.+)<\/tt><\/pre>(.*)$/is);
    unless (defined $meat) {
        warn "No <pre><tt> table found in \"$name\" - writing an empty report\n";
        ($meat, $summary) = ('', '');
    }
    $summary =~ s/\n//g;                   # Strip newlines from summary
    $meat =~ s/^\s+//g;                    # Strip opening spaces
    $meat =~ s/: +<img .+//ig;             # Lose the graphics
    $meat =~ s/(?:: +)?\n+ */\n/gs;        # Clean out double newlines and orphaned delimiters
    $meat =~ s/ +/ /;                      # Squash spaces - NOTE(review): no /g, so only the first run is touched

    # First line is the column headings, second line is skipped, the rest are data rows
    my ($head, $body) = ($meat =~ /^([^\n]+)\n[^\n]+\n(.+?)\n$/s);
    unless (defined $head) {
        warn "No heading/data rows found in \"$name\" - writing an empty report\n" if length $meat;
        ($head, $body) = ('', '');
    }
    $head =~ s/: +/<\/$TH><$TH>/g;
    $head = "<$TR><$TH>$head<\/$TH><\/$TR>";
    $body =~ s/\n/<\/$TD><\/$TR>\n<$TR><$TD>/g;
    $body =~ s/: +/<\/$TD><$TD>/g;
    $body =~ s/(\[.+?)<\/$TD><$TD>(.+?\])/$1: $2/gi;    # Colons inside square brackets can be left alone
    $body = "<$TR><$TD>$body<\/$TD><\/$TR>";
    $summary = $summary ? " $SUMMARY=\"$summary\"" : "";
    $xml = "<$TABLE $NAME=\"$name\"$summary$DEBUG>\n$head\n$body\n<\/$TABLE>";
    $exports{$short} = $xml;
}
# Make sure the output directory is usable, then write each entry of %exports to
# "$OUTDIR/<key>.xml". Any I/O failure is fatal: a silently missing or truncated
# file is worse than an aborted run.
if (!(-e $OUTDIR)) {    # If it doesn't exist, make it
    mkdir($OUTDIR, 0755) or die("Can't create $OUTDIR: $!");
}
if (!(-w $OUTDIR)) {    # If it does exist, make sure it's writable
    die "Can't write to output directory: $!";
}
foreach my $short (sort keys %exports) {    # Let's go (sorted only to make the write order repeatable)
    my $path = "$OUTDIR/$short.xml";
    open(my $out, '>', $path) or die("Can't open $path for writing: $!");
    print {$out} $exports{$short} or die("Can't write to $path: $!");
    # Buffered write errors (e.g. a full disk) only surface when the handle is closed
    close($out) or die("Can't close $path: $!");
}
close SOURCE;
close LOCALE;
exit(0);