#!/usr/bin/perl
#
# Convert sendmail, postfix, smail, or qmail logs to common log format so
# they can be processed by standard web log processing software.
#
# Here's a sample log entry, in common log format:
#
# user@example.com - - [31/May/1996:13:55:28 -0400] "GET /fred/" 200 541
#
# Meaning that user@example.com sent mail to fred, on the given date, and the
# message was 541 bytes long.
#
# Only mail that was successfully sent is logged.
#
# Maillog2Commonlog v. 3.2 is copyright 1995, 1996 by Joey Hess.
# May be distributed under the terms of the GPL.
# (http://www.gnu.org/copyleft/gpl.html)
#
# Usage:
# maillog2commonlog [sendmail|postfix|smail|newsmail|qmail] < logfile
#
# Note: if your smail is < version 3.2, then use smail. If it is 3.2 or
# greater, the logfile format changed, and you must use newsmail instead.
#
# Note: it only works for qmail if qmail is set up to log messages via
# syslog. Otherwise, it isn't going to find timestamps.
# Pick the log format from the first command-line argument.
$logtype = shift;
# lc returns the lower-cased copy and leaves its argument untouched, so the
# result must be assigned back. A missing argument becomes the empty string
# and falls through to the usage message below.
$logtype = defined $logtype ? lc $logtype : '';
# Anything other than a supported format name is a usage error. The message
# goes to STDERR because STDOUT carries the converted log, and the exit status
# is non-zero so that calling scripts can detect the failure.
unless (grep { $logtype eq $_ } qw(sendmail smail newsmail qmail postfix)) {
    print STDERR <<eof;
Usage:
maillog2commonlog [sendmail|postfix|smail|newsmail|qmail] < logfile
eof
    exit 1;
}
# Timezone offset appended to every timestamp in the output. Keep the leading
# space: the offset is printed directly after the time.
$tzoffset = ' -0400';
# Hosts for which the actual username of the person sending/receiving mail is
# logged. For every other host only the hostname is logged.
@pub_hosts = qw(
    localhost box box.kite.ml.org kite kite.ml.org
    kite.preferred.com kitenet.net box.kitenet.net
    kite.kitenet.net
);
# Log(MESSAGE_ID)
#
# Print the buffered message MESSAGE_ID as one common-log-format line on the
# currently selected output handle, then drop it from %msg_buf.
#
# Reads the from/to/day/mon/time/size fields of $msg_buf{MESSAGE_ID} and the
# globals $year and $tzoffset. Returns nothing useful.
sub Log {
    my $message_id = shift;
    my $msg = $msg_buf{$message_id} || {};
    # Common log format uses "-" when no byte count is available.
    my $size = (defined $msg->{size} && length $msg->{size}) ? $msg->{size} : '-';
    print "$msg->{from} - - [$msg->{day}/$msg->{mon}/$year:$msg->{time}$tzoffset] \"GET /$msg->{to}/\" 200 $size\n";
    # delete (rather than undef) removes the key itself, so %msg_buf does not
    # keep one empty slot for every message that has ever been logged.
    delete $msg_buf{$message_id};
    return;
}
# FixEmail(ADDRESS)
#
# Reduce a mail address to the token that is written to the output log:
#   - the characters "<", "|" and ">" are stripped;
#   - if the address has a host part listed in %pub_hosts_hash, the part
#     before the "@" is returned;
#   - if it has any other host part, only that host part is returned;
#   - an address without "@" is returned as is (after stripping).
# An undefined ADDRESS is returned unchanged.
#
# Works on a private copy, so the caller's $_ (the current log line in the
# main loop) is left alone.
sub FixEmail {
    my $addr = shift;
    return $addr unless defined $addr;
    $addr =~ s/[<|>]//g;
    if ($addr =~ m/\@(.*)$/) {
        my $host = $1;
        # Host names are case-insensitive, so also try the lower-cased form.
        if ($pub_hosts_hash{$host} || $pub_hosts_hash{lc $host}) {
            ($addr) = $addr =~ m/^(.*)\@/;
        }
        else {
            $addr = $host;
        }
    }
    return $addr;
}
# Index the public hosts in a hash (every value is 1) so that FixEmail can
# test membership with a single lookup.
@pub_hosts_hash{@pub_hosts} = (1) x @pub_hosts;
# Year stamped on every output entry. The parsing code never extracts a year
# from the log lines, so the current year is used.
# localtime reports the year as years since 1900; adding 1900 gives the full
# four-digit year without shelling out to date(1), whose output layout depends
# on platform and locale.
$year = (localtime)[5] + 1900;
# Now on to actually processing the logs. Sendmail and smail use very
# different file formats: sendmail is all on 1 line, smail is a multi-
# line format that's easier to process, with \n\n separating each multi-
# line record. And newsmail is ugly ('nuff said..)

# Old-style smail: read in a whole multi-line record at one go.
$/ = "\n\n" if $logtype eq 'smail';

# Both smail variants (the pattern also matches 'newsmail') log numeric
# months, so set up a month-number to Mmm translation table, indexed 1..12.
if ($logtype =~ m/smail/) {
    @date_trans[1 .. 12] = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
}
# Main loop: read the log one record at a time from the files named on the
# command line or from STDIN, pair up the "received" and "sent" records that
# share a message id in %msg_buf, and print an entry via Log as soon as both
# halves of a message are known.
#
# NOTE(review): Log removes the buffered message once it has been printed, so
# a later "sent" record for the same id (e.g. a second recipient) finds no
# sender and is not printed -- confirm whether that is intended.
while (<>) {
    # There are 2 distinct log line types: either mail is being received or
    # sent. We have to combine the 2 lines to get a clear picture of a mail
    # message. For qmail, there are 3 log line types: mail received, delivery
    # started, and delivery completed; only the first two are matched here.
    if (/: from=/ || /\] received\n/m || /\] Received / || /info msg .* from/) { # Received mail.
        if (/: from=/) { # SENDMAIL and POSTFIX
            ($message_id,$from,$size)=m/\w+\s+\d+\s+\d+:\d+:\d+\s+\w+\s+(?:sendmail|sm-mta|postfix\/qmgr)\[\d+\]:\s+(.*?):\s+from=(.*?),\s+size=(.*?),/;
        }
        elsif (/\] received\n/m) { # SMAIL
            ($message_id,$from)=m/^\d+\/\d+\/\d+\s+\d+\:\d+\:\d+\:\s+\[(.*?)\]\s+received\n\|\s+from:\s+(.*?)\n/m;
            ($size)=m/\|\s+size:\s+(\d+)\s+bytes\n/m;
        }
        elsif (/\] Received /) { # NEWSMAIL
            ($message_id)=m/\[(.*?)\]/;
            ($from)=m/Received FROM:(.*?) /;
            ($size)=m/SIZE:(\d+)\s/;
        }
        else { # QMAIL (the only remaining alternative of the test above)
            ($message_id,$size,$from)=m/info msg (\d+): bytes (\d+) from <(.*)>/;
        }

        # The line looked like a "received" record but could not be parsed.
        # Skip it rather than filing it under an undefined message id, where
        # it could later be paired with an unrelated record.
        next unless defined $message_id;

        $from = "unknown" if !$from;
        # All matching against the log line is done before FixEmail is called.
        $from = FixEmail($from);
        $msg_buf{$message_id}{from} = $from;
        $msg_buf{$message_id}{size} = $size;
        # The "sent" half was already seen: the message is complete.
        Log($message_id) if $msg_buf{$message_id}{to};
    }
    elsif (/: to=.*stat(us)?=sent/i || /\] delivered\n/m ||
           /\] Delivered / || /starting delivery/) { # The line logs mail being sent ok.
        if (/: to=.*stat(us)?=sent/i) { # SENDMAIL and POSTFIX
            ($mon,$day,$time,$message_id,$to)=m/(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+\w+\s+(?:sendmail|sm-mta|postfix\/(?:local|smtp))\[.*?\]:\s+(.*?):\s+to=(.*?),/;
        }
        elsif (/\] delivered\n/m) { # SMAIL
            ($mon,$day,$time,$message_id,$to)=m/(\d+)\/(\d+)\/\d+\s+(\d+:\d+:\d+):\s\[(.*?)\] delivered\n\|\s+to:\s+(.*?)\n/m;
            $mon=$date_trans[$mon]; # Translate to Mmm format.
        }
        elsif (/\] Delivered /) { # NEWSMAIL
            ($mon,$day,$time,$message_id)=m/(\d+)\/(\d+)\/\d+\s+(\d+:\d+:\d+):\s\[(.*?)\]/;
            ($to)=m/TO:(.*?)\s/;
            $mon=$date_trans[$mon]; # Translate to Mmm format.
        }
        else { # QMAIL (the only remaining alternative of the test above)
            ($mon,$day,$time,$message_id,$to)=m/^(\w+)\s+(\d+)\s+(\d+:\d+:\d+)\s+.*\s+msg\s+(\d+)\s+to\s+.*?\s+(.*)$/;
        }

        # Skip records whose id or recipient could not be extracted; they
        # would only produce an entry with empty fields.
        next unless defined $message_id and defined $to;

        $to = FixEmail($to);
        # Common log format uses a two-digit day of month.
        $day = "0$day" if length($day) == 1;
        $msg_buf{$message_id}{mon}  = $mon;
        $msg_buf{$message_id}{day}  = $day;
        $msg_buf{$message_id}{time} = $time;
        $msg_buf{$message_id}{to}   = $to;
        # The "received" half was already seen: the message is complete.
        Log($message_id) if $msg_buf{$message_id}{from};
    }
}