Xref: feenix.metronet.com comp.infosystems.gopher:4460
Newsgroups: comp.infosystems.gopher
Path: feenix.metronet.com!news.utdallas.edu!tamsun.tamu.edu!cs.utexas.edu!uwm.edu!cs.uwp.edu!datta
From: [email protected] (David Datta)
Subject: new Log stats program
Message-ID: <[email protected]>
Organization: University of Wisconsin - Parkside
X-Newsreader: TIN [version 1.2 PL1]
Date: Sat, 14 Aug 1993 06:42:50 GMT
Lines: 350

I have just finished modifying the xferstats program that analyzes data
from wuarchive's ftpd program to also handle Gopher logfiles.

It only does stats on Gopher file transfers (it doesn't do stats on
directory requests). It turns out that the music archives get about 18,000
files sent out a day.

This xferstats is quite different from the one that is distributed.
I take the blame for all bugs, and I appreciate comments.

Edit the defaults at the top for your sites....

(Don't forget to clip the signature from the end...)

----CUT HERE -----------------------------------------------------------------
#! /usr/local/bin/perl
# ---------------------------------------------------------------------------
#
# USAGE: xferstats <options>
#
# OPTIONS:
#       -f <filename>   Use <filename> for the log file
#       -r              include real users
#       -a              include anonymous users
#       -h              include report on hourly traffic
#       -d              include report on domain traffic
#       -t              report on total traffic by section
#       -D <domain>     report only on traffic from <domain>
#       -l <depth>      Depth of path detail for sections
#       -s <section>    Section to report on, For example: -s /pub will report
#                               only on paths under /pub
#       -g              Process a Gopher file, not an FTP file
#
# ---------------------------------------------------------------------------

# Site configuration, command-line parsing, log opening and report banner.
#
# Edit the next two lines to customize for your domain.
# This will allow your domain to be separated in the domain listing:
# hosts ending in "$mydom1.$mydom2" are reported under that two-part name
# instead of under the bare top-level domain.

$mydom1 = "uwp";
$mydom2 = "edu";

# Edit the next two lines to customize for your default log files.
# $usage_file is the FTP transfer log; $gopherlog replaces it when -g is given.
$usage_file = "/usr/adm/xferlog";
$gopherlog = "/usr/adm/gopherlog";

# Edit the following lines for default report settings.
# Entries defined here will be over-ridden by the command line.
# Note that the flag options (-h, -d, -t) can only switch a report ON;
# a report enabled here cannot be disabled from the command line.

$opt_h = 1;
$opt_d = 0;
$opt_t = 1;
$opt_l = 3;

require 'getopts.pl';

# 't' was missing from the option spec although -t is documented in the
# usage header, so "xferstats -t" was rejected as an unknown option.
&Getopts('f:rahtdD:l:s:g');

# With neither -r nor -a given, report on anonymous transfers only.
if ($opt_r) { $real = 1;}
if ($opt_a) { $anon = 1;}
if ($real == 0 && $anon == 0) { $anon = 1; }

# An explicit -f always wins over the -g default log file.
if ($opt_g) {$usage_file = $gopherlog;}
if ($opt_f) {$usage_file = $opt_f;}

open (LOG,$usage_file) || die "Error opening usage log file: $usage_file\n";

if ($opt_g) {print "This report is for GOPHER transfers.\n";}
else {print "This report is for FTP transfers.\n";}

if ($opt_D) {print "Transfer Totals include the '$opt_D' domain only.\n";
            print "All other domains are filtered out for this report.\n\n";}

if ($opt_s) {print "Transfer Totals include the '$opt_s' section only.\n";
            print "All other sections are filtered out for this report.\n\n";}

# Main pass over the log file.  Every accepted record is folded into the
# per-day, per-hour, per-section, per-domain and per-host tallies that the
# report sections print afterwards.
LINE: while (<LOG>) {

  @line = split;

  # FTP records must split into exactly 17 fields.  Field 12 is "a" or "r";
  # records of a kind that was not requested (-a / -r) are dropped.
  if (!$opt_g) {
       next LINE if ($#line != 16);
       next LINE if (!$anon && $line[12] eq "a");
       next LINE if (!$real && $line[12] eq "r");
       }

  # Columns 0-9 plus columns 19-23 of the raw line form the per-day key;
  # columns 11-12 form the per-hour key.  datecompare reads the month at
  # offset 4, the day at offset 8 and the year at offset 11 of the day key.
  $daytime = substr($_, 0, 10) . substr($_, 19, 5);
  $time = substr($_,11,2);

  if ($opt_g) {
       # NOTE(review): the Gopher field positions below are taken as-is from
       # the original script; confirm them against your gopherd log format.
       if ( $line[9] eq "range") {
           $fsize=$line[12]-$line[10];
           $fname=$line[15];
       } else {
           # Only "file" and "range" requests are counted.  The original
           # wrapped this "next" in a bare block; a bare block is itself a
           # loop that runs once, so "next" merely left that block and the
           # record was processed anyway.  Name the outer loop explicitly.
           next LINE if ( $line[9] ne "file");
           $fname=$line[10];
           # Gopher "file" records carry no size; stat the file on disk.
           $fsize = (-s "/usr/ftp/".$line[10]);
       }}
  else {
       $fname=$line[8];
       $fsize = $line[7];
       if ($fname eq "\.") { $fname = "/unreadable/filename";}
  }

  # Keep only paths under the -s section and strip that prefix.
  # With no -s given the prefix is empty and every path passes.
  next LINE if (substr($fname,0,length("$opt_s")) ne "$opt_s");
  $fname = substr($fname,length("$opt_s"));
  @path = split(/\//, $fname);

# Things in the top-level directory are assumed to be informational files

  if ($#path == 1)
     { $pathkey = "Index/Informational Files"; }
     else {
       # Section key = up to $opt_l leading directory components;
       # the final component (the file name itself) is never included.
       $pathkey = "";
       for ($i=1; $i <= $#path-1 && $i <= $opt_l;$i++) {
               $pathkey = $pathkey . "/" . $path[$i];
               }
       }

  # Remote host name, lower-cased so that domains compare consistently.
  if ($opt_g)
       {$afield=$line[7];}
  else
       {$afield=$line[6];}

  $afield =~ tr/A-Z/a-z/;

  @address = split(/\./, $afield);

  # Domain = last label of the host name, except that the local site is
  # reported as "$mydom1.$mydom2".  Hosts whose first label is numeric
  # (IP addresses) or that have fewer than three labels are "unresolved".
  $domain = $address[$#address];
  if ($domain eq "$mydom2" && $address[$#address-1] eq "$mydom1")
     { $domain = $mydom1 . "." . $mydom2; }
  if ( int($address[0]) > 0 || $#address < 2 )
     { $domain = "unresolved"; }

  # With -D, count only records whose domain starts with the given string.
  $count = 1;
  if ($opt_D)
    {if (substr($domain,0,length("$opt_D")) eq "$opt_D" ) { $count = 1;} else
    {$count = 0;}
    }


  if ($count) {


  $xferfiles++;                                # total files sent
  $xfertfiles++;                               # total files sent
  $xferfiles{$daytime}++;                      # files per day
  $groupfiles{$pathkey}++;                     # per-group accesses
  $domainfiles{$domain}++;
  # The summary prints the number of keys in %systemfiles as "Systems Using
  # Archives", but nothing ever filled the hash, so it always printed 0.
  $systemfiles{$afield}++;                     # per-host accesses

  # NOTE(review): field 5 is used as the transfer time in both modes;
  # confirm that this is meaningful for Gopher logs.
  $xfersecs{$daytime}    += $line[5];          # xmit seconds per day
  $domainsecs{$domain}   += $line[5];          # xmit seconds for domain
  $xferbytes{$daytime}   += $fsize;            # bytes per day
  # Was "$line[7]": identical to $fsize for FTP, but in Gopher mode field 7
  # is the host name, which made every per-domain byte count wrong.
  $domainbytes{$domain}  += $fsize;            # xmit bytes to domain
  $xferbytes             += $fsize;            # total bytes sent
  $groupbytes{$pathkey}  += $fsize;            # per-group bytes sent

  $xfertfiles{$time}++;                        # files per hour
  $xfertsecs{$time}      += $line[5];          # xmit seconds per hour
  $xfertbytes{$time}     += $fsize;            # bytes per hour
  $xfertbytes            += $fsize;            # total bytes sent
  }
}
close LOG;

# Summary section: overall totals and daily averages for the whole log.

# Bail out before any sorting or division when no record was accepted.
if ($xferfiles == 0) {die "There was no data to process.\n";}

# keys() is given real hashes (%name).  The bareword form keys(name) is
# accepted only by Perl 4 and is a compile error under Perl 5.
# NOTE(review): the host count is only meaningful if the log-processing
# loop records each client host in %systemfiles.
@syslist = keys(%systemfiles);
@dates = sort datecompare keys(%xferbytes);


print "TOTALS FOR SUMMARY PERIOD ", $dates[0], " TO ", $dates[$#dates], "\n\n";
printf ("Files Transmitted During Summary Period  %12.0f\n", $xferfiles);
printf ("Bytes Transmitted During Summary Period  %12.0f\n", $xferbytes);
printf ("Systems Using Archives                   %12.0f\n\n", $#syslist+1);

# $#dates + 1 is the number of distinct days seen; it is at least 1 here
# because at least one record was counted.
printf ("Average Files Transmitted Daily          %12.0f\n",
  $xferfiles / ($#dates + 1));
printf ("Average Bytes Transmitted Daily          %12.0f\n",
  $xferbytes / ($#dates + 1));

# Daily report: one line per day, in chronological order.
# Each format below must end with a line holding a single "." -- those
# terminator lines had been lost, leaving the formats unterminated.
format top1 =

Daily Transmission Statistics

                Number Of    Number of    Average    Percent Of  Percent Of
    Date        Files Sent  Bytes  Sent  Xmit  Rate  Files Sent  Bytes Sent
---------------  ----------  -----------  ----------  ----------  ----------
.

format line1 =
@<<<<<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>  @>>>>>>>>>  @>>>>>>>    @>>>>>>>
$date,           $nfiles,    $nbytes,     $avgrate,   $pctfiles,  $pctbytes
.

# Select the page-header format and the detail-line format for write.
$^ = "top1";
$~ = "line1";

# Clamp the grand totals so the percentage divisions below cannot divide
# by zero (for example when every transferred file was empty).
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

foreach $date ( sort datecompare keys(%xferbytes) ) {

  # A day whose transfers all took 0 seconds would divide by zero in the
  # rate; treat it as 1 second, as the domain report already does.
  if ( $xfersecs{$date} < 1 ) { $xfersecs{$date} = 1; }

  $nfiles   = $xferfiles{$date};
  $nbytes   = $xferbytes{$date};
  $avgrate  = sprintf("%5.1f KB/s", $xferbytes{$date}/$xfersecs{$date}/1000);
  $pctfiles = sprintf("%8.2f", 100*$xferfiles{$date} / $xferfiles);
  $pctbytes = sprintf("%8.2f", 100*$xferbytes{$date} / $xferbytes);
  write;
}

# Section report (-t): files and bytes per archive section, largest byte
# count first (see bytecompare).
if ($opt_t) {
# Each format must end with a line holding a single "." -- those
# terminator lines had been lost, leaving the formats unterminated.
format top2 =

Total Transfers from each Archive Section (By bytes)

                                                ---- Percent  Of ----
    Archive Section      Files Sent Bytes Sent  Files Sent Bytes Sent
------------------------- ---------- ----------- ---------- ----------
.

format line2 =
@<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>>>>> @>>>>>>>>>> @>>>>>>>   @>>>>>>>
$section,                 $files,    $bytes,     $pctfiles, $pctbytes
.

# Unbuffered output; $- = 0 (no lines left on the page) makes the next
# write start with the newly selected header format.
$| = 1;
$- = 0;
$^ = "top2";
$~ = "line2";

# Clamp the grand totals BEFORE they are used as divisors.  The original
# performed these checks after the loop, too late to prevent a division
# by zero when the total byte count is 0.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

foreach $section ( sort bytecompare keys(%groupfiles) ) {

  $files = $groupfiles{$section};
  $bytes = $groupbytes{$section};
  $pctbytes = sprintf("%8.2f", 100 * $groupbytes{$section} / $xferbytes);
  $pctfiles = sprintf("%8.2f", 100 * $groupfiles{$section} / $xferfiles);
  write;

}
}

# Domain report (-d): files, bytes and average rate per client domain,
# shortest domain names first (see domnamcompare).
if ($opt_d) {
# Each format must end with a line holding a single "." -- those
# terminator lines had been lost, leaving the formats unterminated.
format top3 =

Total Transfer Amount By Domain

            Number Of    Number of     Average    Percent Of  Percent Of
Domain Name  Files Sent   Bytes Sent   Xmit  Rate  Files Sent  Bytes Sent
-----------  ----------  ------------  ----------  ----------  ----------
.

format line3 =
@<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>>  @>>>>>>>>>  @>>>>>>>    @>>>>>>>
$domain,     $files,     $bytes,       $avgrate,   $pctfiles,  $pctbytes
.

# $- = 0 (no lines left on the page) makes the next write start with the
# newly selected header format.
$- = 0;
$^ = "top3";
$~ = "line3";

# Clamp the grand totals so the percentage divisions cannot divide by zero.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

foreach $domain ( sort domnamcompare keys(%domainfiles) ) {

  # Avoid a division by zero in the rate for domains with 0 seconds logged.
  if ( $domainsecs{$domain} < 1 ) { $domainsecs{$domain} = 1; }

  $files = $domainfiles{$domain};
  $bytes = $domainbytes{$domain};
  $avgrate  = sprintf("%5.1f KB/s",
                 $domainbytes{$domain}/$domainsecs{$domain}/1000);
  $pctfiles = sprintf("%8.2f", 100 * $domainfiles{$domain} / $xferfiles);
  $pctbytes = sprintf("%8.2f", 100 * $domainbytes{$domain} / $xferbytes);
  write;

}

print "\n";
# This footnote describes FTP logging only, so it is not printed for a
# Gopher report (-g), where it used to appear unconditionally.
if (!$opt_g) {
print "These figures only reflect ANONYMOUS FTP transfers.  There are many\n";
print "sites which mount the archives via NFS, and those transfers are not\n";
print "logged and reported by this program.\n\n";
}

}

# Hourly report (-h): one line per hour of day, keyed by the two-character
# hour string, then normal program exit.
if ($opt_h) {

# Each format must end with a line holding a single "." -- those
# terminator lines had been lost, leaving the formats unterminated.
format top8 =

Hourly Transmission Statistics

                Number Of    Number of    Average    Percent Of  Percent Of
    Time        Files Sent  Bytes  Sent  Xmit  Rate  Files Sent  Bytes Sent
---------------  ----------  -----------  ----------  ----------  ----------
.
format line8 =
@<<<<<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>  @>>>>>>>>>  @>>>>>>>    @>>>>>>>
$time,           $nfiles,    $nbytes,     $avgrate,   $pctfiles,  $pctbytes
.


# Unbuffered output; $- = 0 (no lines left on the page) makes the next
# write start with the newly selected header format.
$| = 1;
$- = 0;
$^ = "top8";
$~ = "line8";

# Clamp the grand totals so the percentage divisions cannot divide by zero.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

# A plain string sort orders the fixed-width hour keys chronologically.
foreach $time ( sort keys(%xfertbytes) ) {

  # An hour whose transfers all took 0 seconds would divide by zero in the
  # rate; treat it as 1 second, as the domain report already does.
  if ( $xfertsecs{$time} < 1 ) { $xfertsecs{$time} = 1; }

  $nfiles   = $xfertfiles{$time};
  $nbytes   = $xfertbytes{$time};
  $avgrate  = sprintf("%5.1f KB/s", $xfertbytes{$time}/$xfertsecs{$time}/1000);
  $pctfiles = sprintf("%8.2f", 100*$xfertfiles{$time} / $xferfiles);
  $pctbytes = sprintf("%8.2f", 100*$xfertbytes{$time} / $xferbytes);
  write;
}
}
exit(0);

# Sort comparator for day keys laid out as "Www Mmm DD YYYY" (month name at
# offset 4, day at offset 8, year at offset 11).  Reads the sort globals $a
# and $b and returns a negative, zero or positive number so that keys sort
# chronologically.
#
# The original weighted the year by 4800 but the month by index()*400, and
# index() yields 0,3,...,33, so the month term could reach 13200 and outweigh
# a one-year difference: December 1992 sorted AFTER January 1993.  The key is
# now the plain number YYYYMMDD.
sub datecompare {

  &datekey($a) - &datekey($b);

}

# Turn one "Www Mmm DD YYYY" key into the number YYYYMMDD.
sub datekey {

  local($stamp) = @_;
  # index() returns the character offset of the month name; names are three
  # characters long, so offset / 3 is the zero-based month number.
  local($month) =
    index("JanFebMarAprMayJunJulAugSepOctNovDec", substr($stamp, 4, 3)) / 3;

  substr($stamp, 11, 4) * 10000 + ($month + 1) * 100 + substr($stamp, 8, 2);

}

# Sort comparator for domain names: shorter names come first, and names of
# equal length are ordered by string comparison.  Works on the sort
# globals $a and $b and yields -1, 0 or 1.
sub domnamcompare {

  $sdiff = length($a) - length($b);

  # Sign of the length difference decides; fall back to string order on a tie.
  ($sdiff <=> 0) || ($a cmp $b);

}

# Sort comparator for archive sections: the section with the larger byte
# count in %groupbytes comes first; sections with equal byte counts are
# ordered by string comparison of their names.  Works on the sort globals
# $a and $b and yields -1, 0 or 1.
sub bytecompare {

  $bdiff = $groupbytes{$b} - $groupbytes{$a};

  # Sign of the byte difference decides; fall back to name order on a tie.
  ($bdiff <=> 0) || ($a cmp $b);

}

# Sort comparator keyed on %fac: the key with the larger %fac value comes
# first; keys with equal values are ordered by string comparison.  Works on
# the sort globals $a and $b and yields -1, 0 or 1.
# NOTE(review): nothing in the visible script fills %fac or calls this sub.
sub faccompare {

  $fdiff = $fac{$b} - $fac{$a};

  # Sign of the value difference decides; fall back to key order on a tie.
  ($fdiff <=> 0) || ($a cmp $b);

}

----CUT HERE -----------------------------------------------------------------
--
- Dave [email protected]

       "Information wants to be free." - Elektric Music