/* analog.c 0.9beta3 */
/* Please read the README, or see
http://www.statslab.cam.ac.uk/~sret1/analog/ */
/* Update history: */
/* to 0.8: initial program, just default options */
/* 0.89: added commandline args, and many new options */
/* 0.89beta2: solved problem with over-long log entries */
/* 0.89beta3: Hash tables introduced for all categories except domains */
/* Now count number of distinct hosts */
/* Included all successes (searches are stripped down to ?) */
/* 0.89beta4: Understands searches even when the URL gets very long */
/* 0.9beta: Introduced HOSTURL, and subdomain analysis. */
/* Fixed bug that required logfile to be in chronological order. */
/* Translated it into ANSI C (from K & R) */
/* 0.9beta2: Wrote my own scanf; made whole program 30% faster! */
/* Included 304's in successful, not redirected requests. */
/* 0.9beta3: Domains implemented as non-clashing hash table. */
/* Calculated reqs/bytes per day correctly for short periods. */
#define VERSION "0.9beta3" /* the version number of this program */
/* #define PROF */ /* uncomment this line for home-made profiling */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/time.h>
#include <unistd.h>
/* Boolean values, and their aliases used for switching options on and off */
#define TRUE (1)
#define FALSE (0)
#define ON (TRUE)
#define OFF (FALSE)
/* Status codes; ERR is what is passed to exit() if the logfile cannot be opened */
#define OK (0)
#define ERR (-1)
#define BYREQUESTS (0) /* ways of sorting */
#define BYBYTES (1)
#define ALPHABETICAL (2)
#define ALL (2) /* see PAGELINKS in analhead.h */
#define COMMON (2) /* three types of input lines */
#define NCSAOLD (1)
#define CORRUPT (0)
typedef int flag; /* holds TRUE / FALSE (equivalently ON / OFF) */
/* NB MAX and MIN evaluate one of their arguments twice, so don't give them
   arguments with side effects. The comparison is done in the usual promoted
   type, so an unsigned argument (such as a strlen() difference) never
   compares as less than 0. */
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
#define FIRSTMONTH (23891) /* earliest poss month = 12 * year + month */
/* Nov 1990; very conservative */
#define DOMHASHSIZE (1354) /* = 2 * 26^2 + 2 by description of domain algorithm */
#include "analhead.h"
/* (Global) commandline variables */
/* Defaults for all these can be set in analhead.h; they are explained there */
/* All of them are given their values by commandline() below */
char logfile[MAXSTRINGLENGTH]; /* name of the logfile; "stdin" means read standard input */
char domainsfile[MAXSTRINGLENGTH]; /* name of the domains file (option f) */
flag mq, dq, hq, oq, iq, rq, sq, q7; /* whether we want each type of report */
/* mq = monthly, dq = daily, hq = hourly, oq = domain, iq = directory, */
/* rq = request, sq = count of hosts, q7 = stats for last 7 days */
int monthlyunit, dailyunit, hourlyunit; /* the size of the mark in the graphical displays */
int domfloor, domsortby, dirfloor, dirsortby; /* floor and sort method (BYREQUESTS, */
/* BYBYTES or ALPHABETICAL) for the domain and directory reports */
int dirlevel, reqfloor, reqsortby, pagewidth; /* 'level' of the directory report (option l); */
/* floor and sort method for the request report; page width (option w) */
char markchar; /* set by option c */
char hostname[MAXSTRINGLENGTH], hosturl[MAXSTRINGLENGTH]; /* options n and u */
int kq; /* default PAGELINKS */
/* kq is OFF, ON or ALL: whether to link to pages in the request report (option k) */
void commandline(int argc, char **argv)
{
int i;
/* First put in the default values */
strcpy(logfile, LOGFILE);
strcpy(domainsfile, DOMAINSFILE);
mq = MONTHLY;
dq = DAILY;
hq = HOURLY;
oq = DOMAIN;
iq = DIRECTORY;
rq = REQUEST;
sq = COUNTHOSTS;
q7 = LASTSEVEN;
monthlyunit = MONTHLYUNIT;
hourlyunit = HOURLYUNIT;
dailyunit = DAILYUNIT;
domsortby = DOMSORTBY;
dirsortby = DIRSORTBY;
reqsortby = REQSORTBY;
if (domsortby == BYBYTES)
domfloor = MIN_DOM_BYTES;
else
domfloor = MIN_DOM_REQS;
if (reqsortby == BYBYTES)
reqfloor = MIN_URL_BYTES;
else
reqfloor = MIN_URL_REQS;
if (dirsortby == BYBYTES)
dirfloor = MIN_DIR_BYTES;
else
dirfloor = MIN_DIR_REQS;
dirlevel = DIRLEVEL;
pagewidth = PAGEWIDTH;
markchar = MARKCHAR;
strcpy(hostname, HOSTNAME);
strcpy(hosturl, HOSTURL);
kq = PAGELINKS;
/* now read the arguments */
for (i = 1; i < argc; i++) {
if (argv[i][0] != '+' && argv[i][0] != '-')
strcpy(logfile, argv[i]);
else switch (argv[i][1]) {
case '\0': /* read stdin */
strcpy(logfile, "stdin");
break;
case '7': /* stats for last 7 days */
if (argv[i][0] == '-')
q7 = OFF;
else
q7 = ON;
break;
case 'c': /* markchar */
markchar = argv[i][2];
break;
case 'd': /* daily summary */
if (argv[i][0] == '-')
dq = OFF;
else {
dq = ON;
if (argv[i][2] != '\0')
dailyunit = atoi(argv[i] + 2);
}
break;
case 'f': /* domains file */
strcpy(domainsfile, argv[i] + 2);
break;
case 'h': /* hourly summary */
if (argv[i][0] == '-')
hq = OFF;
else {
hq = ON;
if (argv[i][2] != '\0')
hourlyunit = atoi(argv[i] + 2);
}
break;
case 'i': /* directory report */
if (argv[i][0] == '-')
iq = OFF;
else {
iq = ON;
switch (argv[i][2]) {
case 'a':
dirsortby = ALPHABETICAL;
if (argv[i][3] == '\0')
dirfloor = MIN_DIR_REQS;
else
dirfloor = atoi(argv[i] + 3);
break;
case 'b':
dirsortby = BYBYTES;
if (argv[i][3] == '\0')
dirfloor = MIN_DIR_BYTES;
else
dirfloor = atoi(argv[i] + 3);
break;
case 'r':
dirsortby = BYREQUESTS;
if (argv[i][3] == '\0')
dirfloor = MIN_DIR_REQS;
else
dirfloor = atoi(argv[i] + 3);
break;
default:
if (argv[i][2] != '\0')
dirfloor = atoi(argv[i] + 2);
}
}
break;
case 'k': /* link to pages in req. report? */
if (argv[i][0] == '-')
kq = OFF;
else {
kq = ON;
if (argv[i][2] == 'k')
kq = ALL;
}
break;
case 'l': /* 'level' of dir report */
dirlevel = atoi(argv[i] + 2);
break;
case 'm': /* monthly report */
if (argv[i][0] == '-')
mq = OFF;
else {
mq = ON;
if (argv[i][2] == '\0')
monthlyunit = atoi(argv[i] + 2);
}
break;
case 'n': /* hostname */
strcpy(hostname, argv[i] + 2);
break;
case 'o': /* domain report */
if (argv[i][0] == '-')
oq = OFF;
else {
oq = ON;
switch (argv[i][2]) {
case 'a':
domsortby = ALPHABETICAL;
if (argv[i][3] == '\0')
domfloor = MIN_DOM_REQS;
else
domfloor = atoi(argv[i] + 3);
break;
case 'b':
domsortby = BYBYTES;
if (argv[i][3] == '\0')
domfloor = MIN_DOM_BYTES;
else
domfloor = atoi(argv[i] + 3);
break;
case 'r':
domsortby = BYREQUESTS;
if (argv[i][3] == '\0')
domfloor = MIN_DOM_REQS;
else
domfloor = atoi(argv[i] + 3);
break;
default:
if (argv[i][2] != '\0')
domfloor = atoi(argv[i] + 2);
}
}
break;
case 'r': /* request report */
if (argv[i][0] == '-')
rq = OFF;
else {
rq = ON;
switch (argv[i][2]) {
case 'a':
reqsortby = ALPHABETICAL;
if (argv[i][3] == '\0')
reqfloor = MIN_URL_REQS;
else
reqfloor = atoi(argv[i] + 3);
break;
case 'b':
reqsortby = BYBYTES;
if (argv[i][3] == '\0')
reqfloor = MIN_URL_BYTES;
else
reqfloor = atoi(argv[i] + 3);
break;
case 'r':
reqsortby = BYREQUESTS;
if (argv[i][3] == '\0')
reqfloor = MIN_URL_REQS;
else
reqfloor = atoi(argv[i] + 3);
break;
default:
if (argv[i][2] != '\0')
reqfloor = atoi(argv[i] + 2);
}
}
break;
case 's': /* count hosts? */
if (argv[i][0] == '-')
sq = OFF;
else
sq = ON;
break;
case 'u': /* host URL */
strcpy(hosturl, argv[i] + 2);
break;
case 'w': /* pagewidth */
pagewidth = atoi(argv[i] + 2);
if (pagewidth < MINPAGEWIDTH || pagewidth > MAXPAGEWIDTH) {
fprintf(stderr, "Warning: at option %s, page width should be between %d and %d\n",
MINPAGEWIDTH, MAXPAGEWIDTH);
fprintf(stderr, "Resetting to default value of %d\n", PAGEWIDTH);
pagewidth = PAGEWIDTH;
}
break;
default:
fprintf(stderr, "Warning: Ignoring unknown option %s: see README for correct usage\n", argv[i]);
fprintf(stderr, "or go to
http://www.statslab.cam.ac.uk/~sret1/analog/\n");
}
}
}
/* Convert a 3 letter month abbrev. ("Jan", "Feb", ...) to its number, 1-12.
   Only as many letters as are needed to tell the months apart are looked at.
   Returns 0 if the abbreviation cannot be any month name. */
int strtomonth(char month[3])
{
  int monthno = 0;   /* stays 0 if we don't recognise the abbreviation */

  switch (month[0]) {
  case 'J':   /* Jan, Jun or Jul */
    if (month[1] == 'a')
      monthno = 1;
    else if (month[1] == 'u') {
      if (month[2] == 'n')
        monthno = 6;
      else if (month[2] == 'l')
        monthno = 7;
    }
    break;
  case 'F':
    monthno = 2;
    break;
  case 'M':   /* Mar or May */
    if (month[2] == 'r')
      monthno = 3;
    else if (month[2] == 'y')
      monthno = 5;
    break;
  case 'A':   /* Apr or Aug */
    if (month[1] == 'p')
      monthno = 4;
    else if (month[1] == 'u')
      monthno = 8;
    break;
  case 'S':
    monthno = 9;
    break;
  case 'O':
    monthno = 10;
    break;
  case 'N':
    monthno = 11;
    break;
  case 'D':
    monthno = 12;
    break;
  default:
    break;
  }
  return(monthno);
}
/* dateoffset[m] = number of days in a non-leap year before the 1st of
   month m (m = 1..12); entry 0 is unused padding */
int dateoffset[13] = {0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};

/* Position of a date within its year, arranged so that adding (year / 4)
   for the leap days gives a consistent day count. (year / 4) already
   includes the leap day of a leap year itself, which has not yet happened
   in January or February, so those two months are moved back a day.
   As elsewhere, every fourth year is taken to be a leap year, which is
   right until 2099. An out-of-range month number is treated as having
   offset 0 rather than being used to index outside dateoffset. */
static int daysintoyear(int date, int monthno, int year)
{
  int days;

  if (monthno < 1 || monthno > 12)
    return(date);
  days = dateoffset[monthno] + date;
  if (year % 4 == 0 && monthno <= 2)
    days--;
  return(days);
}

/* day of week of given date: 0 = Sunday, ..., 6 = Saturday */
int dayofdate(int date, int monthno, int year)
{
  int x;

  /* 365 = 1 (mod 7), so each year moves the day of the week on by one,
     plus one more for each leap day */
  x = daysintoyear(date, monthno, year) + year + (year / 4) + 5;
  /* every fourth year until 2099 is a leap year */
  return(x % 7);
}

/* Number of minutes from date1 monthno1 year1, hr1:min1 until
   date2 monthno2 year2, hr2:min2 (negative if the second is earlier) */
int minsbetween(int date1, int monthno1, int year1, int hr1, int min1,
                int date2, int monthno2, int year2, int hr2, int min2)
{
  int x, y;   /* day numbers of the two dates */

  x = daysintoyear(date1, monthno1, year1) + year1 * 365 + (year1 / 4);
  y = daysintoyear(date2, monthno2, year2) + year2 * 365 + (year2 / 4);
  return((y - x) * 1440 + (hr2 - hr1) * 60 + (min2 - min1));
}
/* Print a +ve integer to stdout in clumps of 3 digits separated by spaces,
   e.g. 1234567 comes out as "1 234 567" */
void int3printf(int x)
{
  int scale;

  /* find the power of 1000 belonging to the leading clump; we compare with
     x / 1000 because scale * 1000 might overflow */
  for (scale = 1; x / 1000 >= scale; scale *= 1000)
    ;
  /* the leading clump is not zero-padded ... */
  printf("%d", (x / scale) % 1000);
  /* ... but all the ones after it are */
  while ((scale /= 1000) >= 1)
    printf(" %03d", (x / scale) % 1000);
}
/* Print a +ve double to stdout as a whole number in clumps of 3 digits
   separated by spaces, just as int3printf() does for ints.
   Each clump is peeled off the top of what is left of the number, so that
   only a value below 1000 is ever converted to int; converting the whole of
   x / i would overflow as soon as x was too big for an int, which is exactly
   when we need a double in the first place. */
void double3printf(double x)
{
  double i = 1;      /* the power of 1000 for the current clump */
  double rest = x;   /* the part of x not yet printed */
  int clump;

  while (x / 1000 >= i)
    i *= 1000;   /* find how big x is, so we know where to start */
  clump = (int)(rest / i);
  printf("%d", clump);
  rest -= clump * i;
  for (i /= 1000; i >= 1; i /= 1000) {   /* now run down again */
    clump = (int)(rest / i);
    printf(" %03d", clump);
    rest -= clump * i;
  }
}
/* Define functions to replace scanf, which is very slow */

/* Pick apart a logfile line in common format. Reading along inputline,
   we expect:
     the hostname, up to the first space;
     anything, up to a '[';
     a 2 digit date, '/', a 3 character month, '/' and a 4 digit year;
     ':', a 2 digit hour, ':' and a 2 digit minute;
     anything (seconds, timezone), up to a '"';
     the method, up to the next space;
     the filename, up to a space, '"' or '?' (so searches are cut short);
     anything, up to the closing '"';
     a space, a 3 digit return code, another space;
     and the bytes field, up to a space or the end of the line.
   The strings are stored in hostn, month, filename and bytestr and the
   numbers through date, year, hr, min and code.
   Returns the number of fields read successfully, as sscanf would: 9 if the
   whole line was understood, less if it wasn't (in which case the later
   fields have not been set). */
int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date, char month[4], int *year,
                  int *hr, int *min, char filename[MAXSTRINGLENGTH], int *code, char bytestr[16])
{
  char *cin = inputline;   /* the character we are reading */
  char *cout;              /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  if (*cin != ' ')   /* line ended, or hostname too long */
    return(0);
  *cout = '\0';

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  /* (the casts are because isdigit needs a non-negative argument) */
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(1);
  else
    *date = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(1);
  else
    *date += (*cin - '0');

  /* read in month */
  cin++;
  if (*cin != '/')
    return(2);
  cin++;
  cout = month;
  for (i = 0; i < 3 && *cin != '\0'; i++) {
    *cout = *cin;
    cout++;
    cin++;
  }
  if (*cin == '\0')
    return(2);
  *cout = '\0';

  /* read in year */
  if (*cin != '/')
    return(3);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *year += 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *year += 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *year += (*cin - '0');

  /* read in hour */
  cin++;
  if (*cin != ':')
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  else
    *hr = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  else
    *hr += (*cin - '0');

  /* read in minute */
  cin++;
  if (*cin != ':')
    return(5);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *min = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *min += (*cin - '0');

  /* ignore second & timezone; so scan to next '"' */
  for (cin++; *cin != '"' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* ignore method; so read to next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* read in filename */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\0' && *cin != '"' && *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  if (*cin != ' ' && *cin != '"' && *cin != '?')   /* line ended, or filename too long */
    return(6);
  *cout = '\0';

  /* scan to next " */
  for ( ; *cin != '"' && *cin != '\0' ; cin++)
    ;
  if (*cin == '\0')
    return(7);

  /* read in return code; always 3 digits */
  cin++;
  if (*cin != ' ')
    return(7);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(7);
  else
    *code = 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(7);
  else
    *code += 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(7);
  else
    *code += (*cin - '0');

  /* finally, read in bytestr */
  cin++;
  if (*cin != ' ')
    return (8);
  cin++;
  /* Stop at the end of the string as well as at a space or newline, and
     take at most 15 characters so that the '\0' still fits in bytestr[16];
     anything longer is cut short. */
  i = 0;
  for (cout = bytestr; *cin != ' ' && *cin != '\n' && *cin != '\0' && i < 15; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  return(9);
}
/* Pick apart a logfile line in the old NCSA format. Reading along
   inputline, we expect:
     the hostname, up to the first space;
     anything, up to a '[';
     the day of the week, up to the next space;
     a 3 character month and a space;
     a 2 character date, the first of which may be a space;
     a space, a 2 digit hour, ':' and a 2 digit minute;
     4 more characters (':', the seconds and a space), which are skipped;
     a 4 digit year;
     anything up to the second space after that (which skips the method);
     and the filename, up to a space, '?' or the end of the line.
   The strings are stored in hostn, month and filename and the numbers
   through date, hr, min and year.
   Returns the number of fields read successfully, as sscanf would: 7 (which
   is what main() tests for) if the whole line was understood, less if it
   wasn't (in which case the later fields have not been set). */
int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], char month[4], int *date,
                   int *hr, int *min, int *year, char filename[MAXSTRINGLENGTH])
{
  char *cin = inputline;   /* the character we are reading */
  char *cout;              /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  if (*cin != ' ')   /* line ended, or hostname too long */
    return(0);
  *cout = '\0';

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* ignore day of week, so scan until next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in month */
  cin++;
  cout = month;
  for (i = 0; i < 3 && *cin != '\0'; i++) {
    *cout = *cin;
    cout++;
    cin++;
  }
  if (*cin == '\0')
    return(1);
  *cout = '\0';

  /* read in date; the tens digit may be a space */
  /* (the casts are because isdigit needs a non-negative argument) */
  if (*cin != ' ')
    return(2);
  cin++;
  if (!isdigit((unsigned char)*cin) && *cin != ' ')
    return(2);
  else if (*cin != ' ')
    *date = 10 * (*cin - '0');
  else
    *date = 0;
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  else
    *date += (*cin - '0');

  /* read in hour */
  cin++;
  if (*cin != ' ')
    return(3);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *hr = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  else
    *hr += (*cin - '0');

  /* read in minute */
  cin++;
  if (*cin != ':')
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  else
    *min = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  else
    *min += (*cin - '0');

  /* ignore second; skip to year and read it */
  /* (step over the 4 characters one at a time, so that we can't run off
     the end of a line which stops short) */
  for (i = 0; i < 5; i++) {
    cin++;
    if (*cin == '\0')
      return(5);
  }
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *year += 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *year += 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(5);
  else
    *year += (*cin - '0');

  /* ignore method, so skip to second space */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* finally, read in the filename */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\n' && *cin != '?' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  /* if we stopped anywhere except at the end of the filename, it must have
     been because the filename was too long */
  if (*cin != ' ' && *cin != '\n' && *cin != '?' && *cin != '\0')
    return(6);
  *cout = '\0';
  return (7);
}
int main(int argc, char **argv)
{
FILE *lf, *df; /* logfile, domains file */
int rc; /* return code */
char inputline[MAXLINELENGTH]; /* a particular input line */
int linetype; /* COMMON, NCSAOLD or CORRUPT */
char hostn[MAXSTRINGLENGTH];
int day, date, monthno, year, monthcode, hr, min;
int firstdate, firstmonthno, firstyear, firstmonthcode, firsthr, firstmin;
int lastdate, lastmonthno, lastyear, lastmonthcode, lasthr, lastmin;
int timecode; /* monthcode * 60 * 24 * 31 + date * 60 * 24 + hr * 60 + min */
int firsttimecode = 1000000000, lasttimecode = 0; /* first and last stats analysed */
int totalmins; /* between first and last entries analysed */
int olddate, oldmonthno, oldyear, oldhr, oldmin, oldsec, oldmonthcode;
int oldtimecode; /* a week before present */
char oldmonth[4];
char dayname[7][4] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
char monthname[13][4] = {"", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
"Aug", "Sep", "Oct", "Nov", "Dec"};
char month[4];
char filename[MAXSTRINGLENGTH];
int code;
double bytes; /* long is not big enough; double has more sig. figs,
and copes with overflow automatically. */
char bytestr[16];
int dirsufflength; /* the length of DIRSUFFIX */
int bq = ON; /* Count bytes? On until we find a line in old format. */
int fieldwidth; /* width we require to print certain integers in */
int monthlyreq[MAXMONTHS]; /* # requests in each month */
int maxmonthlyreq = 0; /* the maximum of those */
int dailyreq[7];
int maxdailyreq = 0;
int hourlyreq[24];
int maxhourlyreq = 0;
int total_fail_reqs = 0; /* the number of failed requests so far (400s, 500s) */
int total_fail_reqs7 = 0; /* in last 7 days */
int total_succ_reqs = 0; /* the number of successful requests so far (200s) */
int total_succ_reqs7 = 0;
int total_other_reqs = 0; /* redirects (300s) */
int total_other_reqs7 = 0;
double total_bytes = 0;
double total_bytes7 = 0;
int corrupt_lines = 0; /* the number of corrupt lines in the logfile */
/* (These tend to be URLs with spaces or quotes in */
int no_urls = 0; /* the number of distinct URLs found so far */
int no_urls7 = 0; /* the number used in the last 7 days */
int url_max_reqs = 0; /* the maximum number of requests for any URL */
struct url { /* define a structure containing information about a URL ... */
char name[MAXSTRINGLENGTH]; /* the name of the URL */
int reqs; /* the number of requests it has received */
double bytes; /* the number of bytes transferred due to it */
flag last7; /* whether it has been used in the last 7 days */
struct url *next; /* the next url in the list */
} *urlhead[URLHASHSIZE], *urlsorthead, *urlp, *urlp2, *urllastp, *urlnextp;
/* ... and set up some pointers to such structures */
int no_dirs = 0; /* directories ditto */
int dir_max_reqs = 0;
struct dir {
char name[MAXSTRINGLENGTH];
int reqs;
double bytes;
struct dir *next;
} *dirhead[DIRHASHSIZE], *dirsorthead, *dirp, *dirp2, *dirlastp, *dirnextp;
int no_hosts = 0; /* hosts ditto */
int no_hosts7 = 0; /* the number of all hosts in the last 7 days */
int no_new_hosts7 = 0; /* the number of new hosts in the last 7 days */
struct host {
char name[MAXSTRINGLENGTH];
flag last7;
struct host *next;
} *hosthead[HOSTHASHSIZE], *hostp;
struct domain { /* and domains */
char id[256]; /* can be as long as host for domains like 'statslab.cam.ac.uk' */
char name[256]; /* the geographical location of the domain */
int reqs;
double bytes;
int nexti; /* the index of the next domain alphabetically */
struct domain *next; /* the next subdomain of the current domain */
} *domainhead[DOMHASHSIZE], *domp, *domp2, *domlastp;
int firstdom, domnextj; /* for sorting */
int dom_max_reqs = 0; /* the max. of domain_reqs */
char domainname[MAXSTRINGLENGTH]; /* a domain for a particular request */
flag last7q; /* are we now in the last 7 days? */
int magicnumber; /* the magic hash number of a file, host etc. */
int onlist; /* which list we are on, while sorting */
double bytepc;
int bytepc1, bytepc2; /* the % of bytes for a particular URL */
struct timeval starttime, stoptime;
long oldtime;
struct timezone tzp;
char starttimestr[26];
char oldtimestr[26];
int i, j, tempint; /* useful bits and bobs */
double tempdouble;
flag tempflag;
char *tempp;
char tempstr[MAXSTRINGLENGTH], tempstr2[MAXSTRINGLENGTH];
#ifdef PROF
long timein = 0, timeout = 0, timescan1 = 0, timescan2 = 0;
long timescan = 0, timehost = 0, timesort = 0, timedomsort = 0;
long timegets = 0, timedom = 0, timedir = 0, timereq = 0, timeother = 0, timetot = 0;
struct timeval lasttime, thistime;
#endif
/* Initialisation */
gettimeofday(&starttime, &tzp);
strcpy(starttimestr, ctime(&starttime.tv_sec));
commandline(argc, argv);
if (q7) {
oldtime = starttime.tv_sec - 604800; /* seconds in a week */
strcpy(oldtimestr, ctime(&oldtime));
oldyear = (oldtimestr[23] - '0') + (oldtimestr[22] - '0') * 10
+ (oldtimestr[21] - '0') * 100 + (oldtimestr[20] - '0') * 1000;
oldmin = (oldtimestr[15] - '0') + (oldtimestr[14] - '0') * 10;
oldhr = (oldtimestr[12] - '0') + (oldtimestr[11] - '0') * 10;
olddate = (oldtimestr[9] - '0');
if (oldtimestr[8] != ' ')
olddate += (oldtimestr[8] - '0') * 10;
oldtimestr[7] = '\0';
strcpy(oldmonth, oldtimestr + 4);
oldmonthno = strtomonth(oldmonth);
oldmonthcode = 12 * oldyear + oldmonthno - FIRSTMONTH;
oldtimecode = oldmonthcode * 44640 + olddate * 1440 + oldhr * 60 + oldmin;
}
dirsufflength = strlen(DIRSUFFIX);
if (rq) {
for (i = 0; i < URLHASHSIZE; i++) {
urlhead[i] = (struct url *) malloc(sizeof(struct url));
urlhead[i] -> name[0] = '\0';
}
}
if (iq) {
for (i = 0; i < DIRHASHSIZE; i++) {
dirhead[i] = (struct dir *) malloc(sizeof(struct dir));
dirhead[i] -> name[0] = '\0';
}
}
if (sq) {
for (i = 0; i < HOSTHASHSIZE; i++) {
hosthead[i] = (struct host *) malloc(sizeof(struct host));
hosthead[i] -> name[0] = '\0';
}
}
if (oq) {
for (i = 0; i < DOMHASHSIZE; i++) {
domainhead[i] = (struct domain *) malloc(sizeof(struct domain));
domainhead[i] -> name[0] = '\0';
}
df = fopen(domainsfile, "r"); /* calculate all domains */
if (df == NULL) {
fprintf(stderr, "Warning: Failed to open domains file %s: will not construct domain report\n", domainsfile);
oq = OFF;
}
}
/* We put the domains in the following order. aa = 0, ab = 2, ...,
ba = 52, ... Domains with more than two letters go in the spaces;
co = 134, com = 135, cp = 136. We assume that there are no two long
domain names with the same two initial letters. Finally zz = 1350,
zzspam = 1351, Unknown = 1352, Numerical = 1353. Each domain contains
a 'nexti' element to show which is the next domain that occurs in the
domains file. */
if (oq) {
domp = domainhead[DOMHASHSIZE - 2];
strcpy(domp -> id, "*UNK");
strcpy(domp -> name, "unknown");
domp -> reqs = 0;
domp -> bytes = 0;
domp -> next = (struct domain *) malloc(sizeof(struct domain));
domp -> next -> name[0] = '\0';
domp = domainhead[DOMHASHSIZE - 1];
strcpy(domp -> id, "*NUM");
strcpy(domp -> name, "numerical hosts");
domp -> reqs = 0;
domp -> bytes = 0;
domp -> next = (struct domain *) malloc(sizeof(struct domain));
domp -> next -> name[0] = '\0';
domp2 = domp;
domp = domp -> next;
while ((rc = fscanf(df, "%255s %255[^\n]", tempstr, tempstr2)) != EOF) {
if (rc == 2) {
if ((!isdigit(tempstr[0])) && strchr(tempstr, '.') == NULL) { /* new domain */
magicnumber = (tempstr[0] - 'a') * 52 + (tempstr[1] - 'a') * 2 + (tempstr[2] != '\0');
if (magicnumber < 0 || magicnumber > DOMHASHSIZE - 3)
fprintf(stderr,"Warning: Ignoring corrupt line in domains file\n");
domp = domainhead[magicnumber];
strcpy(domp -> id, tempstr);
strcpy(domp -> name, tempstr2);
domp -> reqs = 0;
domp -> bytes = 0;
domp -> next = (struct domain *) malloc(sizeof(struct domain));
domp -> next -> name[0] = '\0';
domp2 -> nexti = magicnumber; /* domp2 is the last domain we looked at */
domp2 = domp;
domp = domp -> next;
}
else { /* subdomain of last domain */
strcpy(domp -> id, tempstr);
strcpy(domp -> name, tempstr2);
domp -> reqs = 0;
domp -> bytes = 0;
domp -> next = (struct domain *) malloc(sizeof(struct domain));
domp -> next -> name[0] = '\0';
domp = domp -> next;
}
}
else { /* rc != 2 */
fprintf(stderr,"Warning: Ignoring corrupt line in domains file\n");
}
}
domp2 -> nexti = -1; /* marker; last domain has no subsequent one */
fclose(df);
} /* end if (oq) */
if (strcmp(logfile, "stdin") == 0)
lf = stdin;
else {
lf = fopen(logfile, "r");
if (lf == NULL) {
fprintf(stderr, "Error: Failed to open log file %s: exiting\n", logfile);
exit(ERR);
}
}
#ifdef PROF
gettimeofday(&thistime, &tzp);
timein += thistime.tv_usec - starttime.tv_usec + (thistime.tv_sec - starttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* Now start scanning */
while(fgets(inputline, MAXLINELENGTH, lf) != NULL) {
#ifdef PROF
gettimeofday(&thistime, &tzp);
timegets += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
linetype = CORRUPT; /* paranoia :) */
if (sscanf_common(inputline, hostn, &date, month, &year, &hr, &min, filename, &code, bytestr) == 9) {
linetype = COMMON;
#ifdef PROF
gettimeofday(&thistime, &tzp);
timescan1 += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
}
else if (sscanf_ncsaold(inputline, hostn, month, &date, &hr, &min, &year, filename) == 7) {
linetype = NCSAOLD;
if (bq) {
bq = OFF;
if ((domsortby == BYBYTES && oq) || (reqsortby == BYBYTES && rq) ||
(dirsortby == BYBYTES && iq)) {
fprintf(stderr, "Warning: Logfile contains old-style lines with no bytes data:\n");
fprintf(stderr, " Sorting will be by requests, not bytes\n");
if (domsortby == BYBYTES && oq) {
domsortby = BYREQUESTS;
domfloor = MIN_DOM_REQS;
}
if (reqsortby == BYBYTES && rq) {
reqsortby = BYREQUESTS;
reqfloor = MIN_URL_REQS;
}
if (dirsortby == BYBYTES && iq) {
dirsortby = BYREQUESTS;
dirfloor = MIN_DIR_REQS;
}
}
}
}
#ifdef PROF
gettimeofday(&thistime, &tzp);
timescan += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
if (linetype != CORRUPT) {
monthno = strtomonth(month);
monthcode = 12 * year + monthno - FIRSTMONTH;
timecode = monthcode * 44640 + date * 1440 + hr * 60 + min;
firsttimecode = MIN(firsttimecode, timecode);
lasttimecode = MAX(lasttimecode, timecode);
/* Are we in the last 7 days? Check this every time in case */
/* logfile is not in chronological order */
if (q7) {
last7q = FALSE;
if (timecode > oldtimecode)
last7q = TRUE;
}
bytes = atof(bytestr);
total_bytes += bytes;
if (last7q)
total_bytes7 += bytes;
if (code <= 299 || code == 304) { /* successes */
day = dayofdate(date, monthno , year);
++total_succ_reqs;
if (last7q)
++total_succ_reqs7;
/* date cataloguing */
if (monthcode >= 0 && monthcode < MAXMONTHS)
++monthlyreq[monthcode]; /* ignore months earlier than first month we saw */
++dailyreq[day]; /* these are so little work, it's easier just to */
++hourlyreq[hr]; /* do them rather than to check mq, dq and hq */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* Now for the request report */
if (rq || iq) {
if (strcmp(filename + MAX(strlen(filename) - dirsufflength, 0), DIRSUFFIX) == 0)
/* if it ends with the DIRSUFFIX (i.e. index.html), strip it */
filename[strlen(filename) - dirsufflength] = '\0';
if (filename[1] == '%' && filename[2] == '7' && (filename[3] == 'E' || filename[3] == 'e')) {
filename[1] = '~'; /* change %7E and %7e to ~ */
strcpy(filename + 2, filename + 4);
}
}
if (rq) {
/* First calculate filename's "magic number" */
magicnumber = 0;
for (i = 0; filename[i] != '\0'; i++) {
magicnumber += magicnumber + filename[i];
while (magicnumber >= URLHASHSIZE)
magicnumber -= URLHASHSIZE;
}
/* now look through the magicnumber'th list for that URL */
tempflag = TRUE; /* tempflag means 'still need to look' */
urlp = (urlhead[magicnumber]);
while (urlp -> name[0] != '\0' && tempflag) {
if (strcmp(urlp -> name, filename) == 0) { /* then done */
(urlp -> reqs)++;
urlp -> bytes += bytes;
if (last7q && !(urlp -> last7)) {
no_urls7++;
urlp -> last7 = TRUE;
}
tempflag = FALSE;
}
else { /* look at the next one */
urlp = urlp -> next;
}
}
if (tempflag) { /* reached the end of the list without success; new URL */
no_urls++;
strcpy(urlp -> name, filename);
urlp -> reqs = 1;
urlp -> bytes = bytes;
if (last7q) {
no_urls7++;
urlp -> last7 = TRUE;
}
else
urlp -> last7 = FALSE;
urlp -> next = (struct url *) malloc(sizeof(struct url));
urlp -> next -> name[0] = '\0';
}
} /* end if (rq) */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timereq += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* Now for the directory report. This is just the same as the request report,
but with the filename truncated. */
if (iq) {
i = 0;
for (j = 0; j < dirlevel; j++) {
if (filename[i] == '/')
i++;
for ( ; filename[i] != '/' && filename[i] != '\0'; i++)
; /* run through to level'th slash, if any */
}
if (filename[i] == '\0') /* not j levels; run back */
for ( i-- ; filename[i] != '/'; i--)
;
/* Now filename[i] == '/' */
filename[i + 1] = '\0'; /* Terminate it there */
if (strcmp(filename, "/") == 0)
strcpy(filename, "root directory");
/* Now look through the directories so far, as above. */
magicnumber = 0;
for (i = 0; filename[i] != '\0'; i++) {
magicnumber += magicnumber + filename[i];
while (magicnumber >= DIRHASHSIZE)
magicnumber -= DIRHASHSIZE;
}
tempflag = TRUE;
dirp = (dirhead[magicnumber]);
while (dirp -> name[0] != '\0' && tempflag) {
if (strcmp(dirp -> name, filename) == 0) {
(dirp -> reqs)++;
dirp -> bytes += bytes;
tempflag = FALSE;
}
else {
dirp = dirp -> next;
}
}
if (tempflag) {
no_dirs++;
strcpy(dirp -> name, filename);
dirp -> reqs = 1;
dirp -> bytes = bytes;
dirp -> next = (struct dir *) malloc(sizeof(struct dir));
dirp -> next -> name[0] = '\0';
}
} /* end if (iq) */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timedir += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* Now for the hostname count. Again, just the same as above, but we
don't measure bytes and requests for each host. */
for (i = strlen(hostn) - 1; i >= 0; i--)
hostn[i] = tolower(hostn[i]);
if (sq) {
magicnumber = 0;
for (i = 0; hostn[i] != '\0'; i++) {
magicnumber += magicnumber + hostn[i];
while (magicnumber >= HOSTHASHSIZE)
magicnumber -= HOSTHASHSIZE;
}
tempflag = TRUE;
hostp = (hosthead[magicnumber]);
while (hostp -> name[0] != '\0' && tempflag) {
if (strcmp(hostp -> name, hostn) == 0) {
if (last7q && !(hostp -> last7)) {
no_hosts7++;
hostp -> last7 = TRUE;
}
tempflag = FALSE;
}
else {
hostp = hostp -> next;
}
}
if (tempflag) {
no_hosts++;
strcpy(hostp -> name, hostn);
if (last7q) {
no_hosts7++;
no_new_hosts7++;
hostp -> last7 = TRUE;
}
else
hostp -> last7 = FALSE;
hostp -> next = (struct host *) malloc(sizeof(struct host));
hostp -> next -> name[0] = '\0';
}
} /* end if (sq) */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timehost += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* Now the domain report. This is different because we already know */
/* all domains, so there need be no clashes in the hash table. */
if (oq) {
/* first change hostn into a domain */
for (i = strlen(hostn) - 1; hostn[i] != '.' && i > 0; i--)
; /* run back to final .; or initial char if hostn is corrupt */
if (i == 0) {
magicnumber = DOMHASHSIZE - 2; /* representing unknown hosts */
}
else if (hostn[i + 1] <= '9' && hostn[i + 1] >= '0') {
magicnumber = DOMHASHSIZE - 1; /* representing numerical domains */
}
else {
strcpy(domainname, hostn + i + 1);
magicnumber = (domainname[0] - 'a') * 52 + (domainname[1] - 'a') * 2 + (domainname[2] != '\0');
if (magicnumber < 0 || magicnumber > DOMHASHSIZE - 3)
magicnumber = DOMHASHSIZE - 2;
else if (strcmp(domainhead[magicnumber] -> id, domainname) != 0)
magicnumber = DOMHASHSIZE - 2;
}
(domainhead[magicnumber] -> reqs)++;
domainhead[magicnumber] -> bytes += bytes;
/* now run through the rest of the list corresponding to that domain,
and check each one against the hostn */
domp = domainhead[magicnumber] -> next;
while (domp -> name[0] != '\0') {
if (magicnumber != DOMHASHSIZE - 1) {
strcpy(tempstr, ".");
strcat(tempstr, domp -> id);
}
else if (magicnumber == DOMHASHSIZE - 1) {
strcpy(tempstr, domp -> id);
strcat(tempstr, ".");
}
if ((strcmp(domp -> id, hostn) == 0) ||
(magicnumber != DOMHASHSIZE - 1 && strcmp(tempstr, hostn + MAX(strlen(hostn) - strlen(tempstr), 0)) == 0) ||
(magicnumber == DOMHASHSIZE - 1 && strncmp(tempstr, hostn, strlen(tempstr)) == 0)) {
(domp -> reqs)++;
domp -> bytes += bytes;
}
domp = domp -> next;
}
} /* end if (oq) */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timedom += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
} /* end if code <= 299 || code == 304 */
else if (code >= 400) {
++total_fail_reqs;
if (last7q)
++total_fail_reqs7;
#ifdef PROF
gettimeofday(&thistime, &tzp);
timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
}
else { /* code 300's (not 304): redirects */
++total_other_reqs;
if (last7q)
++total_other_reqs7;
#ifdef PROF
gettimeofday(&thistime, &tzp);
timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
}
} /* end if linetype != CORRUPT */
else { /* line is corrupt */
++corrupt_lines;
if (strchr(inputline, '\n') == NULL) /* line corrupt by being too long; */
fscanf(lf, "%*[^\n]"); /* read to end of line */
#ifdef PROF
gettimeofday(&thistime, &tzp);
timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
}
}
/* The logfile has been read; close it before producing the report. */
fclose(lf);
/* OUTPUT */
/* Page title and top-level heading.  The "Top" anchor opened here is the
target of the "Go To: Top" links printed by the individual reports. */
printf("<html>\n<head><title>Web Server Statistics for %s</title></head>\n", hostname);
if (hosturl[0] == '-') /* hosturl starting with '-': print the host name without a link */
printf("<body><a NAME=\"Top\">\n<h1>Web Server Statistics for %s</h1>\n\n", hostname);
else {
printf("<body><a NAME=\"Top\">\n<h1>Web Server Statistics for <a HREF=\"%s\">%s</a></h1>\n\n",
hosturl, hostname);
}
/* Print the program start time as weekday-day-month-year hour:minute by
picking single characters out of starttimestr.  The offsets used (0-2,
8-9, 4-6, 20-23, 11-12, 14-15) fit the fixed-width ctime() layout
"Www Mmm dd hh:mm:ss yyyy" -- presumably that is how starttimestr was
filled in; confirm where it is set. */
printf("<hr>\nProgram started at %c%c%c-%c%c-%c%c%c-%c%c%c%c %c%c:%c%c (all times local).\n",
starttimestr[0], starttimestr[1], starttimestr[2],
starttimestr[8], starttimestr[9], starttimestr[4], starttimestr[5], starttimestr[6],
starttimestr[20], starttimestr[21], starttimestr[22], starttimestr[23],
starttimestr[11], starttimestr[12], starttimestr[14], starttimestr[15]);
/* If even the earliest request is later than oldtimecode, switch the
   last-7-days figures off.  (Presumably oldtimecode marks the start of the
   7-day window, so those figures would just repeat the totals -- confirm
   where oldtimecode is set.) */
if (firsttimecode > oldtimecode)
  q7 = OFF;
if (total_succ_reqs > 0) {
  /* Unpack the first and last time codes.  A time code counts minutes,
     with every month given 31 * 1440 = 44640 minutes, starting from
     FIRSTMONTH; a month code is 12 * year + month.
     NOTE: firsttimecode and lasttimecode are consumed by this unpacking
     (they end up holding minutes within the day). */
  firstmonthcode = firsttimecode / 44640;
  firsttimecode -= firstmonthcode * 44640;
  firstmonthcode += FIRSTMONTH;
  firstyear = firstmonthcode / 12;
  firstmonthno = firstmonthcode % 12;
  if (firstmonthno == 0) {
    /* 12 * year + 12 (December) divides to year + 1, month 0.  Normalise
       it the same way the monthly report does, so that December is month
       12 of the correct year.  firstmonthcode itself is left untouched
       because the monthly report still needs it. */
    firstmonthno = 12;
    firstyear--;
  }
  firstdate = firsttimecode / 1440;
  firsttimecode -= firstdate * 1440;
  firsthr = firsttimecode / 60;
  firstmin = firsttimecode % 60;

  lastmonthcode = lasttimecode / 44640;
  lasttimecode -= lastmonthcode * 44640;
  lastmonthcode += FIRSTMONTH;
  lastyear = lastmonthcode / 12;
  lastmonthno = lastmonthcode % 12;
  if (lastmonthno == 0) {   /* December: see above */
    lastmonthno = 12;
    lastyear--;
  }
  lastdate = lasttimecode / 1440;
  lasttimecode -= lastdate * 1440;
  lasthr = lasttimecode / 60;
  lastmin = lasttimecode % 60;

  /* + 1 so that a log whose first and last entries share a minute still
     covers a non-zero period */
  totalmins = minsbetween(firstdate, firstmonthno, firstyear, firsthr, firstmin,
                          lastdate, lastmonthno, lastyear, lasthr, lastmin) + 1;
  printf("<br>Analysed requests from %s-%d-%s-%d %02d:%02d to %s-%d-%s-%d %02d:%02d (%.1f days).\n\n",
         dayname[dayofdate(firstdate, firstmonthno, firstyear)],
         firstdate, monthname[firstmonthno], firstyear, firsthr, firstmin,
         dayname[dayofdate(lastdate, lastmonthno, lastyear)],
         lastdate, monthname[lastmonthno], lastyear, lasthr, lastmin,
         (double)totalmins / 1440.0);
}
/* General summary: overall totals, each optionally followed by the
corresponding last-7-days figure in parentheses (only while q7 is on). */
/* No requests of any kind in the last 7 days: drop the 7-day figures. */
if (total_succ_reqs7 + total_fail_reqs7 + total_other_reqs7 == 0)
q7 = OFF; /* just total_bytes no good in case (!bq) */
printf("<p><b>Total completed requests:</b> ");
int3printf(total_succ_reqs);
if (q7) {
printf(" (");
int3printf(total_succ_reqs7);
printf(")");
}
printf("\n<br><b>Total failed requests:</b> ");
int3printf(total_fail_reqs);
if (q7) {
printf(" (");
int3printf(total_fail_reqs7);
printf(")");
}
printf("\n<br><b>Total redirected requests:</b> ");
int3printf(total_other_reqs);
if (q7) {
printf(" (");
int3printf(total_other_reqs7);
printf(")");
}
/* Daily averages are only printed when the analysed period is longer than
30 minutes. */
if (totalmins > 30) {
printf("\n<br><b>Average requests per day:</b> ");
/* NOTE(review): the request total is reduced by one before being scaled
to a day (1440 minutes); the reason is not evident from this code. */
double3printf(((double)(total_succ_reqs + total_fail_reqs + total_other_reqs - 1)) * 1440.0 / (totalmins + 0.0));
if (q7) {
printf(" (");
/* the 7-day average is an integer division by 7 */
int3printf((total_succ_reqs7 + total_fail_reqs7 + total_other_reqs7) / 7);
printf(")");
}
}
if (rq) { /* These data are not collected o/wise */
printf("\n<br><b>Number of distinct files requested:</b> ");
int3printf(no_urls);
if (q7) {
printf(" (");
int3printf(no_urls7);
printf(")");
}
}
if (sq) {
printf("\n<br><b>Number of distinct hosts served:</b> ");
int3printf(no_hosts);
if (q7) {
printf(" (");
int3printf(no_hosts7);
printf(")");
printf("\n<br><b>Number of new hosts served in last 7 days:</b> ");
int3printf(no_new_hosts7);
}
}
printf("\n<br><b>Corrupt logfile entries:</b> ");
int3printf(corrupt_lines);
/* Byte totals are only printed when bq is on. */
if (bq) {
printf("\n<br><b>Total bytes transferred:</b> ");
double3printf(total_bytes);
if (q7) {
printf(" (");
double3printf(total_bytes7);
printf(")");
}
if (totalmins > 30) {
printf("\n<br><b>Average bytes transferred per day:</b> ");
double3printf((total_bytes * 1440) / (totalmins + 0.0));
if (q7) {
printf(" (");
double3printf(total_bytes7 / 7.0);
printf(")");
}
}
}
if (q7)
printf("\n<br>(Figures in parentheses refer to the last 7 days).");
/* We need to check here which reports are going to appear
(empty reports do not appear even if we requested them).
We do this while sorting them. */
if (total_succ_reqs == 0) {
mq = OFF;
dq = OFF;
hq = OFF;
oq = OFF;
iq = OFF;
rq = OFF;
}
else {
/* Build the sorted list for the request report.  Every URL in the hash
   lists urlhead[] that reaches the floor is unlinked from its hash list
   and insertion-sorted into the list starting at urlsorthead, ordered
   according to reqsortby.  rq is switched back on only if at least one
   URL qualifies, and url_max_reqs is raised to the largest request count
   among the qualifying URLs. */
if (rq) {
  rq = OFF;   /* turn it off unless we find a big enough one */
  urlsorthead = (struct url *) malloc(sizeof(struct url));
  if (urlsorthead == NULL) {
    fprintf(stderr, "analog: out of memory while sorting the request report\n");
    return(ERR);
  }
  /* This node is the end-of-list marker of the sorted list: its empty
     name is what the loops below test for.  The other fields are cleared
     so that the comparisons against the head never read uninitialised
     memory. */
  urlsorthead -> name[0] = '\0';
  urlsorthead -> reqs = 0;
  urlsorthead -> bytes = 0;
  urlsorthead -> next = NULL;
  onlist = 0;             /* the list we are on */
  urlp = urlhead[0];      /* starting at list 0 */
  for (i = 0; i < no_urls; i++) {   /* run through all the URLs */
    if (urlp -> name[0] == '\0') {  /* then this member isn't a URL */
      urlnextp = urlhead[++onlist]; /* so look at the next list instead */
      i--;                          /* and don't count this one */
    }
    else if ((reqsortby == BYBYTES && (urlp -> bytes / (total_bytes / 10000)) < reqfloor) ||
             (reqsortby != BYBYTES && urlp -> reqs < reqfloor)) {
      /* below the floor (hundredths of a percent of all bytes when sorting
         by bytes, a request count otherwise): we don't want it */
      urlnextp = urlp -> next;
    }
    else {
      rq = ON;
      url_max_reqs = MAX(urlp -> reqs, url_max_reqs);
      if ((urlp -> bytes > urlsorthead -> bytes && reqsortby == BYBYTES) ||
          (urlp -> reqs > urlsorthead -> reqs && reqsortby == BYREQUESTS) ||
          (strcmp(urlp -> name, urlsorthead -> name) < 0 && reqsortby == ALPHABETICAL) ||
          (urlsorthead -> name[0] == '\0')) {
        /* it comes before the current first item (or the list is still
           empty): slot it in at the front */
        urlnextp = urlp -> next;  /* the next one we're going to look at */
        urlp -> next = urlsorthead;
        urlsorthead = urlp;
      }
      else {  /* otherwise compare with the ones so far */
        tempflag = ON;            /* ON while no place has been found */
        urllastp = urlsorthead;
        for (urlp2 = urlsorthead -> next; urlp2 -> name[0] != '\0' && tempflag;
             urlp2 = urlp2 -> next) {
          if ((urlp -> bytes > urlp2 -> bytes && reqsortby == BYBYTES) ||
              (urlp -> reqs > urlp2 -> reqs && reqsortby == BYREQUESTS) ||
              (strcmp(urlp -> name, urlp2 -> name) < 0 && reqsortby == ALPHABETICAL)) {
            /* urlp comes before urlp2 in the chosen ordering: slot it in
               between urllastp and urlp2 */
            urlnextp = urlp -> next;
            urlp -> next = urlp2;
            urllastp -> next = urlp;
            tempflag = OFF;
          }
          urllastp = urlp2;
        }
        if (tempflag) {
          /* reached the end marker without finding a place: append just
             before the marker */
          urlnextp = urlp -> next;
          urlp -> next = urlp2;
          urllastp -> next = urlp;
        }
      }
    }
    urlp = urlnextp;  /* so, on to the next one */
  } /* end for i */
} /* end if (rq) */
/* Build the sorted list for the directory report.  Every directory in the
   hash lists dirhead[] that reaches the floor is unlinked from its hash
   list and insertion-sorted into the list starting at dirsorthead, ordered
   according to dirsortby.  iq is switched back on only if at least one
   directory qualifies, and dir_max_reqs is raised to the largest request
   count among the qualifying directories. */
if (iq) {
  iq = OFF;   /* turn it off unless we find a big enough one */
  dirsorthead = (struct dir *) malloc(sizeof(struct dir));
  if (dirsorthead == NULL) {
    fprintf(stderr, "analog: out of memory while sorting the directory report\n");
    return(ERR);
  }
  /* This node is the end-of-list marker of the sorted list: its empty
     name is what the loops below test for.  The other fields are cleared
     so that the comparisons against the head never read uninitialised
     memory. */
  dirsorthead -> name[0] = '\0';
  dirsorthead -> reqs = 0;
  dirsorthead -> bytes = 0;
  dirsorthead -> next = NULL;
  onlist = 0;             /* the list we are on */
  dirp = dirhead[0];      /* starting at list 0 */
  for (i = 0; i < no_dirs; i++) {   /* run through all the DIRs */
    if (dirp -> name[0] == '\0') {  /* then this member isn't a DIR */
      dirnextp = dirhead[++onlist]; /* so look at the next list instead */
      i--;                          /* and don't count this one */
    }
    else if ((dirsortby == BYBYTES && (dirp -> bytes / (total_bytes / 10000)) < dirfloor) ||
             (dirsortby != BYBYTES && dirp -> reqs < dirfloor)) {
      /* below the floor (hundredths of a percent of all bytes when sorting
         by bytes, a request count otherwise): we don't want it */
      dirnextp = dirp -> next;
    }
    else {
      iq = ON;
      dir_max_reqs = MAX(dirp -> reqs, dir_max_reqs);
      if ((dirp -> bytes > dirsorthead -> bytes && dirsortby == BYBYTES) ||
          (dirp -> reqs > dirsorthead -> reqs && dirsortby == BYREQUESTS) ||
          (strcmp(dirp -> name, dirsorthead -> name) < 0 && dirsortby == ALPHABETICAL) ||
          (dirsorthead -> name[0] == '\0')) {
        /* it comes before the current first item (or the list is still
           empty): slot it in at the front */
        dirnextp = dirp -> next;  /* the next one we're going to look at */
        dirp -> next = dirsorthead;
        dirsorthead = dirp;
      }
      else {  /* otherwise compare with the ones so far */
        tempflag = ON;            /* ON while no place has been found */
        dirlastp = dirsorthead;
        for (dirp2 = dirsorthead -> next; dirp2 -> name[0] != '\0' && tempflag;
             dirp2 = dirp2 -> next) {
          if ((dirp -> bytes > dirp2 -> bytes && dirsortby == BYBYTES) ||
              (dirp -> reqs > dirp2 -> reqs && dirsortby == BYREQUESTS) ||
              (strcmp(dirp -> name, dirp2 -> name) < 0 && dirsortby == ALPHABETICAL)) {
            /* dirp comes before dirp2 in the chosen ordering: slot it in
               between dirlastp and dirp2 */
            dirnextp = dirp -> next;
            dirp -> next = dirp2;
            dirlastp -> next = dirp;
            tempflag = OFF;
          }
          dirlastp = dirp2;
        }
        if (tempflag) {
          /* reached the end marker without finding a place: append just
             before the marker */
          dirnextp = dirp -> next;
          dirp -> next = dirp2;
          dirlastp -> next = dirp;
        }
      }
    }
    dirp = dirnextp;  /* so, on to the next one */
  } /* end for i */
} /* end if (iq) */
/* domain check */
/* Sort the domains for the domain report.  Unlike the URL and directory
lists, domains are chained by array index: domainhead[x] -> nexti is the
index of the next domain and -1 ends the chain.  The loop walks the
existing chain starting at index DOMHASHSIZE - 1 and relinks every domain
that passes the floor into a new chain that starts at firstdom, ordered
according to domsortby.  oq is switched back on only if at least one
domain passes the floor, and dom_max_reqs is raised to the largest request
count among those that do. */
if (oq) {
oq = OFF;
firstdom = DOMHASHSIZE - 2; /* start with unknown domains at front of list */
domainhead[firstdom] -> nexti = -1; /* the new chain initially holds just that entry */
j = DOMHASHSIZE - 1; /* the domain we are on; start with numerical domains */
while (j >= 0) { /* run through all the domains */
domp = domainhead[j];
/* nexti must be read before domp is relinked below, because relinking
overwrites it */
domnextj = domp -> nexti; /* the one we're going to look at after this one */
/* Keep the domain unless: sorting by bytes and it has no requests, or
sorting by bytes and its share of total_bytes (in hundredths of a
percent) is below domfloor, or not sorting by bytes and its request
count is below domfloor. */
if (!((domsortby == BYBYTES && domp -> reqs == 0) ||
(domsortby == BYBYTES && (domp -> bytes / (total_bytes / 10000)) < domfloor) ||
(domsortby != BYBYTES && domp -> reqs < domfloor))) { /* else we don't want it */
oq = ON;
dom_max_reqs = MAX(domp -> reqs, dom_max_reqs);
if ((domp -> bytes > domainhead[firstdom] -> bytes && domsortby == BYBYTES) ||
(domp -> reqs > domainhead[firstdom] -> reqs && domsortby == BYREQUESTS) ||
(strcmp(domp -> name, domainhead[firstdom] -> name) < 0 && domsortby == ALPHABETICAL)) {
/* if it's before the first item currently on the list, slot it in */
domp -> nexti = firstdom;
firstdom = j;
}
else { /* otherwise compare with the ones so far */
tempflag = ON; /* ON while no place has been found */
domlastp = domainhead[firstdom]; /* trails one entry behind i */
for (i = domainhead[firstdom] -> nexti; i >= 0 && tempflag;
i = domainhead[i] -> nexti) {
if ((domp -> bytes > domainhead[i] -> bytes && domsortby == BYBYTES) ||
(domp -> reqs > domainhead[i] -> reqs && domsortby == BYREQUESTS) ||
(strcmp(domp -> name, domainhead[i] -> name) < 0 && domsortby == ALPHABETICAL)) {
/* if domp comes before domainhead[i] in the chosen ordering, slot it in
between domlastp and domainhead[i] */
domp -> nexti = i;
domlastp -> nexti = j;
tempflag = OFF;
}
domlastp = domainhead[i];
}
if (tempflag) { /* we've reached the end of the list; slot it in at */
/* the end */
domp -> nexti = -1; /* meaning, last item on the list */
domlastp -> nexti = j;
}
}
}
j = domnextj; /* so, on to the next one */
} /* end while j >= 0 */
} /* end if (oq) */
}
#ifdef PROF
/* home-made profiling: add the microseconds elapsed since the previous
checkpoint to timesort, then move the checkpoint on */
gettimeofday(&thistime, &tzp);
timesort += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* "Go To" line under the summary: one link per report whose flag is
still on at this point.  The line is omitted if no report is on; the
trailing newline is printed either way. */
if (mq || dq || hq || oq || iq || rq)
printf("\n\n<p><b>Go To</b>");
if (mq)
printf(": <a HREF=\"#Monthly\">Monthly report</a>");
if (dq)
printf(": <a HREF=\"#Daily\">Daily summary</a>");
if (hq)
printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
if (oq)
printf(": <a HREF=\"#Domain\">Domain report</a>");
if (iq)
printf(": <a HREF=\"#Directory\">Directory report</a>");
if (rq)
printf(": <a HREF=\"#Request\">Request report</a>");
printf("\n");
/* Monthly requests */
/* Prints one line per month from the first to the last month analysed:
   the month, its request count from monthlyreq[], and a bar of markchar
   characters, one per monthlyunit requests or part thereof.  Drawing the
   bars counts monthlyreq[] down, so the counts are not usable afterwards. */
if (mq) {
  printf("<hr>\n<h2><a NAME=\"Monthly\">Monthly Report</h2>\n");
  printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
  if (dq)
    printf(": <a HREF=\"#Daily\">Daily summary</a>");
  if (hq)
    printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
  if (oq)
    printf(": <a HREF=\"#Domain\">Domain report</a>");
  if (iq)
    printf(": <a HREF=\"#Directory\">Directory report</a>");
  if (rq)
    printf(": <a HREF=\"#Request\">Request report</a>");
  printf(")\n");
  /* turn the month codes into indices into monthlyreq[] */
  lastmonthcode -= FIRSTMONTH;
  firstmonthcode -= FIRSTMONTH;
  for (i = firstmonthcode; i <= lastmonthcode; i++)
    if (monthlyreq[i] > maxmonthlyreq)
      maxmonthlyreq = monthlyreq[i];
  tempint = 10000;
  for (fieldwidth = 5; maxmonthlyreq / tempint >= 10; fieldwidth++)
    tempint *= 10;  /* so f.w. is log_10(maxmonthlyreq), but at least 5 */
  if (monthlyunit == 0) {  /* (o/wise just use the given amount) */
    /* columns left for the bar once the label and count are printed; kept
       at 1 or more so that a narrow pagewidth can neither divide by zero
       nor yield a negative unit (which would make the bar loop below run
       forever) */
    int barcols = pagewidth - fieldwidth - 12;
    if (barcols < 1)
      barcols = 1;
    monthlyunit = maxmonthlyreq * 3 / (2 * barcols);
    /* except we want a 'nice' amount, so ... */
    /* (Nice amount is 1, 1.5, 2, 2.5, 3, 4, 5, 6, 8 * 10^n */
    j = 0;  /* number of factors of ten taken out */
    while (monthlyunit > 30) {
      monthlyunit /= 10;
      j++;
    }
    if (monthlyunit == 0)
      monthlyunit = 1;
    else if (monthlyunit == 7)
      monthlyunit = 6;
    else if (monthlyunit == 9)
      monthlyunit = 8;
    else if (monthlyunit > 25 && monthlyunit < 30)
      monthlyunit = 25;
    else if (monthlyunit > 20 && monthlyunit < 25)
      monthlyunit = 20;
    else if (monthlyunit > 15 && monthlyunit < 20)
      monthlyunit = 15;
    else if (monthlyunit > 10 && monthlyunit < 15)
      monthlyunit = 10;
    for (i = 0; i < j; i++) {  /* put the factors of ten back */
      monthlyunit *= 10;
    }
  }
  printf("\n<p>Each <code>%c</code> represents %d request%s.\n\n<pre width=%d>\n", markchar,
         monthlyunit, (monthlyunit == 1)?"":"s, or part thereof", pagewidth);
  printf(" month: ");
  for (i = 5; i < fieldwidth; i++)
    printf(" ");
  printf("#reqs\n");
  printf("-------- ");
  for (i = 1; i <= fieldwidth; i++)
    printf("-");
  printf("\n");
  for (i = firstmonthcode; i <= lastmonthcode; i++) {
    monthno = i + FIRSTMONTH;
    year = monthno / 12;
    monthno = monthno % 12;
    if (monthno == 0) {  /* December: month 12 of the previous year */
      monthno = 12;
      year--;
    }
    printf("%s %d: %*d: ", monthname[monthno], year, fieldwidth, monthlyreq[i]);
    while (monthlyreq[i] > 0) {
      printf("%c", markchar);
      monthlyreq[i] -= monthlyunit;
    }
    printf("\n");
  }
}
/* Daily requests */
/* Prints one line per entry of dailyreq[] (indices 0 to 6, labelled from
   dayname[]): the request count and a bar of markchar characters, one per
   dailyunit requests or part thereof.  Drawing the bars counts dailyreq[]
   down, so the counts are not usable afterwards.  The section starts by
   emitting a closing </pre> unconditionally. */
if (dq) {
  printf("</pre>\n\n<hr>\n<h2><a NAME=\"Daily\">Daily Summary</h2>\n");
  printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
  if (mq)
    printf(": <a HREF=\"#Monthly\">Monthly report</a>");
  if (hq)
    printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
  if (oq)
    printf(": <a HREF=\"#Domain\">Domain report</a>");
  if (iq)
    printf(": <a HREF=\"#Directory\">Directory report</a>");
  if (rq)
    printf(": <a HREF=\"#Request\">Request report</a>");
  printf(")\n");
  for (i = 0; i <= 6; i++)
    if (dailyreq[i] > maxdailyreq)
      maxdailyreq = dailyreq[i];
  tempint = 10000;
  for (fieldwidth = 5; maxdailyreq / tempint >= 10; fieldwidth++)
    tempint *= 10;  /* so f.w. is log_10(maxdailyreq), but at least 5 */
  if (dailyunit == 0) {  /* (o/wise just use the given amount) */
    /* columns left for the bar; kept at 1 or more so that a narrow
       pagewidth can neither divide by zero nor yield a negative unit
       (which would make the bar loop below run forever) */
    int barcols = pagewidth - 7 - fieldwidth;
    if (barcols < 1)
      barcols = 1;
    dailyunit = maxdailyreq * 3 / (2 * barcols);
    j = 0;  /* number of factors of ten taken out */
    while (dailyunit > 20) {
      dailyunit /= 10;
      j++;
    }
    /* dailyunit is now at most 20, so only these roundings can apply
       (the former tests for values above 20 were unreachable) */
    if (dailyunit == 0)
      dailyunit = 1;
    else if (dailyunit == 7)
      dailyunit = 6;
    else if (dailyunit == 9)
      dailyunit = 8;
    else if (dailyunit > 15 && dailyunit < 20)
      dailyunit = 15;
    else if (dailyunit > 10 && dailyunit < 15)
      dailyunit = 10;
    for (i = 0; i < j; i++) {  /* put the factors of ten back */
      dailyunit *= 10;
    }
  }
  printf("\n<p>Each <code>%c</code> represents %d request%s.\n\n<pre width=%d>\n", markchar,
         dailyunit, (dailyunit == 1)?"":"s, or part thereof", pagewidth);
  printf("day: ");
  for (i = 5; i < fieldwidth; i++)
    printf(" ");
  printf("#reqs\n");
  printf("--- ");
  for (i = 1; i <= fieldwidth; i++)
    printf("-");
  printf("\n");
  for (i = 0; i <= 6; i++) {
    printf("%s: %*d: ", dayname[i], fieldwidth, dailyreq[i]);
    while (dailyreq[i] > 0) {
      printf("%c", markchar);
      dailyreq[i] -= dailyunit;
    }
    printf("\n");
  }
}
/* Hourly requests */
/* Prints one line per hour 0 to 23: the request count from hourlyreq[]
   and a bar of markchar characters, one per hourlyunit requests or part
   thereof.  Drawing the bars counts hourlyreq[] down, so the counts are
   not usable afterwards.  The section starts by emitting a closing </pre>
   unconditionally. */
if (hq) {
  printf("</pre>\n\n<hr>\n<h2><a NAME=\"Hourly\">Hourly Summary</h2>\n");
  printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
  if (mq)
    printf(": <a HREF=\"#Monthly\">Monthly report</a>");
  if (dq)
    printf(": <a HREF=\"#Daily\">Daily summary</a>");
  if (oq)
    printf(": <a HREF=\"#Domain\">Domain report</a>");
  if (iq)
    printf(": <a HREF=\"#Directory\">Directory report</a>");
  if (rq)
    printf(": <a HREF=\"#Request\">Request report</a>");
  printf(")\n");
  for (i = 0; i <= 23; i++)
    if (hourlyreq[i] > maxhourlyreq)
      maxhourlyreq = hourlyreq[i];
  tempint = 10000;
  for (fieldwidth = 5; maxhourlyreq / tempint >= 10; fieldwidth++)
    tempint *= 10;  /* so f.w. is log_10(maxhourlyreq), but at least 5 */
  if (hourlyunit == 0) {  /* (o/wise just use the given amount) */
    /* columns left for the bar; kept at 1 or more so that a narrow
       pagewidth can neither divide by zero nor yield a negative unit
       (which would make the bar loop below run forever) */
    int barcols = pagewidth - 6 - fieldwidth;
    if (barcols < 1)
      barcols = 1;
    hourlyunit = 3 * maxhourlyreq / (2 * barcols);
    /* except we want a 'nice' amount, so ... */
    j = 0;  /* number of factors of ten taken out */
    while (hourlyunit > 10) {
      hourlyunit /= 10;
      j++;
    }
    /* hourlyunit is now at most 10, so only these roundings can apply
       (the former tests for values above 10 were unreachable) */
    if (hourlyunit == 0)
      hourlyunit = 1;
    else if (hourlyunit == 7)
      hourlyunit = 6;
    else if (hourlyunit == 9)
      hourlyunit = 8;
    for (i = 0; i < j; i++) {  /* put the factors of ten back */
      hourlyunit *= 10;
    }
  }
  printf("\n<p>Each <code>%c</code> represents %d request%s.\n\n<pre width=%d>\n", markchar,
         hourlyunit, (hourlyunit == 1)?"":"s, or part thereof", pagewidth);
  printf("hr: ");
  for (i = 5; i < fieldwidth; i++)
    printf(" ");
  printf("#reqs\n");
  printf("-- ");
  for (i = 1; i <= fieldwidth; i++)
    printf("-");
  printf("\n");
  for (i = 0; i <= 23; i++) {
    printf("%2d: %*d: ", i, fieldwidth, hourlyreq[i]);
    while (hourlyreq[i] > 0) {
      printf("%c", markchar);
      hourlyreq[i] -= hourlyunit;
    }
    printf("\n");
  }
}
/* Domain report */
/* Prints the domains in the order of the index chain that starts at
firstdom (linked through nexti, ended by -1), each followed by the entries
of its subdomain list.  A %bytes column is included when bq is on.  The
section starts by emitting a closing </pre> unconditionally and opens a
new <pre> after its "Go To" line. */
if (oq) {
printf("</pre>\n\n<hr>\n<h2><a NAME=\"Domain\">Domain Report</h2>\n\n");
printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
if (mq)
printf(": <a HREF=\"#Monthly\">Monthly report</a>");
if (dq)
printf(": <a HREF=\"#Daily\">Daily summary</a>");
if (hq)
printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
if (iq)
printf(": <a HREF=\"#Directory\">Directory report</a>");
if (rq)
printf(": <a HREF=\"#Request\">Request report</a>");
printf(")\n<pre>");
#ifdef PROF
/* home-made profiling: add the microseconds elapsed since the previous
checkpoint to timeout, then move the checkpoint on */
gettimeofday(&thistime, &tzp);
timeout += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
lasttime = thistime;
#endif
/* width of the #reqs column: number of digits in dom_max_reqs, at least 5 */
tempint = 10000;
for (fieldwidth = 5; dom_max_reqs / tempint >= 10; fieldwidth++)
tempint *= 10;
/* column headings and the rule underneath them */
for (i = 5; i < fieldwidth + 1; i++)
printf(" ");
printf("#reqs : ");
if (bq)
printf(" %%bytes : ");
printf("domain\n");
for (i = 1; i <= fieldwidth + 2; i++)
printf("-");
if (bq)
printf(" --------");
printf(" ------\n");
for (i = firstdom; i >= 0; i = domainhead[i] -> nexti) {
/* the entry at index DOMHASHSIZE - 2 is skipped when it has no requests */
if (!(i == DOMHASHSIZE - 2 && domainhead[i] -> reqs == 0)) {
if (bq) {
/* NOTE(review): if total_bytes is zero this division does not give a
finite value before the (int) casts below -- confirm whether bq can be
on with total_bytes == 0. */
bytepc = (domainhead[i] -> bytes) / (total_bytes / 10000); /* this domain's bytes, as %age, *100 */
bytepc1 = ((int)(bytepc)) / 100; /* whole number of %bytes */
bytepc2 = ((int)(bytepc)) % 100; /* remaining 100ths. */
}
printf(" %*d : ", fieldwidth, domainhead[i] -> reqs);
/* the percentage is left blank when it is below 0.01% */
if (bq && (bytepc1 > 0 || bytepc2 > 0))
printf("%3d.%02d%% : ", bytepc1, bytepc2);
else if (bq)
printf(" : ");
if (domainhead[i] -> id[0] == '*') /* flagged domains, not real domain names */
printf("[%s]\n", domainhead[i] -> name);
else if (domainhead[i] -> name[0] == '?') /* real domain, but don't print name */
printf(".%s\n", domainhead[i] -> id);
else
printf(".%s (%s)\n", domainhead[i] -> id, domainhead[i] -> name);
/* Now print its subdomains too. */
/* (the subdomain list ends at the entry whose name is empty) */
domp = domainhead[i] -> next;
while (domp -> name[0] != '\0') {
if (bq) {
bytepc = (domp -> bytes) / (total_bytes / 10000); /* this domain's bytes, as %age, *100 */
bytepc1 = ((int)(bytepc)) / 100; /* whole number of %bytes */
bytepc2 = ((int)(bytepc)) % 100; /* remaining 100ths. */
}
/* subdomain figures are printed in parentheses */
if (!bq)
printf("(%*d): ", fieldwidth, domp -> reqs);
else if (bytepc1 > 0 || bytepc2 > 0)
printf("(%*d):(%3d.%02d%%): ", fieldwidth, domp -> reqs, bytepc1, bytepc2);
else
printf("(%*d): : ", fieldwidth, domp -> reqs);
/* indent the subdomain according to its depth */
tempp = domp -> id;
while ((tempp = strchr(tempp, '.')) != NULL) {
printf(" "); /* print two spaces for each dot in name */
tempp++;
}
if (i == DOMHASHSIZE - 1)
printf(" "); /* and two more for numerical domains */
printf("%s", domp -> id);
if (domp -> name[0] != '?') /* print name */
printf(" (%s)", domp -> name);
printf("\n");
domp = domp -> next;
} /* end while */
}
} /* end for (i = running over domains) */
} /* end if (oq) */
/* Directory report */
/* Lists the sorted directories (the list from dirsorthead up to the entry
   with an empty name), one per line: request count, then the share of
   total_bytes when bq is on, then the directory name. */
if (iq) {
  printf("</pre>\n\n<hr>\n<h2><a NAME=\"Directory\">Directory Report</h2>\n\n");
  printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
  if (mq) {
    printf(": <a HREF=\"#Monthly\">Monthly report</a>");
  }
  if (dq) {
    printf(": <a HREF=\"#Daily\">Daily summary</a>");
  }
  if (hq) {
    printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
  }
  if (oq) {
    printf(": <a HREF=\"#Domain\">Domain report</a>");
  }
  if (rq) {
    printf(": <a HREF=\"#Request\">Request report</a>");
  }
  printf(")\n<pre>");

  /* the count column is as wide as dir_max_reqs has digits, but at least 5 */
  fieldwidth = 5;
  tempint = 10000;
  while (dir_max_reqs / tempint >= 10) {
    tempint *= 10;
    fieldwidth++;
  }

  /* heading line, right-aligned over the count column */
  i = 5;
  while (i < fieldwidth) {
    printf(" ");
    i++;
  }
  printf("#reqs: ");
  if (bq) {
    printf("%%bytes: ");
  }
  printf("directory\n");

  /* rule under the headings */
  i = 1;
  while (i <= fieldwidth) {
    printf("-");
    i++;
  }
  if (bq) {
    printf(" ------");
  }
  printf(" ---------\n");

  /* one line per directory */
  dirp = dirsorthead;
  while (dirp -> name[0] != '\0') {
    if (bq) {
      bytepc = dirp -> bytes / (total_bytes / 10000);  /* hundredths of a percent */
      bytepc1 = ((int)(bytepc)) / 100;  /* whole percent */
      bytepc2 = ((int)(bytepc)) % 100;  /* hundredths */
      if (bytepc1 > 0 || bytepc2 > 0) {
        printf("%*d:%3d.%02d%%: %s\n", fieldwidth, dirp -> reqs, bytepc1,
               bytepc2, dirp -> name);
      }
      else {  /* below 0.01%: leave the column blank */
        printf("%*d: : %s\n", fieldwidth, dirp -> reqs, dirp -> name);
      }
    }
    else {
      printf("%*d: %s\n", fieldwidth, dirp -> reqs, dirp -> name);
    }
    dirp = dirp -> next;
  }
}
/* Request report */
/* Lists the sorted URLs (the list from urlsorthead up to the entry with an
   empty name), one per line: request count, then the share of total_bytes
   when bq is on, then the URL.  The URL is printed as a link when kq is 2,
   or when kq is 1 and the URL ends in ".html", ".htm" or "/".
   NOTE(review): URL names are written into the HTML unescaped; if they
   come straight from the logfile, characters such as '<', '&' and '"'
   should be escaped -- confirm how names are stored. */
if (rq) {
  printf("</pre>\n\n<hr>\n<h2><a NAME=\"Request\">Request Report</h2>\n\n");
  printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
  if (mq)
    printf(": <a HREF=\"#Monthly\">Monthly report</a>");
  if (dq)
    printf(": <a HREF=\"#Daily\">Daily summary</a>");
  if (hq)
    printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
  if (oq)
    printf(": <a HREF=\"#Domain\">Domain report</a>");
  if (iq)
    printf(": <a HREF=\"#Directory\">Directory report</a>");
  printf(")\n<pre>");
  /* the count column is as wide as url_max_reqs has digits, but at least 5 */
  tempint = 10000;
  for (fieldwidth = 5; url_max_reqs / tempint >= 10; fieldwidth++)
    tempint *= 10;
  for (i = 5; i < fieldwidth; i++)
    printf(" ");
  printf("#reqs: ");
  if (bq)
    printf("%%bytes: ");
  printf("filename\n");
  for (i = 1; i <= fieldwidth; i++)
    printf("-");
  if (bq)
    printf(" ------");
  printf(" --------\n");
  for (urlp = urlsorthead; urlp -> name[0] != '\0'; urlp = urlp -> next) {
    /* at least 1, because the loop condition excludes the empty name */
    size_t namelen = strlen(urlp -> name);
    flag ispage;
    flag linkit;

    if (bq) {
      bytepc = urlp -> bytes / (total_bytes / 10000);
      bytepc1 = ((int)(bytepc)) / 100;  /* whole number of %bytes */
      bytepc2 = ((int)(bytepc)) % 100;  /* remaining 100ths. */
    }
    /* Suffix tests.  strlen() is unsigned, so the length is checked before
       subtracting: for a short name such as "/" the expression
       "strlen(name) - 5" would wrap round and point outside the string. */
    ispage = (namelen >= 5 && strcmp(urlp -> name + (namelen - 5), ".html") == 0)
          || (namelen >= 4 && strcmp(urlp -> name + (namelen - 4), ".htm") == 0)
          || urlp -> name[namelen - 1] == '/';
    linkit = (kq == 2)              /* we want to link to everything ... */
          || (kq == 1 && ispage);   /* ... or it is a page, and we want to link to pages */

    if (linkit) {
      if (!bq)
        printf("%*d: <a HREF=\"%s\">%s</a>\n", fieldwidth, urlp -> reqs,
               urlp -> name, urlp -> name);
      else if (bytepc1 > 0 || bytepc2 > 0)
        printf("%*d:%3d.%02d%%: <a HREF=\"%s\">%s</a>\n", fieldwidth,
               urlp -> reqs, bytepc1, bytepc2, urlp -> name, urlp -> name);
      else
        printf("%*d: : <a HREF=\"%s\">%s</a>\n", fieldwidth,
               urlp -> reqs, urlp -> name, urlp -> name);
    }
    else if (!bq)
      printf("%*d: %s\n", fieldwidth, urlp -> reqs, urlp -> name);
    else if (bytepc1 > 0 || bytepc2 > 0)
      printf("%*d:%3d.%02d%%: %s\n", fieldwidth, urlp -> reqs, bytepc1,
             bytepc2, urlp -> name);
    else
      printf("%*d: : %s\n", fieldwidth, urlp -> reqs, urlp -> name);
  }
}
/* Bit at the bottom of the page */
printf("</pre>\n\n<hr>\n<i>This analysis was produced by <a HREF=\"
http://www.statslab.cam.ac.uk/~sret1/analog/\">analog%s</a>.\n", VERSION);
gettimeofday(&stoptime, &tzp);
#ifdef PROF
timeout += stoptime.tv_usec - lasttime.tv_usec + (stoptime.tv_sec - lasttime.tv_sec) * 1000000;
#endif
stoptime.tv_sec -= starttime.tv_sec;
stoptime.tv_usec -= starttime.tv_usec; /* so now measures elapsed time */
if (total_fail_reqs + total_succ_reqs + total_other_reqs == 0) /* i.e. empty or corrupt logfile */
printf("<br><b>Running time:</b> %d minute%s, %d second%s. </i>\n",
stoptime.tv_sec / 60, ((stoptime.tv_sec / 60) == 1)?"":"s",
stoptime.tv_sec % 60, ((stoptime.tv_sec % 60) == 1)?"":"s");
else
printf("<br><b>Running time:</b> %d minute%s, %d second%s (%d microseconds per request). </i>\n",
stoptime.tv_sec / 60, ((stoptime.tv_sec / 60) == 1)?"":"s",
stoptime.tv_sec % 60, ((stoptime.tv_sec % 60) == 1)?"":"s",
(stoptime.tv_sec * 1000000 + stoptime.tv_usec) / (total_fail_reqs + total_succ_reqs + total_other_reqs));
/* Final "Go To" line: a link to the top of the page plus one link per
report whose flag is on, then the closing tags of the page. */
printf("\n\n<p>(<b>Go To</b>: <a HREF=\"#Top\">Top</a>");
if (mq)
printf(": <a HREF=\"#Monthly\">Monthly report</a>");
if (dq)
printf(": <a HREF=\"#Daily\">Daily summary</a>");
if (hq)
printf(": <a HREF=\"#Hourly\">Hourly summary</a>");
if (oq)
printf(": <a HREF=\"#Domain\">Domain report</a>");
if (iq)
printf(": <a HREF=\"#Directory\">Directory report</a>");
if (rq)
printf(": <a HREF=\"#Request\">Request report</a>");
printf(")\n");
printf("\n</body>\n</html>\n");
#ifdef PROF
/* Home-made profiling summary on stderr: each phase's accumulated time and
   its percentage of the total.  The total counts every phase listed below
   exactly once (it used to add timeout twice and leave timesort out). */
timetot = timein + timegets + timescan1 + timescan2 + timescan + timereq +
          timedom + timedir + timehost + timesort + timedomsort + timeout + timeother;
timetot /= 100;   /* so that dividing by timetot gives a percentage */
if (timetot == 0)
  timetot = 1;    /* very short run: avoid dividing by zero below */
fprintf(stderr, "Initial time = %10d (%5.2f%%)\n", timein, (double)timein / (double)timetot);
fprintf(stderr, "fgets time = %10d (%5.2f%%)\n", timegets, (double)timegets / (double)timetot);
fprintf(stderr, "sscanf time 1 = %10d (%5.2f%%)\n", timescan1, (double)timescan1 / (double)timetot);
fprintf(stderr, "sscanf time 2 = %10d (%5.2f%%)\n", timescan2, (double)timescan2 / (double)timetot);
fprintf(stderr, "sscanf time 3 = %10d (%5.2f%%)\n", timescan, (double)timescan / (double)timetot);
fprintf(stderr, "Request time = %10d (%5.2f%%)\n", timereq, (double)timereq / (double)timetot);
fprintf(stderr, "Domain time = %10d (%5.2f%%)\n", timedom, (double)timedom / (double)timetot);
fprintf(stderr, "Directory time = %10d (%5.2f%%)\n", timedir, (double)timedir / (double)timetot);
fprintf(stderr, "Host time = %10d (%5.2f%%)\n", timehost, (double)timehost / (double)timetot);
fprintf(stderr, "Sort time = %10d (%5.2f%%)\n", timesort, (double)timesort / (double)timetot);
fprintf(stderr, "Dom. sort time = %10d (%5.2f%%)\n", timedomsort, (double)timedomsort / (double)timetot);
fprintf(stderr, "Output time = %10d (%5.2f%%)\n", timeout, (double)timeout / (double)timetot);
fprintf(stderr, "Other time = %10d (%5.2f%%)\n", timeother, (double)timeother / (double)timetot);
#endif
return(OK);
}