/*
* digest - create digests of mail messages
*
* This program uses the file "digest.info" to figure out what issue
* of the digest it is making. The format of this file is:
*
* Name of the List # leave out the word "digest"
* Host # the host where the digest lives
* From # who sends the digest out
* To # who the list is sent to
* Volume # Volume XX : Issue XXX
* Date # Day, dd Mon yy hh:mm:ss ZZZ
*
* As an example:
*
* Foobar
* intrepid.ecn.purdue.edu
* Dave Curry (The Moderator) <Foobar-Digest-Request@intrepid.ecn.purdue.edu>
* Foobar-List@intrepid.ecn.purdue.edu
* Volume 1 : Issue 0
* Mon, 4 Jan 88 20:15:33 EST
*
* Make sure the "From" line includes a legitimate RFC 822 mail address.
* Make sure the issue number starts at zero; it gets incremented BEFORE
* generating each digest. Volume numbers must be incremented by hand.
* The "digest.info" file gets modified by the program after generation
* of each digest.
*
* The contents of the file "digest.head", if it exists, will be placed
* between the list of today's topics and the top of the digest. This
* can be used to put information about where to FTP archives from, etc.
*
* The file "digest.input" should contain a set of mail messages in the
* format of a UNIX mailbox. These messages will be read into memory,
* and a list of "Today's Topics" generated from the subject lines. The
* messages will then be sorted so that all the messages on the same topic
* come out together in the digest. Any message whose first word in the
* subject line is "Administrivia" will be guaranteed to come out first
* in the digest.
*
* The digest will be left in the file "digest.output". You can send it
* using the command "/usr/lib/sendmail -t < digest.output".
*
* I suggest creating the following mail aliases in /usr/lib/aliases:
*
* 1. Foobar-Digest:/path/to/the/digest.input/file
* This file must be world-writable for sendmail to modify it.
* This is the address to publish for people to send digest
* submissions to.
* 2. Foobar-Digest-Request:yourlogin
* This is the address for people to use to ask to be added
* or deleted from the list.
* 3. Foobar-List: :include:/path/to/list/of/recipients
* This is the list of people who receive the digest. It should
* be a list of addresses of the format:
*
* name, name, name, name,
* name, name, name
*
* Continuation lines should start with whitespace.
*
* There is one problem with the sorting of messages by subject line to get
* all the same topic together. The code handles elimination of "Re:"
* strings, but if someone changes the subject on you, then things get ugly.
* This shouldn't happen too often, though.
*
* Special thanks to Jon Solomon who sent me his TELECOM digest generating
* program. I swiped a lot of ideas from it in writing this one.
*
* David A. Curry
*
[email protected]
*/
#include <sys/types.h>
#include <sys/timeb.h>
#include <sys/time.h>
#include <ctype.h>
#include <stdio.h>
/*
 * Layout constants and file names.
 *
 * HEAD1, HEAD2 and HEAD3 are the widths of the three columns of the
 * banner line written by do_digest_header() (list title, date, volume
 * line).  DATELEN is the number of leading characters of the date
 * string shown in that banner and in the progress message printed by
 * main().  LINELEN is the width used to center the topic lines and the
 * length of the row of dashes that ends the digest header.  MAXMSGS is
 * the size of the messages[] table.  LINESIZE is the size of the buffer
 * getline() allocates for each line it reads, so longer input lines
 * are split across several reads.
 */
#define HEAD1 27 /* Field width of first third */
#define HEAD2 20 /* Field width of second third */
#define HEAD3 21 /* Field width of last third */
#define DATELEN 14 /* Amount of date to put in hdr */
#define LINELEN 70 /* Length of an average line */
#define MAXMSGS 64 /* Maximum number of msgs/digest*/
#define LINESIZE 256 /* Maximum line size */
#define LISTINFO "digest.info" /* Information file name */
#define LISTHEAD "digest.head" /* Header for top of digest */
#define LISTINPUT "digest.input" /* Input file name */
#define LISTOUTPUT "digest.output" /* Output file name */
/*
 * Message structure.  scan_messages() reads through the input file and
 * fills one of these in for each message.  To, Cc, From, Date, and
 * Subject point at the text of the header of the same name, just past
 * the header name and any leading blanks or tabs; a field stays NULL
 * when the message has no such header (messages[] is a file-scope
 * array and the fields are only assigned when the header is seen).
 * One leading "Re:" is removed from the Subject.  The "sortstring" is
 * a copy of the subject string with all blanks and tabs deleted and
 * all letters in lower case; sort_messages() builds it, and it is left
 * NULL for messages without a subject.  The messageaddr is the seek
 * position in the file where the message body starts, and
 * messagelength is how long the message is in bytes.
 */
struct message {
/* Header values as described above. */
char *To;
/* Cc is recorded by scan_messages() but not used anywhere else in this file. */
char *Cc;
char *From;
char *Date;
char *Subject;
/* Sort key derived from Subject; NULL means "no subject". */
char *sortstring;
/* ftell() position just after the blank line that ends the headers. */
long messageaddr;
/* Bytes from messageaddr up to the next "From " line or end of file. */
long messagelength;
} messages[MAXMSGS];
/*
* List structure. Contains the information from the LISTINFO file.
*/
struct listinfo {
char *Title;
char *Host;
char *From;
char *To;
char *Volline;
char *Dateline;
} listinfo;
/* The input mailbox (LISTINPUT): opened by scan_messages(), closed by read_messages(). */
FILE *input;
/* The digest being written (LISTOUTPUT): opened by do_digest_header(), closed by read_messages(). */
FILE *output;
int issue_number; /* The number of this issue */
int nmessages = 0; /* Number of messages */
int digestsize = 0; /* Size of digest in bytes */
/* Old-style declarations of the functions used in this file that return char *. */
char *index(), *malloc(), *safter(), *nospace(), *getline();
/*
 * main - assemble one issue of the digest.
 *
 * Runs the whole pipeline in order: load the list information, bump
 * the issue number and date, scan and sort the input mailbox, write
 * the digest, and finally store the updated list information.
 *
 * Returns 0 explicitly so the exit status is well defined for whoever
 * invoked the program; the individual steps call exit(1) themselves
 * when they hit a fatal error.
 */
int main()
{
	/*
	 * Read the list information file and update the
	 * issue number and date strings.
	 */
	get_list_info();
	inc_volume_and_date();

	printf("Assembling %s Digest %s (%.*s)\n", listinfo.Title, listinfo.Volline, DATELEN, listinfo.Dateline);
	printf("Scanning and sorting messages for topic lines.\n");

	/*
	 * Scan the message file for subject strings and
	 * sort the messages to get all the messages for
	 * each topic next to each other.
	 */
	scan_messages();
	sort_messages();

	printf("Writing %s Digest to \"%s\"\n", listinfo.Title, LISTOUTPUT);

	/*
	 * Print the digest header, put the messages
	 * in the digest.
	 */
	do_digest_header();
	read_messages();

	printf("The digest is %d characters long in %d messages.\n", digestsize, nmessages);

	/*
	 * Put out the new list information.
	 */
	put_list_info();

	return(0);
}
/*
* get_list_info - reads in the LISTINFO file.
*/
get_list_info()
{
FILE *fp;
int incomplete;
if ((fp = fopen(LISTINFO, "r")) == NULL) {
printf("digest: cannot open \"%s\" for reading.\n", LISTINFO);
exit(1);
}
incomplete = 0;
if ((listinfo.Title = getline(fp)) == NULL)
incomplete++;
if ((listinfo.Host = getline(fp)) == NULL)
incomplete++;
if ((listinfo.From = getline(fp)) == NULL)
incomplete++;
if ((listinfo.To = getline(fp)) == NULL)
incomplete++;
if ((listinfo.Volline = getline(fp)) == NULL)
incomplete++;
if ((listinfo.Dateline = getline(fp)) == NULL)
incomplete++;
fclose(fp);
/*
* Error-check. Not too sophisicated, but then you're
* supposed to know what you're doing anyway.
*/
if (incomplete) {
printf("digest: incomplete or badly formatted \"%s\" file.\n", LISTINFO);
printf("Proper format:\n");
printf("\tTitle\n\tHost\n\tFrom\n\tTo\n\tVolline\n\tDateline\n");
exit(1);
}
}
/*
* inc_volume_and_date - update the volume/issue string and get a new date.
*/
inc_volume_and_date()
{
char *msgdate();
register char *volline, *colon;
if ((volline = malloc(strlen(listinfo.Volline)+1)) == NULL) {
printf("digest: out of memory.\n");
exit(1);
}
/*
* Volume numbers get changed by hand.
*/
issue_number = atoi(safter(listinfo.Volline, " Issue ")) + 1;
if ((colon = index(listinfo.Volline, ':')) != NULL)
*colon = NULL;
sprintf(volline, "%s: Issue %3d", listinfo.Volline, issue_number);
strcpy(listinfo.Volline, volline);
/*
* Get a new date.
*/
listinfo.Dateline = msgdate();
free(volline);
}
/*
 * msgdate - produce a new date string for the current local time.
 *	     Format is
 *
 *		Day, dd Mon yy hh:mm:ss tzn
 *
 * Returns a pointer to a static buffer, which is overwritten by the
 * next call.
 */
char *msgdate()
{
	/*
	 * NOTE(review): presumably the BSD timezone(minuteswest, dst)
	 * library routine that returns the zone abbreviation - confirm
	 * it is available on the target system.
	 */
	char *timezone();
	struct tm *localtime();
	struct timeb now;
	register struct tm *t;
	static char datebuf[64];
	static char days[] = "SunMonTueWedThuFriSat";
	static char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";

	ftime(&now);
	t = localtime(&(now.time));

	/*
	 * tm_year counts years since 1900, so it has three digits from
	 * the year 2000 on.  Reduce it modulo 100 to keep the two-digit
	 * "yy" field; otherwise the date no longer fits the DATELEN
	 * characters shown in the digest banner.
	 */
	sprintf(datebuf, "%3.3s, %2d %3.3s %02d %02d:%02d:%02d %3.3s",
		&days[3 * t->tm_wday], t->tm_mday,
		&months[3 * t->tm_mon], t->tm_year % 100, t->tm_hour,
		t->tm_min, t->tm_sec, timezone(now.timezone, t->tm_isdst));

	return(datebuf);
}
/*
 * getline - read a line into a dynamically allocated buffer.
 *
 * Reads characters from fp up to, but not including, the next newline
 * and returns them as a NUL-terminated string in a freshly allocated
 * buffer of LINESIZE bytes.  A line longer than LINESIZE - 1
 * characters is split: the remainder is returned by the next call.
 *
 * Returns NULL when end of file is reached before any character of a
 * line has been read.  A last line that is not terminated by a
 * newline is still returned; the following call then returns NULL.
 * Exits the program if memory runs out.
 *
 * NOTE(review): POSIX.1-2008 <stdio.h> declares a different function
 * called getline(); this name may clash when building on such a
 * system - confirm before porting.
 */
char *getline(fp)
FILE *fp;
{
	register int c;
	register char *p, *line;

	if ((line = malloc(LINESIZE)) == NULL) {
		printf("digest: out of memory.\n");
		exit(1);
	}

	p = line;
	c = 0;

	/*
	 * Leave room for the terminator.
	 */
	while ((p - line) < (LINESIZE - 1)) {
		c = getc(fp);

		if ((c == '\n') || (c == EOF))
			break;

		*p++ = c;
	}

	*p = '\0';

	/*
	 * Nothing at all before EOF: there is no line to return, so
	 * give the buffer back instead of leaking it.
	 */
	if ((c == EOF) && (p == line)) {
		free(line);
		return(NULL);
	}

	return(line);
}
/*
 * scan_messages - scans through LISTINPUT reading in header fields
 *		   and marking the beginning and ending of messages.
 *
 * On return, messages[0 .. nmessages-1] describe the messages found:
 * the saved header values, the file position where each body starts,
 * and the length of each body.  The input file is left open in the
 * global "input" for read_messages().  An input file without any
 * message simply yields nmessages == 0.
 *
 * The program exits if the file cannot be opened, if it holds more
 * than MAXMSGS messages, or if it ends in the middle of a header.
 *
 * NOTE: some of the code here depends on the UNIX mail header format.
 *	 This format simply guarantees that the first line of a message's
 *	 header will be "From blah-blah-blah".  Note there is no colon
 *	 (`:') on the "From", the real "From:" line is farther down in
 *	 the headers.
 */
scan_messages()
{
	register long n;
	register char *s, *subj;

	if ((input = fopen(LISTINPUT, "r")) == NULL) {
		printf("digest: cannot open \"%s\" for reading.\n", LISTINPUT);
		exit(1);
	}

	/*
	 * We break out of this from inside.
	 */
	for (;;) {
		/*
		 * Find the start of the next message.
		 */
		for (;;) {
			/*
			 * If we hit EOF, mark the length of the
			 * previous message (if there was one) and
			 * go back.
			 */
			if ((s = getline(input)) == NULL) {
				if (nmessages > 0) {
					n = ftell(input);
					n = n - messages[nmessages - 1].messageaddr;
					messages[nmessages - 1].messagelength = n;
				}

				return;
			}

			if (strncmp(s, "From ", 5) == 0)
				break;

			/*
			 * A line of message text; we only needed
			 * to look at it.
			 */
			free(s);
		}

		/*
		 * If we have found another message, mark the
		 * length of the previous message.  It ends where
		 * the "From " line we just read begins.
		 */
		if (nmessages) {
			n = ftell(input);
			n = n - (strlen(s) + 1);
			n = n - messages[nmessages - 1].messageaddr;
			messages[nmessages - 1].messagelength = n;
		}

		free(s);

		/*
		 * Only complain once there really is one message
		 * too many, so a file with exactly MAXMSGS messages
		 * is accepted.
		 */
		if (nmessages >= MAXMSGS) {
			printf("digest: too many messages.\n");
			exit(1);
		}

		/*
		 * Read in the headers.
		 */
		for (;;) {
			/*
			 * We shouldn't hit EOF here, we should
			 * at least finish the headers first.
			 */
			if ((s = getline(input)) == NULL) {
				printf("digest: \"%s\": unexpected EOF.\n", LISTINPUT);
				exit(1);
			}

			/*
			 * Blank line terminates headers.
			 */
			if (*s == '\0') {
				free(s);
				break;
			}

			/*
			 * Save certain headers.  We strip the
			 * header name and leading whitespace.  The
			 * saved pointers refer into the line buffer,
			 * so those buffers are kept.
			 */
			if (strncmp(s, "To:", 3) == 0) {
				messages[nmessages].To = nospace(safter(s, "To:"));
			}
			else if (strncmp(s, "Cc:", 3) == 0) {
				messages[nmessages].Cc = nospace(safter(s, "Cc:"));
			}
			else if (strncmp(s, "From:", 5) == 0) {
				messages[nmessages].From = nospace(safter(s, "From:"));
			}
			else if (strncmp(s, "Date:", 5) == 0) {
				messages[nmessages].Date = nospace(safter(s, "Date:"));
			}
			else if (strncmp(s, "Subject:", 8) == 0) {
				subj = nospace(safter(s, "Subject:"));

				/*
				 * We don't need the "Re:" stuff, in
				 * any mixture of upper and lower case.
				 */
				if (((subj[0] == 'r') || (subj[0] == 'R')) &&
				    ((subj[1] == 'e') || (subj[1] == 'E')) &&
				    (subj[2] == ':'))
					subj += 3;

				messages[nmessages].Subject = nospace(subj);
			}
			else {
				/*
				 * If we aren't saving this line,
				 * give the memory back.
				 */
				free(s);
			}
		}

		/*
		 * The message starts after the header.
		 */
		messages[nmessages].messageaddr = ftell(input);
		nmessages++;
	}
}
/*
 * sort_messages - convert each message's subject line to a string
 *		   all in lower case with no whitespace.  Then sort
 *		   the messages on this string.  This will group
 *		   all the messages on the same subject together.
 *
 * The converted string is stored in the message's "sortstring".
 * Messages without a subject get no sortstring; comp() places them
 * at the end.  Exits the program if memory runs out.
 */
sort_messages()
{
	register int i;
	extern int comp();
	register char *src, *dst;

	for (i = 0; i < nmessages; i++) {
		/*
		 * Skip messages with no subject.
		 */
		if (messages[i].Subject == NULL)
			continue;

		/*
		 * Zap leading whitespace.
		 */
		src = nospace(messages[i].Subject);

		/*
		 * The key is never longer than the subject.
		 */
		if ((dst = malloc(strlen(src) + 1)) == NULL) {
			printf("digest: out of memory.\n");
			exit(1);
		}

		messages[i].sortstring = dst;

		/*
		 * Copy the subject string into sortstring
		 * converting upper case to lower case and
		 * ignoring whitespace.  The <ctype.h> routines
		 * are only defined for unsigned char values, so
		 * convert first in case char is signed and the
		 * subject contains 8-bit characters.
		 */
		for (; *src != '\0'; src++) {
			if ((*src == ' ') || (*src == '\t'))
				continue;

			if (isupper((unsigned char) *src))
				*dst++ = tolower((unsigned char) *src);
			else
				*dst++ = *src;
		}

		*dst = '\0';
	}

	/*
	 * Sort 'em.
	 */
	qsort(messages, nmessages, sizeof(struct message), comp);
}
/*
 * comp - comparison routine for qsort.  Messages with no subject go
 *	  at the end of the digest, messages with "administrivia" as
 *	  the start of the subject go to the top of the digest.
 *
 * Within each of the two groups that have a subject (administrivia
 * and everything else) messages are ordered by sortstring, so that
 * messages with the same subject always end up next to each other.
 *
 * Returns a negative, zero or positive value as m1 sorts before,
 * with, or after m2.
 */
comp(m1, m2)
register struct message *m1, *m2;
{
	int admin1, admin2;

	/*
	 * No subject messages to end.
	 */
	if (m1->sortstring == NULL)
		return((m2->sortstring == NULL) ? 0 : 1);

	if (m2->sortstring == NULL)
		return(-1);

	admin1 = (strncmp(m1->sortstring, "administrivia", 13) == 0);
	admin2 = (strncmp(m2->sortstring, "administrivia", 13) == 0);

	/*
	 * Administrivia to beginning.
	 */
	if (admin1 != admin2)
		return(admin1 ? -1 : 1);

	/*
	 * Same group: order by subject.  This includes two
	 * administrivia messages, so that equal administrivia
	 * subjects are kept together as well.
	 */
	return(strcmp(m1->sortstring, m2->sortstring));
}
/*
 * do_digest_header - prints the digest header and mailer headers.
 *
 * Creates LISTOUTPUT (left open in the global "output") and writes,
 * in order: the mail headers of the digest itself, the banner line,
 * the list of "Today's Topics", the contents of LISTHEAD if that file
 * exists, and a row of dashes.  digestsize is reset and then counts
 * every character written.
 *
 * While listing the topics, each run of messages with the same
 * sortstring is re-sorted into arrival order with comp2().
 *
 * The volume number in the Subject header is taken from the number
 * following "Volume " in listinfo.Volline; 1 is used when no such
 * number can be found.  Exits if the output file cannot be created.
 */
do_digest_header()
{
	FILE *fp;
	char *laststr, *vol;
	char buf[BUFSIZ];
	char tmp[LINESIZE + 32];	/* a full line plus fixed text */
	extern int comp2();
	register int i, j, length, volume;

	if ((output = fopen(LISTOUTPUT, "w")) == NULL) {
		printf("digest: cannot create \"%s\"\n", LISTOUTPUT);
		exit(1);
	}

	digestsize = 0;

	/*
	 * The volume number is maintained by hand in the volume
	 * line, so read it from there.
	 */
	volume = 1;
	if ((vol = safter(listinfo.Volline, "Volume ")) != NULL) {
		volume = atoi(vol);
		if (volume <= 0)
			volume = 1;
	}

	/*
	 * Mailer headers.
	 */
	sprintf(buf, "Date: %s\n", listinfo.Dateline);
	digestsize += strlen(buf);
	fputs(buf, output);

	sprintf(buf, "From: %s\n", listinfo.From);
	digestsize += strlen(buf);
	fputs(buf, output);

	sprintf(buf, "Reply-To: %s@%s\n", listinfo.Title, listinfo.Host);
	digestsize += strlen(buf);
	fputs(buf, output);

	sprintf(buf, "Subject: %s Digest V%d #%d\n", listinfo.Title, volume, issue_number);
	digestsize += strlen(buf);
	fputs(buf, output);

	sprintf(buf, "To: %s\n", listinfo.To);
	digestsize += strlen(buf);
	fputs(buf, output);

	/*
	 * The digest header.
	 */
	sprintf(tmp, "%s Digest", listinfo.Title);
	sprintf(buf, "\n\n%-*.*s %-*.*s %-*.*s\n\n",
		HEAD1, HEAD1, tmp,
		HEAD2, DATELEN, listinfo.Dateline,
		HEAD3, HEAD3, listinfo.Volline);
	digestsize += strlen(buf);
	fputs(buf, output);

	sprintf(buf, "Today's Topics:\n");
	digestsize += strlen(buf);
	fputs(buf, output);

	/*
	 * Do today's topics lines.
	 */
	for (i = 0; i < nmessages; i++) {
		/*
		 * No topic.
		 */
		if ((messages[i].Subject == NULL) || (messages[i].sortstring == NULL))
			continue;

		laststr = messages[i].sortstring;

		/*
		 * Count the number of messages with this topic.
		 * Messages without a subject have no sortstring
		 * and sort to the end, so stop when we reach one.
		 */
		j = 1;
		while (((i + j) < nmessages) &&
		       (messages[i + j].sortstring != NULL) &&
		       (strcmp(laststr, messages[i + j].sortstring) == 0))
			j++;

		/*
		 * Print the topic centered on the line.
		 */
		if (j > 1) {
			sprintf(tmp, "%s (%d msgs)", messages[i].Subject, j);
			length = (LINELEN / 2) + (strlen(tmp) / 2);
			sprintf(buf, "%*s\n", length, tmp);

			/*
			 * Sort messages with same topic into their
			 * original arrival order.
			 */
			qsort(&messages[i], j, sizeof(struct message), comp2);

			/*
			 * Skip the rest of this topic.
			 */
			i += (j - 1);
		}
		else {
			length = (LINELEN / 2) + (strlen(messages[i].Subject) / 2);
			sprintf(buf, "%*s\n", length, messages[i].Subject);
		}

		digestsize += strlen(buf);
		fputs(buf, output);
	}

	/*
	 * Read the LISTHEAD file, if there is one.
	 */
	if ((fp = fopen(LISTHEAD, "r")) != NULL) {
		fputc('\n', output);
		digestsize++;

		while (fgets(buf, sizeof(buf), fp) != NULL) {
			digestsize += strlen(buf);
			fputs(buf, output);
		}

		fclose(fp);
	}

	/*
	 * Print a line of dashes.
	 */
	for (i = 0; i < LINELEN; i++) {
		putc('-', output);
		digestsize++;
	}

	fputs("\n\n", output);
	digestsize += 2;
}
/*
 * comp2 - comparison routine for second qsort.  This one simply compares
 *	   message addresses in the input file, so that we can sort the
 *	   messages with the same topic back into the order they arrived.
 *
 * Returns -1, 0 or 1.  The addresses are compared rather than
 * subtracted because they are longs: their difference need not fit
 * in the int this routine returns.
 */
comp2(m1, m2)
register struct message *m1, *m2;
{
	if (m1->messageaddr < m2->messageaddr)
		return(-1);

	if (m1->messageaddr > m2->messageaddr)
		return(1);

	return(0);
}
/*
 * read_messages - reads in the message texts and puts them in the
 *		   digest with their headers.
 *
 * For each entry of messages[], in its current order, writes the
 * saved Date, From, Subject and To headers (those that are present),
 * a blank line, the message body with leading and trailing newlines
 * removed, and a separator line.  After the last message the
 * "End of ... Digest" trailer is written and both the input and the
 * output file are closed.  digestsize is advanced by every character
 * written.  Exits the program if memory runs out.
 */
read_messages()
{
	char buf[BUFSIZ];
	register char *s, *t, *end;
	register int i, length, got;

	for (i = 0; i < nmessages; i++) {
		/*
		 * Just in case.
		 */
		clearerr(input);

		/*
		 * Put the message's headers back in.  Any of
		 * them may have been missing from the message.
		 */
		if (messages[i].Date != NULL) {
			sprintf(buf, "Date: %s\n", messages[i].Date);
			digestsize += strlen(buf);
			fputs(buf, output);
		}

		if (messages[i].From != NULL) {
			sprintf(buf, "From: %s\n", messages[i].From);
			digestsize += strlen(buf);
			fputs(buf, output);
		}

		if (messages[i].Subject != NULL) {
			sprintf(buf, "Subject: %s\n", messages[i].Subject);
			digestsize += strlen(buf);
			fputs(buf, output);
		}

		if (messages[i].To != NULL) {
			sprintf(buf, "To: %s\n", messages[i].To);
			digestsize += strlen(buf);
			fputs(buf, output);
		}

		/*
		 * A blank line always separates the headers from
		 * the text, whichever headers were present.
		 */
		fputc('\n', output);
		digestsize++;

		/*
		 * Read the message into memory.  This is
		 * so we can zap extra blank lines.
		 */
		length = messages[i].messagelength;
		if (length < 0)
			length = 0;

		if ((s = malloc(length + 1)) == NULL) {
			printf("digest: out of memory.\n");
			exit(1);
		}

		fseek(input, messages[i].messageaddr, 0);

		/*
		 * Only trust what was actually read.
		 */
		got = fread(s, 1, length, input);
		if (got < length)
			length = got;

		/*
		 * Zap trailing newlines, without ever stepping
		 * in front of the buffer (the text may be empty
		 * or consist of newlines only).
		 */
		end = s + length;
		while ((end > s) && (end[-1] == '\n'))
			end--;
		*end = '\0';

		/*
		 * Zap leading newlines.  The terminator just
		 * stored stops this loop.
		 */
		t = s;
		while (*t == '\n')
			t++;

		length = end - t;

		/*
		 * Write the message.
		 */
		digestsize += length;
		fwrite(t, 1, length, output);

		sprintf(buf, "\n\n------------------------------\n\n");
		digestsize += strlen(buf);
		fputs(buf, output);

		free(s);
	}

	/*
	 * All done.
	 */
	sprintf(buf, "End of %s Digest\n******************************\n", listinfo.Title);
	digestsize += strlen(buf);
	fputs(buf, output);

	fclose(output);
	fclose(input);
}
/*
 * put_list_info - rewrite the LISTINFO file with the new data.
 *
 * The current file is first renamed to LISTINFO with ".old" appended,
 * then a new LISTINFO is written from the listinfo structure.  The
 * backup is removed only after the new file has been written and
 * closed without error.  If the new file cannot be created or
 * written, the backup is moved back into place so that a LISTINFO
 * file still exists, and a message is printed.
 */
put_list_info()
{
	FILE *fp;
	char tmp[LINESIZE];
	int failed;

	sprintf(tmp, "%s.old", LISTINFO);

	if (rename(LISTINFO, tmp) < 0) {
		printf("digest: cannot move old \"%s\" file, today's data lost.\n", LISTINFO);
		return;
	}

	if ((fp = fopen(LISTINFO, "w")) == NULL) {
		printf("digest: cannot create \"%s\", today's data lost.\n", LISTINFO);
		rename(tmp, LISTINFO);
		return;
	}

	fprintf(fp, "%s\n", listinfo.Title);
	fprintf(fp, "%s\n", listinfo.Host);
	fprintf(fp, "%s\n", listinfo.From);
	fprintf(fp, "%s\n", listinfo.To);
	fprintf(fp, "%s\n", listinfo.Volline);
	fprintf(fp, "%s\n", listinfo.Dateline);

	/*
	 * Buffered output may only fail when it is flushed, so
	 * check both the stream state and the close.
	 */
	failed = ferror(fp);
	if (fclose(fp) == EOF)
		failed = 1;

	if (failed) {
		printf("digest: error writing \"%s\", today's data lost.\n", LISTINFO);
		rename(tmp, LISTINFO);
		return;
	}

	unlink(tmp);
}
/*
 * safter - locate the first occurrence of pat in str and return a
 *	    pointer to the character of str just past it.  Returns
 *	    NULL if pat does not occur in str.
 */
char *safter(str, pat)
register char *str, *pat;
{
	register int patlen = strlen(pat);
	register char *p;

	/*
	 * Try every starting position in turn.
	 */
	for (p = str; *p != '\0'; p++) {
		if (strncmp(p, pat, patlen) == 0)
			return(p + patlen);
	}

	return(NULL);
}
/*
 * nospace - advance s over leading whitespace (blanks and tabs),
 *	     return new value.  The result points into the same
 *	     string; nothing is copied or modified.
 */
char *nospace(s)
register char *s;
{
	/*
	 * The terminating '\0' is neither a blank nor a tab, so it
	 * ends the loop without a separate test.
	 */
	while ((*s == ' ') || (*s == '\t'))
		s++;

	return(s);
}