GopherProxy

	many improvements - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
	git clone git://git.codemadness.org/bmf
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit f5e56cc70c117352ec5b7a7984065eaa65db162f
	parent 20a0f52d5b478e240450fd72fa3bbd3ab5c58c48
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sat, 27 Oct 2018 18:37:01 +0200

	many improvements

	- update README: only flat files are supported now.
	- remove bmf.spec.in file.
	- remove unused functions.
	- remove some assert() calls.
	- dbtext_db_open() improvements:
	  - check strdup call + perror message.
	  - malloc + perror message.
	  - check for empty directory string (just in case).
	  - use snprintf (just in case).
	  - free memory on failure condition.
	- dbtext_db_opentable() improvements:
	  - use snprintf, error on path truncation.
	- remove unit tests (not maintained).
	- code-style improvements.

	Diffstat:
	M Makefile | 1 -
	M README | 5 +----
	M bmf.c | 1 -
	D bmf.spec.in | 64 -----------------------------…
	M config.h | 1 -
	M dbg.c | 14 +-------------
	M dbg.h | 3 ---
	M dbh.c | 58 +++++++++++++++---------------
	M dbh.h | 46 ++++++++++++++---------------…
	M dbtext.c | 166 +++++++++++------------------…
	M filt.c | 8 +++-----
	M lex.c | 45 -----------------------------…
	M lex.h | 22 +++++++++++-----------
	M str.c | 51 +++--------------------------…
	M str.h | 4 ----
	M vec.c | 215 -----------------------------…
	M vec.h | 50 +++++++++++------------------…

	17 files changed, 144 insertions(+), 610 deletions(-)
	---
	diff --git a/Makefile b/Makefile
	@@ -50,7 +50,6 @@ dist:
	cp -f ${MAN1} ${DOC} ${HDR} \
	${SRC} ${SCRIPTS} \
	Makefile \
	- bmf.spec.in \
	"${NAME}-${VERSION}"
	# make tarball
	tar -cf - "${NAME}-${VERSION}" | \
	diff --git a/README b/README
	@@ -13,10 +13,7 @@ This project provides features which are not available in ot…

	(1) Independence from external programs and libraries. Tokens are stored in
	memory using simple vectors which require no heavyweight external data
	-structure libraries. Multiple token database formats are supported,
	-including flat files, libdb, and mysql. Conversion between formats will
	-always be possible with the included import/export utility and flat files
	-will always remain an option.
	+structure libraries. The tokens are stored in plain-text "flat" files.

	(2) Efficient processing. Input data is parsed by a handcrafted parser
	which weighs in under 3% of the equivalent code generated by flex. No
	diff --git a/bmf.c b/bmf.c
	@@ -98,7 +98,6 @@ main(int argc, char **argv)
	err(1, "pledge");

	srand(time(NULL));
	- atexit(dump_alloc_heap);

	stats.keepers = DEF_KEEPERS;
	while ((ch = getopt(argc, argv, "NSVd:hk:m:npstv")) != EOF) {
	diff --git a/bmf.spec.in b/bmf.spec.in
	@@ -1,64 +0,0 @@
	-Name: bmf
	-Version: VERSION
	-Release: 1
	-URL: http://www.sourceforge.net/projects/bmf
	-Source0: %{name}-%{version}.tar.gz
	-License: GPL
	-Group: Applications/Internet
	-Summary: fast anti-spam filtering by Bayesian statistical analysis
	-Buildroot: %{_tmppath}/%{name}-%{version}-root
	-
	-%description
	-bmf is a Bayesian mail filter. It takes an email message or other text on
	-stdin, does a statistical check against lists of "good" and "spam" words,
	-and returns a status code indicating whether or not the message is spam.
	-bmf is efficient, small, and self-contained.
	-
	-%prep
	-
	-%setup
	-
	-%build
	-./configure --with-libdb --without-mysql
	-make
	-
	-%install
	-[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT
	-make DESTDIR=${RPM_BUILD_ROOT} install
	-gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.?
	-
	-
	-%files
	-%{_bindir}/bmf
	-%{_mandir}/man1/bmf.1.gz
	-%{_bindir}/bmfconv
	-%{_mandir}/man1/bmfconv.1.gz
	-%doc README LICENSE
	-
	-%changelog
	-* Mon Oct 14 2002 Tom Marshall <[email protected]>
	-- Update to version 0.9.3.
	-
	-* Sat Oct 12 2002 Tom Marshall <[email protected]>
	-- Update to version 0.9.2.
	-
	-* Sat Oct 12 2002 Tom Marshall <[email protected]>
	-- Update to version 0.9.1.
	-
	-* Wed Oct 09 2002 Tom Marshall <[email protected]>
	-- Update to version 0.84.
	-
	-* Mon Oct 07 2002 Tom Marshall <[email protected]>
	-- Update to version 0.83.
	-
	-* Sat Oct 05 2002 Tom Marshall <[email protected]>
	-- Update to version 0.82.
	-
	-* Thu Oct 03 2002 Tom Marshall <[email protected]>
	-- Update to version 0.81.
	-- Add bmfconv.
	-- Use new configure script.
	-
	-* Fri Sep 27 2002 Tom Marshall <[email protected]>
	-- Initial build.
	-
	diff --git a/config.h b/config.h
	@@ -19,7 +19,6 @@
	#include <errno.h>
	#include <math.h>
	#include <ctype.h>
	-#include <assert.h>

	/**************************************
	* System headers
	diff --git a/dbg.c b/dbg.c
	@@ -15,7 +15,7 @@

	uint g_verbose = 0;

	-void
	+void
	verbose(int level, const char *fmt,...)
	{
	va_list v;
	@@ -26,15 +26,3 @@ verbose(int level, const char *fmt,...)
	va_end(v);
	}
	}
	-
	-void
	-dbgout(const char *fmt,...)
	-{
	- /* empty */
	-}
	-
	-void
	-dump_alloc_heap(void)
	-{
	- /* empty */
	-}
	diff --git a/dbg.h b/dbg.h
	@@ -14,7 +14,4 @@ extern uint g_verbose;

	void verbose( int level, const char* fmt, ... );

	-void dbgout( const char* fmt, ... );
	-void dump_alloc_heap( void );
	-
	#endif /* ndef _DBG_H */
	diff --git a/dbh.c b/dbh.c
	@@ -24,43 +24,43 @@
	*
	* the list referenced in the iterator must be sorted.
	*/
	-uint db_getnewcount( veciter_t* piter )
	+uint
	+db_getnewcount(veciter_t * piter)
	{
	- str_t* pstr;
	- uint count;
	- veciter_t curiter;
	- str_t* pcurstr;
	+ str_t *pstr;
	+ uint count;
	+ veciter_t curiter;
	+ str_t *pcurstr;

	- pstr = &piter->plist->pitems[piter->index];
	- count = 0;
	+ pstr = &piter->plist->pitems[piter->index];
	+ count = 0;

	- curiter.plist = piter->plist;
	- curiter.index = piter->index;
	- pcurstr = &curiter.plist->pitems[curiter.index];
	+ curiter.plist = piter->plist;
	+ curiter.index = piter->index;
	+ pcurstr = &curiter.plist->pitems[curiter.index];

	- while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr…
	- {
	- piter->index = curiter.index;
	- count = min( MAXFREQ, count + 1 );
	- veciter_next( &curiter );
	- pcurstr = &curiter.plist->pitems[curiter.index];
	- }
	+ while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcur…
	+ piter->index = curiter.index;
	+ count = min(MAXFREQ, count + 1);
	+ veciter_next(&curiter);
	+ pcurstr = &curiter.plist->pitems[curiter.index];
	+ }

	- return count;
	+ return count;
	}

	-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, c…
	+dbh_t *
	+dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
	{
	- dbh_t* pdb = NULL;
	+ dbh_t *pdb;

	- switch( dbfmt )
	- {
	- case db_text:
	- pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass );
	- break;
	- default:
	- assert(false);
	- }
	+ switch (dbfmt) {
	+ case db_text:
	+ pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass);
	+ break;
	+ default:
	+ break;
	+ }

	- return pdb;
	+ return NULL;
	}
	diff --git a/dbh.h b/dbh.h
	@@ -11,44 +11,40 @@
	#define _DBH_H

	/* database formats */
	-typedef enum
	-{
	- db_text /* flat text */
	-} dbfmt_t;
	+typedef enum {
	+ db_text /* flat text */
	+} dbfmt_t;

	/* record/field structure */
	-typedef struct _rec
	-{
	- str_t w;
	- uint n;
	-} rec_t;
	+typedef struct _rec {
	+ str_t w;
	+ uint n;
	+} rec_t;

	/* database table */
	typedef struct _dbt dbt_t;
	-struct _dbt
	-{
	- bool_t (*close)(dbt_t*);
	- bool_t (*mergeclose)(dbt_t*,vec_t*);
	- bool_t (*unmergeclose)(dbt_t*,vec_t*);
	- bool_t (*import)(dbt_t*,cpchar);
	- bool_t (*export)(dbt_t*,cpchar);
	- uint (*getmsgcount)(dbt_t*);
	- uint (*getcount)(dbt_t*,str_t*);
	+struct _dbt {
	+ bool_t(*close) (dbt_t *);
	+ bool_t(*mergeclose) (dbt_t *, vec_t *);
	+ bool_t(*unmergeclose) (dbt_t *, vec_t *);
	+ bool_t(*import) (dbt_t *, cpchar);
	+ bool_t(*export) (dbt_t *, cpchar);
	+ uint(*getmsgcount) (dbt_t *);
	+ uint(*getcount) (dbt_t *, str_t *);
	};

	/* database instance */
	typedef struct _dbh dbh_t;
	-struct _dbh
	-{
	- bool_t (*close)(dbh_t*);
	- dbt_t* (*opentable)(dbh_t*,cpchar,bool_t);
	+struct _dbh {
	+ bool_t(*close) (dbh_t *);
	+ dbt_t *(*opentable) (dbh_t *, cpchar, bool_t);
	};

	-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, …
	+dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cp…

	#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
	#define TEXTDB_MAXLINELEN (MAXWORDLEN+32)

	-uint db_getnewcount( veciter_t* piter );
	+uint db_getnewcount(veciter_t * piter);

	-#endif /* ndef _DBH_H */
	+#endif /* ndef _DBH_H */
	diff --git a/dbtext.c b/dbtext.c
	@@ -21,78 +21,83 @@
	static void
	dbtext_table_setsize(dbttext_t * pthis, uint nsize)
	{
	- if (nsize > pthis->nalloc) {
	- uint nnewalloc;
	- rec_t *pnewitems;
	- uint n;
	-
	- nnewalloc = pthis->nalloc * 2;
	- if (nnewalloc < nsize)
	- nnewalloc = nsize;
	- pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeo…
	- if (pnewitems == NULL) {
	- exit(2);
	- }
	- for (n = pthis->nitems; n < nsize; n++) {
	- str_create(&pnewitems[n].w);
	- pnewitems[n].n = 0;
	- }
	- pthis->pitems = pnewitems;
	- pthis->nalloc = nnewalloc;
	+ uint nnewalloc;
	+ rec_t *pnewitems;
	+ uint n;
	+
	+ if (nsize <= pthis->nalloc)
	+ return;
	+
	+ nnewalloc = pthis->nalloc * 2;
	+ if (nnewalloc < nsize)
	+ nnewalloc = nsize;
	+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)…
	+ if (pnewitems == NULL) {
	+ exit(2);
	}
	+ for (n = pthis->nitems; n < nsize; n++) {
	+ str_create(&pnewitems[n].w);
	+ pnewitems[n].n = 0;
	+ }
	+ pthis->pitems = pnewitems;
	+ pthis->nalloc = nnewalloc;
	}

	dbh_t *
	dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
	{
	- dbhtext_t *pthis;
	-
	+ dbhtext_t *pthis = NULL;
	uint dirlen;
	cpchar phome;
	struct stat st;

	- pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t));
	- if (pthis == NULL) {
	+ if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
	+ perror("malloc()");
	goto bail;
	}
	+
	pthis->close = dbtext_db_close;
	pthis->opentable = dbtext_db_opentable;
	+
	if (dbname != NULL && *dbname != '\0') {
	dirlen = strlen(dbname);
	- pthis->dir = strdup(dbname);
	- if (pthis->dir[dirlen - 1] == '/') {
	- pthis->dir[dirlen - 1] = '\0';
	+ if ((pthis->dir = strdup(dbname)) == NULL) {
	+ perror("strdup()");
	+ goto bail;
	}
	+ if (dirlen && pthis->dir[dirlen - 1] == '/')
	+ pthis->dir[--dirlen] = '\0';
	} else {
	phome = getenv("HOME");
	if (phome == NULL || *phome == '\0') {
	phome = ".";
	}
	- pthis->dir = (char *) malloc(strlen(phome) + 5 + 1);
	- if (pthis->dir == NULL) {
	+ dirlen = strlen(phome) + 5 + 1;
	+ if ((pthis->dir = malloc(dirlen)) == NULL)
	goto bail;
	- }
	- sprintf(pthis->dir, "%s/.bmf", phome);
	+
	+ snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
	}

	- /* ensure config directory exists */
	+ /* make sure config directory exists */
	if (stat(pthis->dir, &st) != 0) {
	- if (errno == ENOENT) {
	- if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != …
	- goto bail;
	- }
	- } else {
	+ if (errno != ENOENT ||
	+ mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
	goto bail;
	- }
	} else {
	- if (!S_ISDIR(st.st_mode)) {
	+ if (!S_ISDIR(st.st_mode))
	goto bail;
	- }
	}

	- return (dbh_t *) pthis;
	+ return (dbh_t *)pthis;

	bail:
	+ if (pthis) {
	+ if (pthis->dir)
	+ free(pthis->dir);
	+ free(pthis);
	+ }
	+
	return NULL;
	}

	@@ -114,19 +119,17 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool…

	#endif /* ndef NOLOCK */
	char szpath[PATH_MAX];
	- int flags;
	+ int flags, ret;
	struct stat st;
	-
	char *pbegin;
	char *pend;
	rec_t r;
	uint pos;

	- if (pthis->dir == NULL) {
	+ if (pthis->dir == NULL)
	goto bail;
	- }
	- ptable = (dbttext_t *) malloc(sizeof(dbttext_t));
	- if (ptable == NULL) {
	+
	+ if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
	perror("malloc()");
	goto bail;
	}
	@@ -144,13 +147,18 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool…
	ptable->nitems = 0;
	ptable->pitems = NULL;

	- sprintf(szpath, "%s/%s.txt", pthis->dir, table);
	- flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT);
	- ptable->fd = open(szpath, flags, 0644);
	- if (ptable->fd == -1) {
	+ ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
	+ if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
	+ fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl…
	+ goto bail;
	+ }
	+
	+ flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
	+ if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
	perror("open()");
	goto bail;
	}
	+
	#ifndef NOLOCK
	memset(&lock, 0, sizeof(lock));
	lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
	@@ -307,8 +315,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
	}
	if (cmp < 0) {
	/* write existing str */
	- assert(prec->w.p != NULL && prec->w.len > 0);
	- assert(prec->w.len <= MAXWORDLEN);
	count = prec->n;
	strncpylwr(p, prec->w.p, prec->w.len);
	p += prec->w.len;
	@@ -318,10 +324,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
	pos++;
	} else if (cmp == 0) {
	/* same str, merge and write sum */
	- assert(prec->w.p != NULL && prec->w.len > 0);
	- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
	- assert(prec->w.len <= MAXWORDLEN);
	- assert(pmsgstr->len <= MAXWORDLEN);
	count = db_getnewcount(&msgiter);
	count += prec->n;
	strncpylwr(p, prec->w.p, prec->w.len);
	@@ -334,8 +336,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
	pmsgstr = veciter_get(&msgiter);
	} else { /* cmp > 0 */
	/* write new str */
	- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
	- assert(pmsgstr->len <= MAXWORDLEN);
	count = db_getnewcount(&msgiter);
	strncpylwr(p, pmsgstr->p, pmsgstr->len);
	p += pmsgstr->len;
	@@ -398,8 +398,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
	}
	if (cmp < 0) {
	/* write existing str */
	- assert(prec->w.p != NULL && prec->w.len > 0);
	- assert(prec->w.len <= MAXWORDLEN);
	count = prec->n;
	strncpylwr(p, prec->w.p, prec->w.len);
	p += prec->w.len;
	@@ -409,10 +407,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
	pos++;
	} else if (cmp == 0) {
	/* same str, merge and write difference */
	- assert(prec->w.p != NULL && prec->w.len > 0);
	- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
	- assert(prec->w.len <= MAXWORDLEN);
	- assert(pmsgstr->len <= MAXWORDLEN);
	count = db_getnewcount(&msgiter);
	count = (prec->n > count) ? (prec->n - count) : 0;
	strncpylwr(p, prec->w.p, prec->w.len);
	@@ -425,8 +419,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
	pmsgstr = veciter_get(&msgiter);
	} else { /* cmp > 0 */
	/* this should not happen, so write with count=0 */
	- assert(pmsgstr->p != NULL && pmsgstr->len > 0);
	- assert(pmsgstr->len <= MAXWORDLEN);
	db_getnewcount(&msgiter);
	count = 0;
	strncpylwr(p, pmsgstr->p, pmsgstr->len);
	@@ -485,53 +477,9 @@ dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
	else
	lo = mid;
	}
	- assert(hi >= 0 && hi < pthis->nitems);

	if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
	return 0;
	}
	return pthis->pitems[hi].n;
	}
	-
	-#ifdef UNIT_TEST
	-int
	-main(int argc, char **argv)
	-{
	- dbh_t *pdb;
	- veciter_t iter;
	- str_t *pstr;
	- uint n;
	-
	- if (argc != 2) {
	- fprintf(stderr, "usage: %s <file>\n", argv[0]);
	- return 1;
	- }
	- for (n = 0; n < 100; n++) {
	- pdb = dbh_open("testlist", true);
	-
	- vec_first(&db, &iter);
	- while ((pstr = veciter_get(&iter)) != NULL) {
	- char buf[MAXWORDLEN + 32];
	- char *p;
	-
	- if (pstr->len > 200) {
	- fprintf(stderr, "str too long: %u chars\n", ps…
	- break;
	- }
	- p = buf;
	- strcpy(buf, "str: ");
	- p += 6;
	- memcpy(p, pstr->p, pstr->len);
	- p += pstr->len;
	- sprintf(p, " %u", pstr->count);
	- puts(buf);
	-
	- veciter_next(&iter);
	- }
	-
	- dbh_close(&db);
	- }
	-
	- return 0;
	-}
	-#endif /* def UNIT_TEST */
	diff --git a/filt.c b/filt.c
	@@ -21,7 +21,7 @@
	#define DEVIATION(n) fabs((n)-0.5f)

	/* Dump the contents of a statistics structure */
	-void
	+void
	statdump(stats_t * pstat, int fd)
	{
	char iobuf[IOBUFSIZE];
	@@ -49,7 +49,7 @@ statdump(stats_t * pstat, int fd)
	}
	}

	-void
	+void
	bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats)
	{
	veciter_t iter;
	@@ -95,8 +95,6 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, sta…
	double goodprob = goodtotal ? min(1.0, (goodness / goo…
	double spamprob = spamtotal ? min(1.0, (spamness / spa…

	- assert(goodtotal > 0 || spamtotal > 0);
	-
	#ifdef NON_EQUIPROBABLE
	prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_p…
	#else
	@@ -146,7 +144,7 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, s…
	pstats->spamicity = product / (product + invproduct);
	}

	-bool_t
	+bool_t
	bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok)
	{
	str_t w;
	diff --git a/lex.c b/lex.c
	@@ -561,8 +561,6 @@ lex_nexttoken(lex_t * pthis, tok_t * ptok)
	uint len;
	uint toklen;

	- assert(pthis->pbuf != NULL);
	-
	if (pthis->pos == pthis->eom) {
	pthis->bom = pthis->pos;
	}
	@@ -637,9 +635,6 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
	char szbuf[256];
	bool_t in_headers = true;

	- assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen);
	- assert(pthis->bom <= pthis->eom);
	-
	pthis->pos = pthis->bom;
	if (is_spam) {
	sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests…
	@@ -682,43 +677,3 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits)
	}
	pthis->bom = pthis->eom;
	}
	-
	-#ifdef UNIT_TEST
	-
	-int
	-main(int argc, char **argv)
	-{
	- int fd;
	- lex_t lex;
	- tok_t tok;
	-
	- fd = STDIN_FILENO;
	- if (argc == 2) {
	- fd = open(argv[1], O_RDONLY);
	- }
	- lex_create(&lex);
	- if (!lex_load(&lex, fd)) {
	- fprintf(stderr, "cannot load file\n");
	- exit(1);
	- }
	- lex_nexttoken(&lex, &tok);
	- while (tok.tt != eof) {
	- char sztok[64];
	-
	- if (tok.len > MAXWORDLEN) {
	- printf("* token too long! *\n");
	- exit(1);
	- }
	- memcpy(sztok, tok.p, tok.len);
	- strlwr(sztok);
	- sztok[tok.len] = '\0';
	- printf("get_token: %d '%s'\n", tok.tt, sztok);
	-
	- lex_nexttoken(&lex, &tok);
	- }
	-
	- lex_destroy(&lex);
	- return 0;
	-}
	-
	-#endif /* def UNIT_TEST */
	diff --git a/lex.h b/lex.h
	@@ -14,23 +14,23 @@ typedef enum { from, eof, word } toktype_t;

	typedef struct _tok
	{
	- toktype_t tt; /* token type */
	- char* p;
	- uint len;
	+ toktype_t tt; /* token type */
	+ char *p;
	+ uint len;
	} tok_t;

	typedef enum { envelope, hdrs, body } msgsec_t;

	typedef struct _lex
	{
	- mbox_t mboxtype;
	- msgsec_t section; /* current section (envelope, headers, body) */
	- uint pos; /* current position */
	- uint bom; /* beginning of message */
	- uint eom; /* end of current message (start of next) */
	- uint lineend; /* line end (actually, start of next line) */
	- uint buflen; /* length of buffer */
	- char* pbuf;
	+ mbox_t mboxtype;
	+ msgsec_t section; /* current section (envelope, headers, bod…
	+ uint pos; /* current position */
	+ uint bom; /* beginning of message */
	+ uint eom; /* end of current message (start of ne…
	+ uint lineend; /* line end (actually, start of next l…
	+ uint buflen; /* length of buffer */
	+ char *pbuf;
	} lex_t;

	void lex_create ( lex_t* plex, mbox_t mboxtype );
	diff --git a/str.c b/str.c
	@@ -12,23 +12,6 @@
	#include "str.h"

	void
	-strlwr(char *s)
	-{
	- while (*s != '\0') {
	- *s = tolower(*s);
	- s++;
	- }
	-}
	-
	-void
	-strcpylwr(char *d, const char *s)
	-{
	- while (*s != '\0') {
	- *d++ = tolower(*s++);
	- }
	-}
	-
	-void
	strncpylwr(char *d, const char *s, int n)
	{
	while (n--) {
	@@ -37,46 +20,20 @@ strncpylwr(char *d, const char *s, int n)
	}

	void
	-str_create(str_t * pstr)
	+str_create(str_t *pstr)
	{
	pstr->p = NULL;
	pstr->len = 0;
	}

	-void
	-str_destroy(str_t * pstr)
	-{
	- /* empty */
	-}
	-
	-int
	-str_cmp(const str_t * pthis, const str_t * pother)
	-{
	- uint minlen = min(pthis->len, pother->len);
	- int cmp;
	-
	- assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
	-
	- cmp = strncmp(pthis->p, pother->p, minlen);
	-
	- if (cmp == 0 && pthis->len != pother->len) {
	- cmp = (pthis->len < pother->len) ? -1 : 1;
	- }
	- return cmp;
	-}
	-
	int
	str_casecmp(const str_t * pthis, const str_t * pother)
	{
	- uint minlen = min(pthis->len, pother->len);
	int cmp;

	- assert(pthis->p != NULL && pother->p != NULL && minlen != 0);
	-
	- cmp = strncasecmp(pthis->p, pother->p, minlen);
	-
	- if (cmp == 0 && pthis->len != pother->len) {
	+ cmp = strncasecmp(pthis->p, pother->p, min(pthis->len, pother->len));
	+ if (cmp == 0 && pthis->len != pother->len)
	cmp = (pthis->len < pother->len) ? -1 : 1;
	- }
	+
	return cmp;
	}
	diff --git a/str.h b/str.h
	@@ -11,8 +11,6 @@
	#define _STR_H

	/* a couple of generic string functions... */
	-void strlwr( char* s );
	-void strcpylwr( char* d, const char* s );
	void strncpylwr( char* d, const char* s, int n );

	typedef struct _str
	@@ -22,9 +20,7 @@ typedef struct _str
	} str_t;

	void str_create ( str_t* pthis );
	-void str_destroy( str_t* pthis );

	-int str_cmp ( const str_t* pthis, const str_t* pother );
	int str_casecmp( const str_t* pthis, const str_t* pother );

	#endif /* ndef _STR_H */
	diff --git a/vec.c b/vec.c
	@@ -61,55 +61,20 @@ vec_setsize(vec_t * pthis, uint nsize)
	}

	void
	-vec_addhead(vec_t * pthis, str_t * pstr)
	-{
	- assert(pstr->p != NULL && pstr->len > 0);
	-
	- vec_setsize(pthis, pthis->nitems + 1);
	- memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(s…
	- pthis->pitems[0] = *pstr;
	- pthis->nitems++;
	-}
	-
	-void
	vec_addtail(vec_t * pthis, str_t * pstr)
	{
	- assert(pstr->p != NULL && pstr->len > 0);
	-
	vec_setsize(pthis, pthis->nitems + 1);
	pthis->pitems[pthis->nitems] = *pstr;
	pthis->nitems++;
	}

	void
	-vec_delhead(vec_t * pthis)
	-{
	- assert(pthis->nitems > 0);
	- pthis->nitems--;
	- memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(s…
	-}
	-
	-void
	-vec_deltail(vec_t * pthis)
	-{
	- assert(pthis->nitems > 0);
	- pthis->nitems--;
	-}
	-
	-void
	vec_first(vec_t * pthis, veciter_t * piter)
	{
	piter->plist = pthis;
	piter->index = 0;
	}

	-void
	-vec_last(vec_t * pthis, veciter_t * piter)
	-{
	- piter->plist = pthis;
	- piter->index = pthis->nitems;
	-}
	-
	/*****************************************************************************
	* sorted vector
	*/
	@@ -121,66 +86,6 @@ svec_compare(const void *p1, const void *p2)
	}

	void
	-svec_add(vec_t * pthis, str_t * pstr)
	-{
	- int lo, hi, mid;
	- veciter_t iter;
	-
	- if (pthis->nitems == 0) {
	- vec_addtail(pthis, pstr);
	- return;
	- }
	- if (str_casecmp(pstr, &pthis->pitems[0]) < 0) {
	- vec_addhead(pthis, pstr);
	- return;
	- }
	- hi = pthis->nitems - 1;
	- lo = -1;
	- while (hi - lo > 1) {
	- mid = (hi + lo) / 2;
	- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
	- hi = mid;
	- else
	- lo = mid;
	- }
	- assert(hi < pthis->nitems);
	-
	- iter.plist = pthis;
	- iter.index = hi;
	-
	- if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) {
	- veciter_addbefore(&iter, pstr);
	- } else {
	- veciter_addafter(&iter, pstr);
	- }
	-}
	-
	-str_t *
	-svec_find(vec_t * pthis, str_t * pstr)
	-{
	- int lo, hi, mid;
	-
	- if (pthis->nitems == 0) {
	- return NULL;
	- }
	- hi = pthis->nitems - 1;
	- lo = -1;
	- while (hi - lo > 1) {
	- mid = (hi + lo) / 2;
	- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0)
	- hi = mid;
	- else
	- lo = mid;
	- }
	- assert(hi >= 0 && hi < pthis->nitems);
	-
	- if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) {
	- return NULL;
	- }
	- return &pthis->pitems[hi];
	-}
	-
	-void
	svec_sort(vec_t * pthis)
	{
	if (pthis->nitems > 1) {
	@@ -208,35 +113,6 @@ veciter_get(veciter_t * pthis)
	}

	bool_t
	-veciter_equal(veciter_t * pthis, veciter_t * pthat)
	-{
	- if (pthis->plist != pthat->plist ||
	- pthis->index != pthat->index) {
	- return false;
	- }
	- return true;
	-}
	-
	-bool_t
	-veciter_hasitem(veciter_t * pthis)
	-{
	- if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) {
	- return false;
	- }
	- return true;
	-}
	-
	-bool_t
	-veciter_prev(veciter_t * pthis)
	-{
	- if (pthis->index == 0) {
	- return false;
	- }
	- pthis->index--;
	- return true;
	-}
	-
	-bool_t
	veciter_next(veciter_t * pthis)
	{
	pthis->index++;
	@@ -245,94 +121,3 @@ veciter_next(veciter_t * pthis)
	}
	return true;
	}
	-
	-void
	-veciter_addafter(veciter_t * pthis, str_t * pstr)
	-{
	- str_t *pitems;
	-
	- vec_setsize(pthis->plist, pthis->plist->nitems + 1);
	- assert(pthis->index < pthis->plist->nitems);
	- pitems = pthis->plist->pitems;
	-
	- if (pthis->index != pthis->plist->nitems - 1) {
	- memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1],
	- (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t));
	- }
	- pitems[pthis->index + 1] = *pstr;
	- pthis->plist->nitems++;
	-}
	-
	-void
	-veciter_addbefore(veciter_t * pthis, str_t * pstr)
	-{
	- str_t *pitems;
	-
	- vec_setsize(pthis->plist, pthis->plist->nitems + 1);
	- assert(pthis->index < pthis->plist->nitems);
	- pitems = pthis->plist->pitems;
	-
	- memmove(&pitems[pthis->index + 1], &pitems[pthis->index],
	- (pthis->plist->nitems - pthis->index) * sizeof(str_t));
	-
	- pitems[pthis->index] = *pstr;
	- pthis->plist->nitems++;
	-}
	-
	-void
	-veciter_del(veciter_t * pthis)
	-{
	- str_t *pitems;
	-
	- assert(pthis->plist->nitems > 0);
	- pthis->plist->nitems--;
	- if (pthis->index < pthis->plist->nitems) {
	- pitems = pthis->plist->pitems;
	- memmove(&pitems[pthis->index], &pitems[pthis->index + 1],
	- (pthis->plist->nitems - pthis->index) * sizeof(str_t));
	- }
	-}
	-
	-#ifdef UNIT_TEST
	-int
	-main(int argc, char **argv)
	-{
	- vec_t vl;
	- veciter_t iter;
	- str_t *pstr;
	- uint n;
	-
	- if (argc != 2) {
	- fprintf(stderr, "usage: %s <file>\n", argv[0]);
	- return 1;
	- }
	- for (n = 0; n < 100; n++) {
	- vec_create(&vl);
	- vec_load(&vl, argv[1]);
	-
	- vec_first(&vl, &iter);
	- while ((pstr = veciter_get(&iter)) != NULL) {
	- char buf[256];
	- char *p;
	-
	- if (pstr->len > 200) {
	- fprintf(stderr, "str too long: %u chars\n", ps…
	- break;
	- }
	- p = buf;
	- strcpy(buf, "str: ");
	- p += 6;
	- memcpy(p, pstr->p, pstr->len);
	- p += pstr->len;
	- sprintf(p, " %u", pstr->count);
	- puts(buf);
	-
	- veciter_next(&iter);
	- }
	-
	- vec_destroy(&vl);
	- }
	-
	- return 0;
	-}
	-#endif /* def UNIT_TEST */
	diff --git a/vec.h b/vec.h
	@@ -13,46 +13,30 @@
	/* item count for initial alloc */
	#define VEC_INITIAL_SIZE 256

	-typedef struct _vec
	-{
	- uint nalloc; /* items alloced in pitems */
	- uint nitems; /* items available */
	- str_t* pitems; /* growing vector of items */
	+typedef struct _vec {
	+ uint nalloc; /* items allocated in pitems */
	+ uint nitems; /* items available */
	+ str_t *pitems; /* growing vector of items */
	} vec_t;

	-typedef struct _veciter
	-{
	- struct _vec* plist;
	- uint index;
	+typedef struct _veciter {
	+ struct _vec *plist;
	+ uint index;
	} veciter_t;

	/* class vector */
	-void vec_create ( vec_t* pthis );
	-void vec_destroy ( vec_t* pthis );
	+void vec_create(vec_t * pthis);
	+void vec_destroy(vec_t * pthis);
	+void vec_addtail(vec_t * pthis, str_t * pstr);
	+void vec_first(vec_t * pthis, veciter_t * piter);

	-void vec_addhead ( vec_t* pthis, str_t* pstr );
	-void vec_addtail ( vec_t* pthis, str_t* pstr );
	-void vec_delhead ( vec_t* pthis );
	-void vec_deltail ( vec_t* pthis );
	+/* class sorted_vector */
	+void svec_sort(vec_t * ptthis);

	-void vec_first ( vec_t* pthis, veciter_t* piter );
	-void vec_last ( vec_t* pthis, veciter_t* piter );
	+/* veciter_create not needed */
	+void veciter_destroy(veciter_t * pthis);

	-/* class sorted_vector */
	-void svec_add ( vec_t* pthis, str_t* pstr );
	-str_t* svec_find ( vec_t* pthis, str_t* pstr );
	-void svec_sort ( vec_t* ptthis );
	-
	-/* veciter_create not needed */
	-void veciter_destroy ( veciter_t* pthis );
	-
	-str_t* veciter_get ( veciter_t* pthis );
	-bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat );
	-bool_t veciter_hasitem ( veciter_t* pthis );
	-bool_t veciter_prev ( veciter_t* pthis );
	-bool_t veciter_next ( veciter_t* pthis );
	-void veciter_addafter ( veciter_t* pthis, str_t* pstr );
	-void veciter_addbefore( veciter_t* pthis, str_t* pstr );
	-void veciter_del ( veciter_t* pthis );
	+str_t *veciter_get(veciter_t * pthis);
	+bool_t veciter_next(veciter_t * pthis);

	#endif /* ndef _VEC_H */