many improvements - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches | |
git clone git://git.codemadness.org/bmf | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit f5e56cc70c117352ec5b7a7984065eaa65db162f | |
parent 20a0f52d5b478e240450fd72fa3bbd3ab5c58c48 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 27 Oct 2018 18:37:01 +0200 | |
many improvements | |
- update README: only flat files are supported now. | |
- remove bmf.spec.in file. | |
- remove unused functions. | |
- remove some assert() calls. | |
- dbtext_db_open() improvements: | |
- check strdup call + perror message. | |
- malloc + perror message. | |
- check for empty directory string (just in case). | |
- use snprintf (just in case). | |
- free memory on failure condition. | |
- dbtext_db_opentable() improvements: | |
- use snprintf, error on path truncation. | |
- remove unit tests (not maintained). | |
- code-style improvements. | |
Diffstat: | |
M Makefile | 1 - | |
M README | 5 +---- | |
M bmf.c | 1 - | |
D bmf.spec.in | 64 -----------------------------… | |
M config.h | 1 - | |
M dbg.c | 14 +------------- | |
M dbg.h | 3 --- | |
M dbh.c | 58 +++++++++++++++--------------- | |
M dbh.h | 46 ++++++++++++++---------------… | |
M dbtext.c | 166 +++++++++++------------------… | |
M filt.c | 8 +++----- | |
M lex.c | 45 -----------------------------… | |
M lex.h | 22 +++++++++++----------- | |
M str.c | 51 +++--------------------------… | |
M str.h | 4 ---- | |
M vec.c | 215 -----------------------------… | |
M vec.h | 50 +++++++++++------------------… | |
17 files changed, 144 insertions(+), 610 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -50,7 +50,6 @@ dist: | |
cp -f ${MAN1} ${DOC} ${HDR} \ | |
${SRC} ${SCRIPTS} \ | |
Makefile \ | |
- bmf.spec.in \ | |
"${NAME}-${VERSION}" | |
# make tarball | |
tar -cf - "${NAME}-${VERSION}" | \ | |
diff --git a/README b/README | |
@@ -13,10 +13,7 @@ This project provides features which are not available in ot… | |
(1) Independence from external programs and libraries. Tokens are stored in | |
memory using simple vectors which require no heavyweight external data | |
-structure libraries. Multiple token database formats are supported, | |
-including flat files, libdb, and mysql. Conversion between formats will | |
-always be possible with the included import/export utility and flat files | |
-will always remain an option. | |
+structure libraries. The tokens are stored in plain-text "flat" files. | |
(2) Efficient processing. Input data is parsed by a handcrafted parser | |
which weighs in under 3% of the equivalent code generated by flex. No | |
diff --git a/bmf.c b/bmf.c | |
@@ -98,7 +98,6 @@ main(int argc, char **argv) | |
err(1, "pledge"); | |
srand(time(NULL)); | |
- atexit(dump_alloc_heap); | |
stats.keepers = DEF_KEEPERS; | |
while ((ch = getopt(argc, argv, "NSVd:hk:m:npstv")) != EOF) { | |
diff --git a/bmf.spec.in b/bmf.spec.in | |
@@ -1,64 +0,0 @@ | |
-Name: bmf | |
-Version: VERSION | |
-Release: 1 | |
-URL: http://www.sourceforge.net/projects/bmf | |
-Source0: %{name}-%{version}.tar.gz | |
-License: GPL | |
-Group: Applications/Internet | |
-Summary: fast anti-spam filtering by Bayesian statistical analysis | |
-Buildroot: %{_tmppath}/%{name}-%{version}-root | |
- | |
-%description | |
-bmf is a Bayesian mail filter. It takes an email message or other text on | |
-stdin, does a statistical check against lists of "good" and "spam" words, | |
-and returns a status code indicating whether or not the message is spam. | |
-bmf is efficient, small, and self-contained. | |
- | |
-%prep | |
- | |
-%setup | |
- | |
-%build | |
-./configure --with-libdb --without-mysql | |
-make | |
- | |
-%install | |
-[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT | |
-make DESTDIR=${RPM_BUILD_ROOT} install | |
-gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.? | |
- | |
- | |
-%files | |
-%{_bindir}/bmf | |
-%{_mandir}/man1/bmf.1.gz | |
-%{_bindir}/bmfconv | |
-%{_mandir}/man1/bmfconv.1.gz | |
-%doc README LICENSE | |
- | |
-%changelog | |
-* Mon Oct 14 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.9.3. | |
- | |
-* Sat Oct 12 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.9.2. | |
- | |
-* Sat Oct 12 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.9.1. | |
- | |
-* Wed Oct 09 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.84. | |
- | |
-* Mon Oct 07 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.83. | |
- | |
-* Sat Oct 05 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.82. | |
- | |
-* Thu Oct 03 2002 Tom Marshall <[email protected]> | |
-- Update to version 0.81. | |
-- Add bmfconv. | |
-- Use new configure script. | |
- | |
-* Fri Sep 27 2002 Tom Marshall <[email protected]> | |
-- Initial build. | |
- | |
diff --git a/config.h b/config.h | |
@@ -19,7 +19,6 @@ | |
#include <errno.h> | |
#include <math.h> | |
#include <ctype.h> | |
-#include <assert.h> | |
/************************************** | |
* System headers | |
diff --git a/dbg.c b/dbg.c | |
@@ -15,7 +15,7 @@ | |
uint g_verbose = 0; | |
-void | |
+void | |
verbose(int level, const char *fmt,...) | |
{ | |
va_list v; | |
@@ -26,15 +26,3 @@ verbose(int level, const char *fmt,...) | |
va_end(v); | |
} | |
} | |
- | |
-void | |
-dbgout(const char *fmt,...) | |
-{ | |
- /* empty */ | |
-} | |
- | |
-void | |
-dump_alloc_heap(void) | |
-{ | |
- /* empty */ | |
-} | |
diff --git a/dbg.h b/dbg.h | |
@@ -14,7 +14,4 @@ extern uint g_verbose; | |
void verbose( int level, const char* fmt, ... ); | |
-void dbgout( const char* fmt, ... ); | |
-void dump_alloc_heap( void ); | |
- | |
#endif /* ndef _DBG_H */ | |
diff --git a/dbh.c b/dbh.c | |
@@ -24,43 +24,43 @@ | |
* | |
* the list referenced in the iterator must be sorted. | |
*/ | |
-uint db_getnewcount( veciter_t* piter ) | |
+uint | |
+db_getnewcount(veciter_t * piter) | |
{ | |
- str_t* pstr; | |
- uint count; | |
- veciter_t curiter; | |
- str_t* pcurstr; | |
+ str_t *pstr; | |
+ uint count; | |
+ veciter_t curiter; | |
+ str_t *pcurstr; | |
- pstr = &piter->plist->pitems[piter->index]; | |
- count = 0; | |
+ pstr = &piter->plist->pitems[piter->index]; | |
+ count = 0; | |
- curiter.plist = piter->plist; | |
- curiter.index = piter->index; | |
- pcurstr = &curiter.plist->pitems[curiter.index]; | |
+ curiter.plist = piter->plist; | |
+ curiter.index = piter->index; | |
+ pcurstr = &curiter.plist->pitems[curiter.index]; | |
- while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr… | |
- { | |
- piter->index = curiter.index; | |
- count = min( MAXFREQ, count + 1 ); | |
- veciter_next( &curiter ); | |
- pcurstr = &curiter.plist->pitems[curiter.index]; | |
- } | |
+ while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcur… | |
+ piter->index = curiter.index; | |
+ count = min(MAXFREQ, count + 1); | |
+ veciter_next(&curiter); | |
+ pcurstr = &curiter.plist->pitems[curiter.index]; | |
+ } | |
- return count; | |
+ return count; | |
} | |
-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, c… | |
+dbh_t * | |
+dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
{ | |
- dbh_t* pdb = NULL; | |
+ dbh_t *pdb = NULL; | |
- switch( dbfmt ) | |
- { | |
- case db_text: | |
- pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass ); | |
- break; | |
- default: | |
- assert(false); | |
- } | |
+ switch (dbfmt) { | |
+ case db_text: | |
+ pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass); | |
+ break; | |
+ default: | |
+ break; | |
+ } | |
- return pdb; | |
+ return pdb; | |
} | |
diff --git a/dbh.h b/dbh.h | |
@@ -11,44 +11,40 @@ | |
#define _DBH_H | |
/* database formats */ | |
-typedef enum | |
-{ | |
- db_text /* flat text */ | |
-} dbfmt_t; | |
+typedef enum { | |
+ db_text /* flat text */ | |
+} dbfmt_t; | |
/* record/field structure */ | |
-typedef struct _rec | |
-{ | |
- str_t w; | |
- uint n; | |
-} rec_t; | |
+typedef struct _rec { | |
+ str_t w; | |
+ uint n; | |
+} rec_t; | |
/* database table */ | |
typedef struct _dbt dbt_t; | |
-struct _dbt | |
-{ | |
- bool_t (*close)(dbt_t*); | |
- bool_t (*mergeclose)(dbt_t*,vec_t*); | |
- bool_t (*unmergeclose)(dbt_t*,vec_t*); | |
- bool_t (*import)(dbt_t*,cpchar); | |
- bool_t (*export)(dbt_t*,cpchar); | |
- uint (*getmsgcount)(dbt_t*); | |
- uint (*getcount)(dbt_t*,str_t*); | |
+struct _dbt { | |
+ bool_t(*close) (dbt_t *); | |
+ bool_t(*mergeclose) (dbt_t *, vec_t *); | |
+ bool_t(*unmergeclose) (dbt_t *, vec_t *); | |
+ bool_t(*import) (dbt_t *, cpchar); | |
+ bool_t(*export) (dbt_t *, cpchar); | |
+ uint(*getmsgcount) (dbt_t *); | |
+ uint(*getcount) (dbt_t *, str_t *); | |
}; | |
/* database instance */ | |
typedef struct _dbh dbh_t; | |
-struct _dbh | |
-{ | |
- bool_t (*close)(dbh_t*); | |
- dbt_t* (*opentable)(dbh_t*,cpchar,bool_t); | |
+struct _dbh { | |
+ bool_t(*close) (dbh_t *); | |
+ dbt_t *(*opentable) (dbh_t *, cpchar, bool_t); | |
}; | |
-dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, … | |
+dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cp… | |
#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n" | |
#define TEXTDB_MAXLINELEN (MAXWORDLEN+32) | |
-uint db_getnewcount( veciter_t* piter ); | |
+uint db_getnewcount(veciter_t * piter); | |
-#endif /* ndef _DBH_H */ | |
+#endif /* ndef _DBH_H */ | |
diff --git a/dbtext.c b/dbtext.c | |
@@ -21,78 +21,83 @@ | |
static void | |
dbtext_table_setsize(dbttext_t * pthis, uint nsize) | |
{ | |
- if (nsize > pthis->nalloc) { | |
- uint nnewalloc; | |
- rec_t *pnewitems; | |
- uint n; | |
- | |
- nnewalloc = pthis->nalloc * 2; | |
- if (nnewalloc < nsize) | |
- nnewalloc = nsize; | |
- pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeo… | |
- if (pnewitems == NULL) { | |
- exit(2); | |
- } | |
- for (n = pthis->nitems; n < nsize; n++) { | |
- str_create(&pnewitems[n].w); | |
- pnewitems[n].n = 0; | |
- } | |
- pthis->pitems = pnewitems; | |
- pthis->nalloc = nnewalloc; | |
+ uint nnewalloc; | |
+ rec_t *pnewitems; | |
+ uint n; | |
+ | |
+ if (nsize <= pthis->nalloc) | |
+ return; | |
+ | |
+ nnewalloc = pthis->nalloc * 2; | |
+ if (nnewalloc < nsize) | |
+ nnewalloc = nsize; | |
+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)… | |
+ if (pnewitems == NULL) { | |
+ exit(2); | |
} | |
+ for (n = pthis->nitems; n < nsize; n++) { | |
+ str_create(&pnewitems[n].w); | |
+ pnewitems[n].n = 0; | |
+ } | |
+ pthis->pitems = pnewitems; | |
+ pthis->nalloc = nnewalloc; | |
} | |
dbh_t * | |
dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) | |
{ | |
- dbhtext_t *pthis; | |
- | |
+ dbhtext_t *pthis = NULL; | |
uint dirlen; | |
cpchar phome; | |
struct stat st; | |
- pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t)); | |
- if (pthis == NULL) { | |
+ if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) { | |
+ perror("malloc()"); | |
goto bail; | |
} | |
+ | |
pthis->close = dbtext_db_close; | |
pthis->opentable = dbtext_db_opentable; | |
+ | |
if (dbname != NULL && *dbname != '\0') { | |
dirlen = strlen(dbname); | |
- pthis->dir = strdup(dbname); | |
- if (pthis->dir[dirlen - 1] == '/') { | |
- pthis->dir[dirlen - 1] = '\0'; | |
+ if ((pthis->dir = strdup(dbname)) == NULL) { | |
+ perror("strdup()"); | |
+ goto bail; | |
} | |
+ if (dirlen && pthis->dir[dirlen - 1] == '/') | |
+ pthis->dir[--dirlen] = '\0'; | |
} else { | |
phome = getenv("HOME"); | |
if (phome == NULL || *phome == '\0') { | |
phome = "."; | |
} | |
- pthis->dir = (char *) malloc(strlen(phome) + 5 + 1); | |
- if (pthis->dir == NULL) { | |
+ dirlen = strlen(phome) + 5 + 1; | |
+ if ((pthis->dir = malloc(dirlen)) == NULL) | |
goto bail; | |
- } | |
- sprintf(pthis->dir, "%s/.bmf", phome); | |
+ | |
+ snprintf(pthis->dir, dirlen, "%s/.bmf", phome); | |
} | |
- /* ensure config directory exists */ | |
+ /* make sure config directory exists */ | |
if (stat(pthis->dir, &st) != 0) { | |
- if (errno == ENOENT) { | |
- if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != … | |
- goto bail; | |
- } | |
- } else { | |
+ if (errno != ENOENT || | |
+ mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) | |
goto bail; | |
- } | |
} else { | |
- if (!S_ISDIR(st.st_mode)) { | |
+ if (!S_ISDIR(st.st_mode)) | |
goto bail; | |
- } | |
} | |
- return (dbh_t *) pthis; | |
+ return (dbh_t *)pthis; | |
bail: | |
+ if (pthis) { | |
+ if (pthis->dir) | |
+ free(pthis->dir); | |
+ free(pthis); | |
+ } | |
+ | |
return NULL; | |
} | |
@@ -114,19 +119,17 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool… | |
#endif /* ndef NOLOCK */ | |
char szpath[PATH_MAX]; | |
- int flags; | |
+ int flags, ret; | |
struct stat st; | |
- | |
char *pbegin; | |
char *pend; | |
rec_t r; | |
uint pos; | |
- if (pthis->dir == NULL) { | |
+ if (pthis->dir == NULL) | |
goto bail; | |
- } | |
- ptable = (dbttext_t *) malloc(sizeof(dbttext_t)); | |
- if (ptable == NULL) { | |
+ | |
+ if ((ptable = malloc(sizeof(dbttext_t))) == NULL) { | |
perror("malloc()"); | |
goto bail; | |
} | |
@@ -144,13 +147,18 @@ dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool… | |
ptable->nitems = 0; | |
ptable->pitems = NULL; | |
- sprintf(szpath, "%s/%s.txt", pthis->dir, table); | |
- flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT); | |
- ptable->fd = open(szpath, flags, 0644); | |
- if (ptable->fd == -1) { | |
+ ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table); | |
+ if (ret == -1 || (size_t)ret >= sizeof(szpath)) { | |
+ fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl… | |
+ goto bail; | |
+ } | |
+ | |
+ flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR); | |
+ if ((ptable->fd = open(szpath, flags, 0644)) == -1) { | |
perror("open()"); | |
goto bail; | |
} | |
+ | |
#ifndef NOLOCK | |
memset(&lock, 0, sizeof(lock)); | |
lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
@@ -307,8 +315,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
} | |
if (cmp < 0) { | |
/* write existing str */ | |
- assert(prec->w.p != NULL && prec->w.len > 0); | |
- assert(prec->w.len <= MAXWORDLEN); | |
count = prec->n; | |
strncpylwr(p, prec->w.p, prec->w.len); | |
p += prec->w.len; | |
@@ -318,10 +324,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
pos++; | |
} else if (cmp == 0) { | |
/* same str, merge and write sum */ | |
- assert(prec->w.p != NULL && prec->w.len > 0); | |
- assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
- assert(prec->w.len <= MAXWORDLEN); | |
- assert(pmsgstr->len <= MAXWORDLEN); | |
count = db_getnewcount(&msgiter); | |
count += prec->n; | |
strncpylwr(p, prec->w.p, prec->w.len); | |
@@ -334,8 +336,6 @@ dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
pmsgstr = veciter_get(&msgiter); | |
} else { /* cmp > 0 */ | |
/* write new str */ | |
- assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
- assert(pmsgstr->len <= MAXWORDLEN); | |
count = db_getnewcount(&msgiter); | |
strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
p += pmsgstr->len; | |
@@ -398,8 +398,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
} | |
if (cmp < 0) { | |
/* write existing str */ | |
- assert(prec->w.p != NULL && prec->w.len > 0); | |
- assert(prec->w.len <= MAXWORDLEN); | |
count = prec->n; | |
strncpylwr(p, prec->w.p, prec->w.len); | |
p += prec->w.len; | |
@@ -409,10 +407,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
pos++; | |
} else if (cmp == 0) { | |
/* same str, merge and write difference */ | |
- assert(prec->w.p != NULL && prec->w.len > 0); | |
- assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
- assert(prec->w.len <= MAXWORDLEN); | |
- assert(pmsgstr->len <= MAXWORDLEN); | |
count = db_getnewcount(&msgiter); | |
count = (prec->n > count) ? (prec->n - count) : 0; | |
strncpylwr(p, prec->w.p, prec->w.len); | |
@@ -425,8 +419,6 @@ dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
pmsgstr = veciter_get(&msgiter); | |
} else { /* cmp > 0 */ | |
/* this should not happen, so write with count=0 */ | |
- assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
- assert(pmsgstr->len <= MAXWORDLEN); | |
db_getnewcount(&msgiter); | |
count = 0; | |
strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
@@ -485,53 +477,9 @@ dbtext_table_getcount(dbttext_t * pthis, str_t * pword) | |
else | |
lo = mid; | |
} | |
- assert(hi >= 0 && hi < pthis->nitems); | |
if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { | |
return 0; | |
} | |
return pthis->pitems[hi].n; | |
} | |
- | |
-#ifdef UNIT_TEST | |
-int | |
-main(int argc, char **argv) | |
-{ | |
- dbh_t *pdb; | |
- veciter_t iter; | |
- str_t *pstr; | |
- uint n; | |
- | |
- if (argc != 2) { | |
- fprintf(stderr, "usage: %s <file>\n", argv[0]); | |
- return 1; | |
- } | |
- for (n = 0; n < 100; n++) { | |
- pdb = dbh_open("testlist", true); | |
- | |
- vec_first(&db, &iter); | |
- while ((pstr = veciter_get(&iter)) != NULL) { | |
- char buf[MAXWORDLEN + 32]; | |
- char *p; | |
- | |
- if (pstr->len > 200) { | |
- fprintf(stderr, "str too long: %u chars\n", ps… | |
- break; | |
- } | |
- p = buf; | |
- strcpy(buf, "str: "); | |
- p += 6; | |
- memcpy(p, pstr->p, pstr->len); | |
- p += pstr->len; | |
- sprintf(p, " %u", pstr->count); | |
- puts(buf); | |
- | |
- veciter_next(&iter); | |
- } | |
- | |
- dbh_close(&db); | |
- } | |
- | |
- return 0; | |
-} | |
-#endif /* def UNIT_TEST */ | |
diff --git a/filt.c b/filt.c | |
@@ -21,7 +21,7 @@ | |
#define DEVIATION(n) fabs((n)-0.5f) | |
/* Dump the contents of a statistics structure */ | |
-void | |
+void | |
statdump(stats_t * pstat, int fd) | |
{ | |
char iobuf[IOBUFSIZE]; | |
@@ -49,7 +49,7 @@ statdump(stats_t * pstat, int fd) | |
} | |
} | |
-void | |
+void | |
bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) | |
{ | |
veciter_t iter; | |
@@ -95,8 +95,6 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, sta… | |
double goodprob = goodtotal ? min(1.0, (goodness / goo… | |
double spamprob = spamtotal ? min(1.0, (spamness / spa… | |
- assert(goodtotal > 0 || spamtotal > 0); | |
- | |
#ifdef NON_EQUIPROBABLE | |
prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_p… | |
#else | |
@@ -146,7 +144,7 @@ bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, s… | |
pstats->spamicity = product / (product + invproduct); | |
} | |
-bool_t | |
+bool_t | |
bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok) | |
{ | |
str_t w; | |
diff --git a/lex.c b/lex.c | |
@@ -561,8 +561,6 @@ lex_nexttoken(lex_t * pthis, tok_t * ptok) | |
uint len; | |
uint toklen; | |
- assert(pthis->pbuf != NULL); | |
- | |
if (pthis->pos == pthis->eom) { | |
pthis->bom = pthis->pos; | |
} | |
@@ -637,9 +635,6 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits) | |
char szbuf[256]; | |
bool_t in_headers = true; | |
- assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen); | |
- assert(pthis->bom <= pthis->eom); | |
- | |
pthis->pos = pthis->bom; | |
if (is_spam) { | |
sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests… | |
@@ -682,43 +677,3 @@ lex_passthru(lex_t * pthis, bool_t is_spam, double hits) | |
} | |
pthis->bom = pthis->eom; | |
} | |
- | |
-#ifdef UNIT_TEST | |
- | |
-int | |
-main(int argc, char **argv) | |
-{ | |
- int fd; | |
- lex_t lex; | |
- tok_t tok; | |
- | |
- fd = STDIN_FILENO; | |
- if (argc == 2) { | |
- fd = open(argv[1], O_RDONLY); | |
- } | |
- lex_create(&lex); | |
- if (!lex_load(&lex, fd)) { | |
- fprintf(stderr, "cannot load file\n"); | |
- exit(1); | |
- } | |
- lex_nexttoken(&lex, &tok); | |
- while (tok.tt != eof) { | |
- char sztok[64]; | |
- | |
- if (tok.len > MAXWORDLEN) { | |
- printf("*** token too long! ***\n"); | |
- exit(1); | |
- } | |
- memcpy(sztok, tok.p, tok.len); | |
- strlwr(sztok); | |
- sztok[tok.len] = '\0'; | |
- printf("get_token: %d '%s'\n", tok.tt, sztok); | |
- | |
- lex_nexttoken(&lex, &tok); | |
- } | |
- | |
- lex_destroy(&lex); | |
- return 0; | |
-} | |
- | |
-#endif /* def UNIT_TEST */ | |
diff --git a/lex.h b/lex.h | |
@@ -14,23 +14,23 @@ typedef enum { from, eof, word } toktype_t; | |
typedef struct _tok | |
{ | |
- toktype_t tt; /* token type */ | |
- char* p; | |
- uint len; | |
+ toktype_t tt; /* token type */ | |
+ char *p; | |
+ uint len; | |
} tok_t; | |
typedef enum { envelope, hdrs, body } msgsec_t; | |
typedef struct _lex | |
{ | |
- mbox_t mboxtype; | |
- msgsec_t section; /* current section (envelope, headers, body) */ | |
- uint pos; /* current position */ | |
- uint bom; /* beginning of message */ | |
- uint eom; /* end of current message (start of next) */ | |
- uint lineend; /* line end (actually, start of next line) */ | |
- uint buflen; /* length of buffer */ | |
- char* pbuf; | |
+ mbox_t mboxtype; | |
+ msgsec_t section; /* current section (envelope, headers, bod… | |
+ uint pos; /* current position */ | |
+ uint bom; /* beginning of message */ | |
+ uint eom; /* end of current message (start of ne… | |
+ uint lineend; /* line end (actually, start of next l… | |
+ uint buflen; /* length of buffer */ | |
+ char *pbuf; | |
} lex_t; | |
void lex_create ( lex_t* plex, mbox_t mboxtype ); | |
diff --git a/str.c b/str.c | |
@@ -12,23 +12,6 @@ | |
#include "str.h" | |
void | |
-strlwr(char *s) | |
-{ | |
- while (*s != '\0') { | |
- *s = tolower(*s); | |
- s++; | |
- } | |
-} | |
- | |
-void | |
-strcpylwr(char *d, const char *s) | |
-{ | |
- while (*s != '\0') { | |
- *d++ = tolower(*s++); | |
- } | |
-} | |
- | |
-void | |
strncpylwr(char *d, const char *s, int n) | |
{ | |
while (n--) { | |
@@ -37,46 +20,20 @@ strncpylwr(char *d, const char *s, int n) | |
} | |
void | |
-str_create(str_t * pstr) | |
+str_create(str_t *pstr) | |
{ | |
pstr->p = NULL; | |
pstr->len = 0; | |
} | |
-void | |
-str_destroy(str_t * pstr) | |
-{ | |
- /* empty */ | |
-} | |
- | |
-int | |
-str_cmp(const str_t * pthis, const str_t * pother) | |
-{ | |
- uint minlen = min(pthis->len, pother->len); | |
- int cmp; | |
- | |
- assert(pthis->p != NULL && pother->p != NULL && minlen != 0); | |
- | |
- cmp = strncmp(pthis->p, pother->p, minlen); | |
- | |
- if (cmp == 0 && pthis->len != pother->len) { | |
- cmp = (pthis->len < pother->len) ? -1 : 1; | |
- } | |
- return cmp; | |
-} | |
- | |
int | |
str_casecmp(const str_t * pthis, const str_t * pother) | |
{ | |
- uint minlen = min(pthis->len, pother->len); | |
int cmp; | |
- assert(pthis->p != NULL && pother->p != NULL && minlen != 0); | |
- | |
- cmp = strncasecmp(pthis->p, pother->p, minlen); | |
- | |
- if (cmp == 0 && pthis->len != pother->len) { | |
+ cmp = strncasecmp(pthis->p, pother->p, min(pthis->len, pother->len)); | |
+ if (cmp == 0 && pthis->len != pother->len) | |
cmp = (pthis->len < pother->len) ? -1 : 1; | |
- } | |
+ | |
return cmp; | |
} | |
diff --git a/str.h b/str.h | |
@@ -11,8 +11,6 @@ | |
#define _STR_H | |
/* a couple of generic string functions... */ | |
-void strlwr( char* s ); | |
-void strcpylwr( char* d, const char* s ); | |
void strncpylwr( char* d, const char* s, int n ); | |
typedef struct _str | |
@@ -22,9 +20,7 @@ typedef struct _str | |
} str_t; | |
void str_create ( str_t* pthis ); | |
-void str_destroy( str_t* pthis ); | |
-int str_cmp ( const str_t* pthis, const str_t* pother ); | |
int str_casecmp( const str_t* pthis, const str_t* pother ); | |
#endif /* ndef _STR_H */ | |
diff --git a/vec.c b/vec.c | |
@@ -61,55 +61,20 @@ vec_setsize(vec_t * pthis, uint nsize) | |
} | |
void | |
-vec_addhead(vec_t * pthis, str_t * pstr) | |
-{ | |
- assert(pstr->p != NULL && pstr->len > 0); | |
- | |
- vec_setsize(pthis, pthis->nitems + 1); | |
- memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(s… | |
- pthis->pitems[0] = *pstr; | |
- pthis->nitems++; | |
-} | |
- | |
-void | |
vec_addtail(vec_t * pthis, str_t * pstr) | |
{ | |
- assert(pstr->p != NULL && pstr->len > 0); | |
- | |
vec_setsize(pthis, pthis->nitems + 1); | |
pthis->pitems[pthis->nitems] = *pstr; | |
pthis->nitems++; | |
} | |
void | |
-vec_delhead(vec_t * pthis) | |
-{ | |
- assert(pthis->nitems > 0); | |
- pthis->nitems--; | |
- memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(s… | |
-} | |
- | |
-void | |
-vec_deltail(vec_t * pthis) | |
-{ | |
- assert(pthis->nitems > 0); | |
- pthis->nitems--; | |
-} | |
- | |
-void | |
vec_first(vec_t * pthis, veciter_t * piter) | |
{ | |
piter->plist = pthis; | |
piter->index = 0; | |
} | |
-void | |
-vec_last(vec_t * pthis, veciter_t * piter) | |
-{ | |
- piter->plist = pthis; | |
- piter->index = pthis->nitems; | |
-} | |
- | |
/***************************************************************************** | |
* sorted vector | |
*/ | |
@@ -121,66 +86,6 @@ svec_compare(const void *p1, const void *p2) | |
} | |
void | |
-svec_add(vec_t * pthis, str_t * pstr) | |
-{ | |
- int lo, hi, mid; | |
- veciter_t iter; | |
- | |
- if (pthis->nitems == 0) { | |
- vec_addtail(pthis, pstr); | |
- return; | |
- } | |
- if (str_casecmp(pstr, &pthis->pitems[0]) < 0) { | |
- vec_addhead(pthis, pstr); | |
- return; | |
- } | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while (hi - lo > 1) { | |
- mid = (hi + lo) / 2; | |
- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- assert(hi < pthis->nitems); | |
- | |
- iter.plist = pthis; | |
- iter.index = hi; | |
- | |
- if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) { | |
- veciter_addbefore(&iter, pstr); | |
- } else { | |
- veciter_addafter(&iter, pstr); | |
- } | |
-} | |
- | |
-str_t * | |
-svec_find(vec_t * pthis, str_t * pstr) | |
-{ | |
- int lo, hi, mid; | |
- | |
- if (pthis->nitems == 0) { | |
- return NULL; | |
- } | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while (hi - lo > 1) { | |
- mid = (hi + lo) / 2; | |
- if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- assert(hi >= 0 && hi < pthis->nitems); | |
- | |
- if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) { | |
- return NULL; | |
- } | |
- return &pthis->pitems[hi]; | |
-} | |
- | |
-void | |
svec_sort(vec_t * pthis) | |
{ | |
if (pthis->nitems > 1) { | |
@@ -208,35 +113,6 @@ veciter_get(veciter_t * pthis) | |
} | |
bool_t | |
-veciter_equal(veciter_t * pthis, veciter_t * pthat) | |
-{ | |
- if (pthis->plist != pthat->plist || | |
- pthis->index != pthat->index) { | |
- return false; | |
- } | |
- return true; | |
-} | |
- | |
-bool_t | |
-veciter_hasitem(veciter_t * pthis) | |
-{ | |
- if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { | |
- return false; | |
- } | |
- return true; | |
-} | |
- | |
-bool_t | |
-veciter_prev(veciter_t * pthis) | |
-{ | |
- if (pthis->index == 0) { | |
- return false; | |
- } | |
- pthis->index--; | |
- return true; | |
-} | |
- | |
-bool_t | |
veciter_next(veciter_t * pthis) | |
{ | |
pthis->index++; | |
@@ -245,94 +121,3 @@ veciter_next(veciter_t * pthis) | |
} | |
return true; | |
} | |
- | |
-void | |
-veciter_addafter(veciter_t * pthis, str_t * pstr) | |
-{ | |
- str_t *pitems; | |
- | |
- vec_setsize(pthis->plist, pthis->plist->nitems + 1); | |
- assert(pthis->index < pthis->plist->nitems); | |
- pitems = pthis->plist->pitems; | |
- | |
- if (pthis->index != pthis->plist->nitems - 1) { | |
- memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1], | |
- (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t)); | |
- } | |
- pitems[pthis->index + 1] = *pstr; | |
- pthis->plist->nitems++; | |
-} | |
- | |
-void | |
-veciter_addbefore(veciter_t * pthis, str_t * pstr) | |
-{ | |
- str_t *pitems; | |
- | |
- vec_setsize(pthis->plist, pthis->plist->nitems + 1); | |
- assert(pthis->index < pthis->plist->nitems); | |
- pitems = pthis->plist->pitems; | |
- | |
- memmove(&pitems[pthis->index + 1], &pitems[pthis->index], | |
- (pthis->plist->nitems - pthis->index) * sizeof(str_t)); | |
- | |
- pitems[pthis->index] = *pstr; | |
- pthis->plist->nitems++; | |
-} | |
- | |
-void | |
-veciter_del(veciter_t * pthis) | |
-{ | |
- str_t *pitems; | |
- | |
- assert(pthis->plist->nitems > 0); | |
- pthis->plist->nitems--; | |
- if (pthis->index < pthis->plist->nitems) { | |
- pitems = pthis->plist->pitems; | |
- memmove(&pitems[pthis->index], &pitems[pthis->index + 1], | |
- (pthis->plist->nitems - pthis->index) * sizeof(str_t)); | |
- } | |
-} | |
- | |
-#ifdef UNIT_TEST | |
-int | |
-main(int argc, char **argv) | |
-{ | |
- vec_t vl; | |
- veciter_t iter; | |
- str_t *pstr; | |
- uint n; | |
- | |
- if (argc != 2) { | |
- fprintf(stderr, "usage: %s <file>\n", argv[0]); | |
- return 1; | |
- } | |
- for (n = 0; n < 100; n++) { | |
- vec_create(&vl); | |
- vec_load(&vl, argv[1]); | |
- | |
- vec_first(&vl, &iter); | |
- while ((pstr = veciter_get(&iter)) != NULL) { | |
- char buf[256]; | |
- char *p; | |
- | |
- if (pstr->len > 200) { | |
- fprintf(stderr, "str too long: %u chars\n", ps… | |
- break; | |
- } | |
- p = buf; | |
- strcpy(buf, "str: "); | |
- p += 6; | |
- memcpy(p, pstr->p, pstr->len); | |
- p += pstr->len; | |
- sprintf(p, " %u", pstr->count); | |
- puts(buf); | |
- | |
- veciter_next(&iter); | |
- } | |
- | |
- vec_destroy(&vl); | |
- } | |
- | |
- return 0; | |
-} | |
-#endif /* def UNIT_TEST */ | |
diff --git a/vec.h b/vec.h | |
@@ -13,46 +13,30 @@ | |
/* item count for initial alloc */ | |
#define VEC_INITIAL_SIZE 256 | |
-typedef struct _vec | |
-{ | |
- uint nalloc; /* items alloced in pitems */ | |
- uint nitems; /* items available */ | |
- str_t* pitems; /* growing vector of items */ | |
+typedef struct _vec { | |
+ uint nalloc; /* items allocated in pitems */ | |
+ uint nitems; /* items available */ | |
+ str_t *pitems; /* growing vector of items */ | |
} vec_t; | |
-typedef struct _veciter | |
-{ | |
- struct _vec* plist; | |
- uint index; | |
+typedef struct _veciter { | |
+ struct _vec *plist; | |
+ uint index; | |
} veciter_t; | |
/* class vector */ | |
-void vec_create ( vec_t* pthis ); | |
-void vec_destroy ( vec_t* pthis ); | |
+void vec_create(vec_t * pthis); | |
+void vec_destroy(vec_t * pthis); | |
+void vec_addtail(vec_t * pthis, str_t * pstr); | |
+void vec_first(vec_t * pthis, veciter_t * piter); | |
-void vec_addhead ( vec_t* pthis, str_t* pstr ); | |
-void vec_addtail ( vec_t* pthis, str_t* pstr ); | |
-void vec_delhead ( vec_t* pthis ); | |
-void vec_deltail ( vec_t* pthis ); | |
+/* class sorted_vector */ | |
+void svec_sort(vec_t * ptthis); | |
-void vec_first ( vec_t* pthis, veciter_t* piter ); | |
-void vec_last ( vec_t* pthis, veciter_t* piter ); | |
+/* veciter_create not needed */ | |
+void veciter_destroy(veciter_t * pthis); | |
-/* class sorted_vector */ | |
-void svec_add ( vec_t* pthis, str_t* pstr ); | |
-str_t* svec_find ( vec_t* pthis, str_t* pstr ); | |
-void svec_sort ( vec_t* ptthis ); | |
- | |
-/* veciter_create not needed */ | |
-void veciter_destroy ( veciter_t* pthis ); | |
- | |
-str_t* veciter_get ( veciter_t* pthis ); | |
-bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat ); | |
-bool_t veciter_hasitem ( veciter_t* pthis ); | |
-bool_t veciter_prev ( veciter_t* pthis ); | |
-bool_t veciter_next ( veciter_t* pthis ); | |
-void veciter_addafter ( veciter_t* pthis, str_t* pstr ); | |
-void veciter_addbefore( veciter_t* pthis, str_t* pstr ); | |
-void veciter_del ( veciter_t* pthis ); | |
+str_t *veciter_get(veciter_t * pthis); | |
+bool_t veciter_next(veciter_t * pthis); | |
#endif /* ndef _VEC_H */ |