merge dbh and dbtext (WIP) - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patc… | |
git clone git://git.codemadness.org/bmf | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 60b437c6d0bc19fc9f67ca8cfaf6cbfc50d47423 | |
parent 4c3c79f49125ef555fba1df7f6cbab2c7b26ea00 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 27 Oct 2018 19:31:30 +0200 | |
merge dbh and dbtext (WIP) | |
Diffstat: | |
M Makefile | 2 -- | |
M bmf.c | 3 +-- | |
M dbh.c | 474 +++++++++++++++++++++++++++++… | |
M dbh.h | 45 +++++++++++++++++++++++++----… | |
D dbtext.c | 490 -----------------------------… | |
D dbtext.h | 49 -----------------------------… | |
M filt.h | 14 +++++++++----- | |
7 files changed, 511 insertions(+), 566 deletions(-) | |
--- | |
diff --git a/Makefile b/Makefile | |
@@ -9,7 +9,6 @@ SRC = \ | |
bmf.c \ | |
dbg.c \ | |
dbh.c \ | |
- dbtext.c \ | |
filt.c \ | |
lex.c \ | |
str.c \ | |
@@ -21,7 +20,6 @@ HDR = \ | |
config.h \ | |
dbg.h \ | |
dbh.h \ | |
- dbtext.h \ | |
filt.h \ | |
lex.h \ | |
str.h \ | |
diff --git a/bmf.c b/bmf.c | |
@@ -75,7 +75,6 @@ version(void) | |
int | |
main(int argc, char **argv) | |
{ | |
- dbfmt_t dbfmt = db_text; | |
char *dbname = NULL; | |
bool_t rdonly; | |
runmode_t mode = mode_normal; | |
@@ -152,7 +151,7 @@ main(int argc, char **argv) | |
} | |
stats.extrema = (discrim_t *) malloc(stats.keepers * sizeof(discrim_t)… | |
- pdb = dbh_open(dbfmt, "localhost", dbname, "", ""); | |
+ pdb = dbh_open(dbname); | |
if (pdb == NULL) { | |
fprintf(stderr, "%s: cannot open database\n", argv[0]); | |
exit(2); | |
diff --git a/dbh.c b/dbh.c | |
@@ -16,7 +16,6 @@ | |
#include "vec.h" | |
#include "dbh.h" | |
-#include "dbtext.h" | |
/* | |
* get count for new (incoming) word. there may be duplicate entries for the | |
@@ -50,17 +49,472 @@ db_getnewcount(veciter_t * piter) | |
} | |
dbh_t * | |
-dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
+dbh_open(cpchar dbname) | |
{ | |
- dbh_t *pdb; | |
+ dbhtext_t *pthis = NULL; | |
+ uint dirlen; | |
+ cpchar phome; | |
+ struct stat st; | |
- switch (dbfmt) { | |
- case db_text: | |
- pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass); | |
- break; | |
- default: | |
- break; | |
+ if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) { | |
+ perror("malloc()"); | |
+ goto bail; | |
} | |
- return pdb; | |
+ pthis->close = dbtext_db_close; | |
+ pthis->opentable = dbtext_db_opentable; | |
+ | |
+ if (dbname != NULL && *dbname != '\0') { | |
+ dirlen = strlen(dbname); | |
+ if ((pthis->dir = strdup(dbname)) == NULL) { | |
+ perror("strdup()"); | |
+ goto bail; | |
+ } | |
+ if (dirlen && pthis->dir[dirlen - 1] == '/') | |
+ pthis->dir[--dirlen] = '\0'; | |
+ } else { | |
+ phome = getenv("HOME"); | |
+ if (phome == NULL || *phome == '\0') { | |
+ phome = "."; | |
+ } | |
+ dirlen = strlen(phome) + 5 + 1; | |
+ if ((pthis->dir = malloc(dirlen)) == NULL) | |
+ goto bail; | |
+ | |
+ /* NOTE: no truncation possible */ | |
+ snprintf(pthis->dir, dirlen, "%s/.bmf", phome); | |
+ } | |
+ | |
+ /* make sure config directory exists */ | |
+ if (stat(pthis->dir, &st) != 0) { | |
+ if (errno != ENOENT || | |
+ mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) | |
+ goto bail; | |
+ } else { | |
+ if (!S_ISDIR(st.st_mode)) | |
+ goto bail; | |
+ } | |
+ | |
+ /* unveil(2), TODO: rework later */ | |
+ /* TODO: permission depending on mode */ | |
+ char listpath[PATH_MAX]; | |
+ snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist"); | |
+ if (unveil(listpath, "rw") == -1) { | |
+ perror("unveil()"); | |
+ exit(2); | |
+ } | |
+ snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist"); | |
+ if (unveil(listpath, "rw") == -1) { | |
+ perror("unveil()"); | |
+ exit(2); | |
+ } | |
+ if (unveil(NULL, NULL) == -1) { | |
+ perror("unveil()"); | |
+ exit(2); | |
+ } | |
+ | |
+ return (dbh_t *)pthis; | |
+ | |
+bail: | |
+ if (pthis) { | |
+ if (pthis->dir) | |
+ free(pthis->dir); | |
+ free(pthis); | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
+static void | |
+dbtext_table_setsize(dbttext_t * pthis, uint nsize) | |
+{ | |
+ uint nnewalloc; | |
+ rec_t *pnewitems; | |
+ uint n; | |
+ | |
+ if (nsize <= pthis->nalloc) | |
+ return; | |
+ | |
+ nnewalloc = pthis->nalloc * 2; | |
+ if (nnewalloc < nsize) | |
+ nnewalloc = nsize; | |
+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)… | |
+ if (pnewitems == NULL) { | |
+ exit(2); | |
+ } | |
+ for (n = pthis->nitems; n < nsize; n++) { | |
+ str_create(&pnewitems[n].w); | |
+ pnewitems[n].n = 0; | |
+ } | |
+ pthis->pitems = pnewitems; | |
+ pthis->nalloc = nnewalloc; | |
+} | |
+ | |
+bool_t | |
+dbtext_db_close(dbhtext_t * pthis) | |
+{ | |
+ free(pthis->dir); | |
+ pthis->dir = NULL; | |
+ return true; | |
+} | |
+ | |
+dbt_t * | |
+dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) | |
+{ | |
+ dbttext_t *ptable = NULL; | |
+ | |
+#ifndef NOLOCK | |
+ struct flock lock; | |
+ | |
+#endif /* ndef NOLOCK */ | |
+ char szpath[PATH_MAX]; | |
+ int flags, ret; | |
+ struct stat st; | |
+ char *pbegin; | |
+ char *pend; | |
+ rec_t r; | |
+ uint pos; | |
+ | |
+ if (pthis->dir == NULL) | |
+ goto bail; | |
+ | |
+ if ((ptable = malloc(sizeof(dbttext_t))) == NULL) { | |
+ perror("malloc()"); | |
+ goto bail; | |
+ } | |
+ ptable->close = dbtext_table_close; | |
+ ptable->mergeclose = dbtext_table_mergeclose; | |
+ ptable->unmergeclose = dbtext_table_unmergeclose; | |
+ ptable->getmsgcount = dbtext_table_getmsgcount; | |
+ ptable->getcount = dbtext_table_getcount; | |
+ ptable->fd = -1; | |
+ ptable->pbuf = NULL; | |
+ ptable->nmsgs = 0; | |
+ ptable->nalloc = 0; | |
+ ptable->nitems = 0; | |
+ ptable->pitems = NULL; | |
+ | |
+ ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table); | |
+ if (ret == -1 || (size_t)ret >= sizeof(szpath)) { | |
+ fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl… | |
+ goto bail; | |
+ } | |
+ | |
+ flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR); | |
+ if ((ptable->fd = open(szpath, flags, 0644)) == -1) { | |
+ perror("open()"); | |
+ goto bail; | |
+ } | |
+ | |
+#ifndef NOLOCK | |
+ memset(&lock, 0, sizeof(lock)); | |
+ lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
+ lock.l_start = 0; | |
+ lock.l_whence = SEEK_SET; | |
+ lock.l_len = 0; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
+ | |
+ if (fstat(ptable->fd, &st) != 0) { | |
+ perror("fstat()"); | |
+ goto bail_uc; | |
+ } | |
+ if (st.st_size == 0) { | |
+ return (dbt_t *) ptable; | |
+ } | |
+ ptable->pbuf = (char *) malloc(st.st_size); | |
+ if (ptable->pbuf == NULL) { | |
+ perror("malloc()"); | |
+ goto bail_uc; | |
+ } | |
+ if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) { | |
+ perror("read()"); | |
+ goto bail_fuc; | |
+ } | |
+ /* XXX: bogofilter compatibility */ | |
+ if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) { | |
+ goto bail_fuc; | |
+ } | |
+ pbegin = ptable->pbuf; | |
+ while (*pbegin != '\n') | |
+ pbegin++; | |
+ pbegin++; | |
+ | |
+ pos = 0; | |
+ while (pbegin < ptable->pbuf + st.st_size) { | |
+ pend = pbegin; | |
+ r.w.p = pbegin; | |
+ r.w.len = 0; | |
+ r.n = 0; | |
+ | |
+ while (*pend != '\n') { | |
+ if (pend >= ptable->pbuf + st.st_size) { | |
+ goto bail_fuc; | |
+ } | |
+ *pend = tolower(*pend); | |
+ if (*pend == ' ') { | |
+ r.w.len = (pend - pbegin); | |
+ r.n = strtol(pend + 1, NULL, 10); | |
+ } | |
+ pend++; | |
+ } | |
+ if (pend > pbegin && *pbegin != '#' && *pbegin != ';') { | |
+ if (r.w.len == 0 || r.w.len > MAXWORDLEN) { | |
+ fprintf(stderr, "dbh_loadfile: bad file format… | |
+ goto bail_fuc; | |
+ } | |
+ dbtext_table_setsize(ptable, pos + 1); | |
+ ptable->pitems[pos++] = r; | |
+ ptable->nitems = pos; | |
+ } | |
+ pbegin = pend + 1; | |
+ } | |
+ | |
+ if (rdonly) { | |
+#ifndef NOLOCK | |
+ lock.l_type = F_UNLCK; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
+ close(ptable->fd); | |
+ ptable->fd = -1; | |
+ } | |
+ return (dbt_t *) ptable; | |
+ | |
+bail_fuc: | |
+ free(ptable->pbuf); | |
+ | |
+bail_uc: | |
+#ifndef NOLOCK | |
+ lock.l_type = F_UNLCK; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
+ | |
+ close(ptable->fd); | |
+ ptable->fd = -1; | |
+ | |
+bail: | |
+ free(ptable); | |
+ return NULL; | |
+} | |
+ | |
+bool_t | |
+dbtext_table_close(dbttext_t * pthis) | |
+{ | |
+ struct flock lockall; | |
+ | |
+ free(pthis->pbuf); | |
+ pthis->pbuf = NULL; | |
+ free(pthis->pitems); | |
+ pthis->pitems = NULL; | |
+ | |
+ if (pthis->fd != -1) { | |
+#ifndef NOLOCK | |
+ memset(&lockall, 0, sizeof(lockall)); | |
+ lockall.l_type = F_UNLCK; | |
+ lockall.l_start = 0; | |
+ lockall.l_whence = SEEK_SET; | |
+ lockall.l_len = 0; | |
+ fcntl(pthis->fd, F_SETLKW, &lockall); | |
+#endif /* ndef NOLOCK */ | |
+ close(pthis->fd); | |
+ pthis->fd = -1; | |
+ } | |
+ return true; | |
+} | |
+ | |
+bool_t | |
+dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
+{ | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t *prec; | |
+ veciter_t msgiter; | |
+ str_t *pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char *p; | |
+ | |
+ if (pthis->fd == -1) { | |
+ return false; | |
+ } | |
+ ftruncate(pthis->fd, 0); | |
+ lseek(pthis->fd, 0, SEEK_SET); | |
+ | |
+ pthis->nmsgs++; | |
+ | |
+ p = iobuf; | |
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
+ | |
+ vec_first(pmsg, &msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ | |
+ pos = 0; | |
+ while (pos < pthis->nitems || pmsgstr != NULL) { | |
+ int cmp = 0; | |
+ | |
+ prec = &pthis->pitems[pos]; | |
+ if (pmsgstr != NULL && pos < pthis->nitems) { | |
+ cmp = str_casecmp(&prec->w, pmsgstr); | |
+ } else { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if (cmp < 0) { | |
+ /* write existing str */ | |
+ count = prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ } else if (cmp == 0) { | |
+ /* same str, merge and write sum */ | |
+ count = db_getnewcount(&msgiter); | |
+ count += prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } else { /* cmp > 0 */ | |
+ /* write new str */ | |
+ count = db_getnewcount(&msgiter); | |
+ strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
+ p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } | |
+ | |
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if (p != iobuf) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ } | |
+ veciter_destroy(&msgiter); | |
+ return dbtext_table_close(pthis); | |
+} | |
+ | |
+bool_t | |
+dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
+{ | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t *prec; | |
+ veciter_t msgiter; | |
+ str_t *pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char *p; | |
+ | |
+ if (pthis->fd == -1) { | |
+ return false; | |
+ } | |
+ ftruncate(pthis->fd, 0); | |
+ lseek(pthis->fd, 0, SEEK_SET); | |
+ | |
+ pthis->nmsgs--; | |
+ | |
+ p = iobuf; | |
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
+ | |
+ vec_first(pmsg, &msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ | |
+ pos = 0; | |
+ while (pos < pthis->nitems || pmsgstr != NULL) { | |
+ int cmp = 0; | |
+ | |
+ prec = &pthis->pitems[pos]; | |
+ if (pmsgstr != NULL && pos < pthis->nitems) { | |
+ cmp = str_casecmp(&prec->w, pmsgstr); | |
+ } else { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if (cmp < 0) { | |
+ /* write existing str */ | |
+ count = prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ } else if (cmp == 0) { | |
+ /* same str, merge and write difference */ | |
+ count = db_getnewcount(&msgiter); | |
+ count = (prec->n > count) ? (prec->n - count) : 0; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } else { /* cmp > 0 */ | |
+ /* this should not happen, so write with count=0 */ | |
+ db_getnewcount(&msgiter); | |
+ count = 0; | |
+ strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
+ p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } | |
+ | |
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if (p != iobuf) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ } | |
+ veciter_destroy(&msgiter); | |
+ return dbtext_table_close(pthis); | |
+} | |
+ | |
+uint | |
+dbtext_table_getmsgcount(dbttext_t * pthis) | |
+{ | |
+ return pthis->nmsgs; | |
+} | |
+ | |
+uint | |
+dbtext_table_getcount(dbttext_t * pthis, str_t * pword) | |
+{ | |
+ int lo, hi, mid; | |
+ | |
+ if (pthis->nitems == 0) { | |
+ return 0; | |
+ } | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ | |
+ if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { | |
+ return 0; | |
+ } | |
+ return pthis->pitems[hi].n; | |
} | |
diff --git a/dbh.h b/dbh.h | |
@@ -10,16 +10,14 @@ | |
#ifndef _DBH_H | |
#define _DBH_H | |
-/* database formats */ | |
-typedef enum { | |
- db_text /* flat text */ | |
-} dbfmt_t; | |
+#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n" | |
+#define TEXTDB_MAXLINELEN (MAXWORDLEN+32) | |
/* record/field structure */ | |
typedef struct _rec { | |
str_t w; | |
uint n; | |
-} rec_t; | |
+} rec_t; | |
/* database table */ | |
typedef struct _dbt dbt_t; | |
@@ -38,11 +36,42 @@ struct _dbh { | |
dbt_t *(*opentable) (dbh_t *, cpchar, bool_t); | |
}; | |
-dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cp… | |
+typedef struct _dbttext dbttext_t; | |
+struct _dbttext | |
+{ | |
+ bool_t (*close)(dbttext_t*); | |
+ bool_t (*mergeclose)(dbttext_t*,vec_t*); | |
+ bool_t (*unmergeclose)(dbttext_t*,vec_t*); | |
+ uint (*getmsgcount)(dbttext_t*); | |
+ uint (*getcount)(dbttext_t*,str_t*); | |
-#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n" | |
-#define TEXTDB_MAXLINELEN (MAXWORDLEN+32) | |
+ int fd; /* file descriptor, if currently open */ | |
+ char* pbuf; /* data buffer, if currently open */ | |
+ uint nmsgs; /* number of messages represented in list */ | |
+ uint nalloc; /* items alloced in pitems */ | |
+ uint nitems; /* items available */ | |
+ rec_t* pitems; /* growing vector of items */ | |
+}; | |
+ | |
+typedef struct _dbhtext dbhtext_t; | |
+struct _dbhtext | |
+{ | |
+ bool_t (*close)(dbhtext_t*); | |
+ dbt_t* (*opentable)(dbhtext_t*,cpchar,bool_t); | |
+ | |
+ char* dir; | |
+}; | |
uint db_getnewcount(veciter_t * piter); | |
+dbh_t* dbtext_db_open(cpchar dbname); | |
+bool_t dbtext_db_close( dbhtext_t* pthis ); | |
+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ); | |
+ | |
+bool_t dbtext_table_close( dbttext_t* pthis ); | |
+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
+uint dbtext_table_getmsgcount( dbttext_t* pthis ); | |
+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ); | |
+ | |
#endif /* ndef _DBH_H */ | |
diff --git a/dbtext.c b/dbtext.c | |
@@ -1,490 +0,0 @@ | |
-/* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */ | |
- | |
-/* | |
- * Copyright (c) 2002 Tom Marshall <[email protected]> | |
- * | |
- * This program is free software. It may be distributed under the terms | |
- * in the file LICENSE, found in the top level of the distribution. | |
- * | |
- * dbtext.c: flatfile database handler | |
- */ | |
- | |
-#include "config.h" | |
-#include "dbg.h" | |
-#include "str.h" | |
-#include "lex.h" | |
-#include "vec.h" | |
- | |
-#include "dbh.h" | |
-#include "dbtext.h" | |
- | |
-static void | |
-dbtext_table_setsize(dbttext_t * pthis, uint nsize) | |
-{ | |
- uint nnewalloc; | |
- rec_t *pnewitems; | |
- uint n; | |
- | |
- if (nsize <= pthis->nalloc) | |
- return; | |
- | |
- nnewalloc = pthis->nalloc * 2; | |
- if (nnewalloc < nsize) | |
- nnewalloc = nsize; | |
- pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)… | |
- if (pnewitems == NULL) { | |
- exit(2); | |
- } | |
- for (n = pthis->nitems; n < nsize; n++) { | |
- str_create(&pnewitems[n].w); | |
- pnewitems[n].n = 0; | |
- } | |
- pthis->pitems = pnewitems; | |
- pthis->nalloc = nnewalloc; | |
-} | |
- | |
-dbh_t * | |
-dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) | |
-{ | |
- dbhtext_t *pthis = NULL; | |
- uint dirlen; | |
- cpchar phome; | |
- struct stat st; | |
- | |
- if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) { | |
- perror("malloc()"); | |
- goto bail; | |
- } | |
- | |
- pthis->close = dbtext_db_close; | |
- pthis->opentable = dbtext_db_opentable; | |
- | |
- if (dbname != NULL && *dbname != '\0') { | |
- dirlen = strlen(dbname); | |
- if ((pthis->dir = strdup(dbname)) == NULL) { | |
- perror("strdup()"); | |
- goto bail; | |
- } | |
- if (dirlen && pthis->dir[dirlen - 1] == '/') | |
- pthis->dir[--dirlen] = '\0'; | |
- } else { | |
- phome = getenv("HOME"); | |
- if (phome == NULL || *phome == '\0') { | |
- phome = "."; | |
- } | |
- dirlen = strlen(phome) + 5 + 1; | |
- if ((pthis->dir = malloc(dirlen)) == NULL) | |
- goto bail; | |
- | |
- /* NOTE: no truncation possible */ | |
- snprintf(pthis->dir, dirlen, "%s/.bmf", phome); | |
- } | |
- | |
- /* make sure config directory exists */ | |
- if (stat(pthis->dir, &st) != 0) { | |
- if (errno != ENOENT || | |
- mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) | |
- goto bail; | |
- } else { | |
- if (!S_ISDIR(st.st_mode)) | |
- goto bail; | |
- } | |
- | |
- /* unveil(2), TODO: rework later */ | |
- /* TODO: permission depending on mode */ | |
- char listpath[PATH_MAX]; | |
- snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist"); | |
- if (unveil(listpath, "rw") == -1) { | |
- perror("unveil()"); | |
- exit(2); | |
- } | |
- snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist"); | |
- if (unveil(listpath, "rw") == -1) { | |
- perror("unveil()"); | |
- exit(2); | |
- } | |
- if (unveil(NULL, NULL) == -1) { | |
- perror("unveil()"); | |
- exit(2); | |
- } | |
- | |
- return (dbh_t *)pthis; | |
- | |
-bail: | |
- if (pthis) { | |
- if (pthis->dir) | |
- free(pthis->dir); | |
- free(pthis); | |
- } | |
- | |
- return NULL; | |
-} | |
- | |
-bool_t | |
-dbtext_db_close(dbhtext_t * pthis) | |
-{ | |
- free(pthis->dir); | |
- pthis->dir = NULL; | |
- return true; | |
-} | |
- | |
-dbt_t * | |
-dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) | |
-{ | |
- dbttext_t *ptable = NULL; | |
- | |
-#ifndef NOLOCK | |
- struct flock lock; | |
- | |
-#endif /* ndef NOLOCK */ | |
- char szpath[PATH_MAX]; | |
- int flags, ret; | |
- struct stat st; | |
- char *pbegin; | |
- char *pend; | |
- rec_t r; | |
- uint pos; | |
- | |
- if (pthis->dir == NULL) | |
- goto bail; | |
- | |
- if ((ptable = malloc(sizeof(dbttext_t))) == NULL) { | |
- perror("malloc()"); | |
- goto bail; | |
- } | |
- ptable->close = dbtext_table_close; | |
- ptable->mergeclose = dbtext_table_mergeclose; | |
- ptable->unmergeclose = dbtext_table_unmergeclose; | |
- ptable->getmsgcount = dbtext_table_getmsgcount; | |
- ptable->getcount = dbtext_table_getcount; | |
- ptable->fd = -1; | |
- ptable->pbuf = NULL; | |
- ptable->nmsgs = 0; | |
- ptable->nalloc = 0; | |
- ptable->nitems = 0; | |
- ptable->pitems = NULL; | |
- | |
- ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table); | |
- if (ret == -1 || (size_t)ret >= sizeof(szpath)) { | |
- fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl… | |
- goto bail; | |
- } | |
- | |
- flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR); | |
- if ((ptable->fd = open(szpath, flags, 0644)) == -1) { | |
- perror("open()"); | |
- goto bail; | |
- } | |
- | |
-#ifndef NOLOCK | |
- memset(&lock, 0, sizeof(lock)); | |
- lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
- lock.l_start = 0; | |
- lock.l_whence = SEEK_SET; | |
- lock.l_len = 0; | |
- fcntl(ptable->fd, F_SETLKW, &lock); | |
-#endif /* ndef NOLOCK */ | |
- | |
- if (fstat(ptable->fd, &st) != 0) { | |
- perror("fstat()"); | |
- goto bail_uc; | |
- } | |
- if (st.st_size == 0) { | |
- return (dbt_t *) ptable; | |
- } | |
- ptable->pbuf = (char *) malloc(st.st_size); | |
- if (ptable->pbuf == NULL) { | |
- perror("malloc()"); | |
- goto bail_uc; | |
- } | |
- if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) { | |
- perror("read()"); | |
- goto bail_fuc; | |
- } | |
- /* XXX: bogofilter compatibility */ | |
- if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) { | |
- goto bail_fuc; | |
- } | |
- pbegin = ptable->pbuf; | |
- while (*pbegin != '\n') | |
- pbegin++; | |
- pbegin++; | |
- | |
- pos = 0; | |
- while (pbegin < ptable->pbuf + st.st_size) { | |
- pend = pbegin; | |
- r.w.p = pbegin; | |
- r.w.len = 0; | |
- r.n = 0; | |
- | |
- while (*pend != '\n') { | |
- if (pend >= ptable->pbuf + st.st_size) { | |
- goto bail_fuc; | |
- } | |
- *pend = tolower(*pend); | |
- if (*pend == ' ') { | |
- r.w.len = (pend - pbegin); | |
- r.n = strtol(pend + 1, NULL, 10); | |
- } | |
- pend++; | |
- } | |
- if (pend > pbegin && *pbegin != '#' && *pbegin != ';') { | |
- if (r.w.len == 0 || r.w.len > MAXWORDLEN) { | |
- fprintf(stderr, "dbh_loadfile: bad file format… | |
- goto bail_fuc; | |
- } | |
- dbtext_table_setsize(ptable, pos + 1); | |
- ptable->pitems[pos++] = r; | |
- ptable->nitems = pos; | |
- } | |
- pbegin = pend + 1; | |
- } | |
- | |
- if (rdonly) { | |
-#ifndef NOLOCK | |
- lock.l_type = F_UNLCK; | |
- fcntl(ptable->fd, F_SETLKW, &lock); | |
-#endif /* ndef NOLOCK */ | |
- close(ptable->fd); | |
- ptable->fd = -1; | |
- } | |
- return (dbt_t *) ptable; | |
- | |
-bail_fuc: | |
- free(ptable->pbuf); | |
- | |
-bail_uc: | |
-#ifndef NOLOCK | |
- lock.l_type = F_UNLCK; | |
- fcntl(ptable->fd, F_SETLKW, &lock); | |
-#endif /* ndef NOLOCK */ | |
- | |
- close(ptable->fd); | |
- ptable->fd = -1; | |
- | |
-bail: | |
- free(ptable); | |
- return NULL; | |
-} | |
- | |
-bool_t | |
-dbtext_table_close(dbttext_t * pthis) | |
-{ | |
- struct flock lockall; | |
- | |
- free(pthis->pbuf); | |
- pthis->pbuf = NULL; | |
- free(pthis->pitems); | |
- pthis->pitems = NULL; | |
- | |
- if (pthis->fd != -1) { | |
-#ifndef NOLOCK | |
- memset(&lockall, 0, sizeof(lockall)); | |
- lockall.l_type = F_UNLCK; | |
- lockall.l_start = 0; | |
- lockall.l_whence = SEEK_SET; | |
- lockall.l_len = 0; | |
- fcntl(pthis->fd, F_SETLKW, &lockall); | |
-#endif /* ndef NOLOCK */ | |
- close(pthis->fd); | |
- pthis->fd = -1; | |
- } | |
- return true; | |
-} | |
- | |
-bool_t | |
-dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
-{ | |
- /* note that we require both vectors to be sorted */ | |
- | |
- uint pos; | |
- rec_t *prec; | |
- veciter_t msgiter; | |
- str_t *pmsgstr; | |
- uint count; | |
- char iobuf[IOBUFSIZE]; | |
- char *p; | |
- | |
- if (pthis->fd == -1) { | |
- return false; | |
- } | |
- ftruncate(pthis->fd, 0); | |
- lseek(pthis->fd, 0, SEEK_SET); | |
- | |
- pthis->nmsgs++; | |
- | |
- p = iobuf; | |
- p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
- | |
- vec_first(pmsg, &msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- | |
- pos = 0; | |
- while (pos < pthis->nitems || pmsgstr != NULL) { | |
- int cmp = 0; | |
- | |
- prec = &pthis->pitems[pos]; | |
- if (pmsgstr != NULL && pos < pthis->nitems) { | |
- cmp = str_casecmp(&prec->w, pmsgstr); | |
- } else { | |
- /* we exhausted one list or the other (but not both) */ | |
- cmp = (pos < pthis->nitems) ? -1 : 1; | |
- } | |
- if (cmp < 0) { | |
- /* write existing str */ | |
- count = prec->n; | |
- strncpylwr(p, prec->w.p, prec->w.len); | |
- p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- pos++; | |
- } else if (cmp == 0) { | |
- /* same str, merge and write sum */ | |
- count = db_getnewcount(&msgiter); | |
- count += prec->n; | |
- strncpylwr(p, prec->w.p, prec->w.len); | |
- p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- pos++; | |
- veciter_next(&msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- } else { /* cmp > 0 */ | |
- /* write new str */ | |
- count = db_getnewcount(&msgiter); | |
- strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
- p += pmsgstr->len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- veciter_next(&msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- } | |
- | |
- if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
- write(pthis->fd, iobuf, p - iobuf); | |
- p = iobuf; | |
- } | |
- } | |
- if (p != iobuf) { | |
- write(pthis->fd, iobuf, p - iobuf); | |
- } | |
- veciter_destroy(&msgiter); | |
- return dbtext_table_close(pthis); | |
-} | |
- | |
-bool_t | |
-dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
-{ | |
- /* note that we require both vectors to be sorted */ | |
- | |
- uint pos; | |
- rec_t *prec; | |
- veciter_t msgiter; | |
- str_t *pmsgstr; | |
- uint count; | |
- char iobuf[IOBUFSIZE]; | |
- char *p; | |
- | |
- if (pthis->fd == -1) { | |
- return false; | |
- } | |
- ftruncate(pthis->fd, 0); | |
- lseek(pthis->fd, 0, SEEK_SET); | |
- | |
- pthis->nmsgs--; | |
- | |
- p = iobuf; | |
- p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
- | |
- vec_first(pmsg, &msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- | |
- pos = 0; | |
- while (pos < pthis->nitems || pmsgstr != NULL) { | |
- int cmp = 0; | |
- | |
- prec = &pthis->pitems[pos]; | |
- if (pmsgstr != NULL && pos < pthis->nitems) { | |
- cmp = str_casecmp(&prec->w, pmsgstr); | |
- } else { | |
- /* we exhausted one list or the other (but not both) */ | |
- cmp = (pos < pthis->nitems) ? -1 : 1; | |
- } | |
- if (cmp < 0) { | |
- /* write existing str */ | |
- count = prec->n; | |
- strncpylwr(p, prec->w.p, prec->w.len); | |
- p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- pos++; | |
- } else if (cmp == 0) { | |
- /* same str, merge and write difference */ | |
- count = db_getnewcount(&msgiter); | |
- count = (prec->n > count) ? (prec->n - count) : 0; | |
- strncpylwr(p, prec->w.p, prec->w.len); | |
- p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- pos++; | |
- veciter_next(&msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- } else { /* cmp > 0 */ | |
- /* this should not happen, so write with count=0 */ | |
- db_getnewcount(&msgiter); | |
- count = 0; | |
- strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
- p += pmsgstr->len; | |
- *p++ = ' '; | |
- p += sprintf(p, "%u\n", count); | |
- | |
- veciter_next(&msgiter); | |
- pmsgstr = veciter_get(&msgiter); | |
- } | |
- | |
- if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
- write(pthis->fd, iobuf, p - iobuf); | |
- p = iobuf; | |
- } | |
- } | |
- if (p != iobuf) { | |
- write(pthis->fd, iobuf, p - iobuf); | |
- } | |
- veciter_destroy(&msgiter); | |
- return dbtext_table_close(pthis); | |
-} | |
- | |
-uint | |
-dbtext_table_getmsgcount(dbttext_t * pthis) | |
-{ | |
- return pthis->nmsgs; | |
-} | |
- | |
-uint | |
-dbtext_table_getcount(dbttext_t * pthis, str_t * pword) | |
-{ | |
- int lo, hi, mid; | |
- | |
- if (pthis->nitems == 0) { | |
- return 0; | |
- } | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while (hi - lo > 1) { | |
- mid = (hi + lo) / 2; | |
- if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- | |
- if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { | |
- return 0; | |
- } | |
- return pthis->pitems[hi].n; | |
-} | |
diff --git a/dbtext.h b/dbtext.h | |
@@ -1,49 +0,0 @@ | |
-/* $Id: dbtext.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */ | |
- | |
-/* | |
- * Copyright (c) 2002 Tom Marshall <[email protected]> | |
- * | |
- * This program is free software. It may be distributed under the terms | |
- * in the file LICENSE, found in the top level of the distribution. | |
- */ | |
- | |
-#ifndef _DBTEXT_H | |
-#define _DBTEXT_H | |
- | |
-typedef struct _dbttext dbttext_t; | |
-struct _dbttext | |
-{ | |
- bool_t (*close)(dbttext_t*); | |
- bool_t (*mergeclose)(dbttext_t*,vec_t*); | |
- bool_t (*unmergeclose)(dbttext_t*,vec_t*); | |
- uint (*getmsgcount)(dbttext_t*); | |
- uint (*getcount)(dbttext_t*,str_t*); | |
- | |
- int fd; /* file descriptor, if currently open */ | |
- char* pbuf; /* data buffer, if currently open */ | |
- uint nmsgs; /* number of messages represented in list */ | |
- uint nalloc; /* items alloced in pitems */ | |
- uint nitems; /* items available */ | |
- rec_t* pitems; /* growing vector of items */ | |
-}; | |
- | |
-typedef struct _dbhtext dbhtext_t; | |
-struct _dbhtext | |
-{ | |
- bool_t (*close)(dbhtext_t*); | |
- dbt_t* (*opentable)(dbhtext_t*,cpchar,bool_t); | |
- | |
- char* dir; | |
-}; | |
- | |
-dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
-bool_t dbtext_db_close( dbhtext_t* pthis ); | |
-dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ); | |
- | |
-bool_t dbtext_table_close( dbttext_t* pthis ); | |
-bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
-bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
-uint dbtext_table_getmsgcount( dbttext_t* pthis ); | |
-uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ); | |
- | |
-#endif /* ndef _DBTEXT_H */ | |
diff --git a/filt.h b/filt.h | |
@@ -10,17 +10,21 @@ | |
#ifndef _FILT_H | |
#define _FILT_H | |
+#include "lex.h" | |
+#include "str.h" | |
+#include "vec.h" | |
+ | |
typedef struct | |
{ | |
- str_t key; | |
- double prob; | |
+ str_t key; | |
+ double prob; | |
} discrim_t; | |
typedef struct | |
{ | |
- double spamicity; | |
- uint keepers; | |
- discrim_t* extrema; | |
+ double spamicity; | |
+ uint keepers; | |
+ discrim_t* extrema; | |
} stats_t; | |
void statdump( stats_t* pstat, int fd ); |