Introduction
Introduction Statistics Contact Development Disclaimer Help
merge dbh and dbtext (WIP) - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
git clone git://git.codemadness.org/bmf
Log
Files
Refs
README
LICENSE
---
commit 60b437c6d0bc19fc9f67ca8cfaf6cbfc50d47423
parent 4c3c79f49125ef555fba1df7f6cbab2c7b26ea00
Author: Hiltjo Posthuma <[email protected]>
Date: Sat, 27 Oct 2018 19:31:30 +0200
merge dbh and dbtext (WIP)
Diffstat:
M Makefile | 2 --
M bmf.c | 3 +--
M dbh.c | 474 +++++++++++++++++++++++++++++…
M dbh.h | 45 +++++++++++++++++++++++++----…
D dbtext.c | 490 -----------------------------…
D dbtext.h | 49 -----------------------------…
M filt.h | 14 +++++++++-----
7 files changed, 511 insertions(+), 566 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -9,7 +9,6 @@ SRC = \
bmf.c \
dbg.c \
dbh.c \
- dbtext.c \
filt.c \
lex.c \
str.c \
@@ -21,7 +20,6 @@ HDR = \
config.h \
dbg.h \
dbh.h \
- dbtext.h \
filt.h \
lex.h \
str.h \
diff --git a/bmf.c b/bmf.c
@@ -75,7 +75,6 @@ version(void)
int
main(int argc, char **argv)
{
- dbfmt_t dbfmt = db_text;
char *dbname = NULL;
bool_t rdonly;
runmode_t mode = mode_normal;
@@ -152,7 +151,7 @@ main(int argc, char **argv)
}
stats.extrema = (discrim_t *) malloc(stats.keepers * sizeof(discrim_t)…
- pdb = dbh_open(dbfmt, "localhost", dbname, "", "");
+ pdb = dbh_open(dbname);
if (pdb == NULL) {
fprintf(stderr, "%s: cannot open database\n", argv[0]);
exit(2);
diff --git a/dbh.c b/dbh.c
@@ -16,7 +16,6 @@
#include "vec.h"
#include "dbh.h"
-#include "dbtext.h"
/*
* get count for new (incoming) word. there may be duplicate entries for the
@@ -50,17 +49,472 @@ db_getnewcount(veciter_t * piter)
}
dbh_t *
-dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
+dbh_open(cpchar dbname)
{
- dbh_t *pdb;
+ dbhtext_t *pthis = NULL;
+ uint dirlen;
+ cpchar phome;
+ struct stat st;
- switch (dbfmt) {
- case db_text:
- pdb = (dbh_t *) dbtext_db_open(dbhost, dbname, dbuser, dbpass);
- break;
- default:
- break;
+ if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
+ perror("malloc()");
+ goto bail;
}
- return pdb;
+ pthis->close = dbtext_db_close;
+ pthis->opentable = dbtext_db_opentable;
+
+ if (dbname != NULL && *dbname != '\0') {
+ dirlen = strlen(dbname);
+ if ((pthis->dir = strdup(dbname)) == NULL) {
+ perror("strdup()");
+ goto bail;
+ }
+ if (dirlen && pthis->dir[dirlen - 1] == '/')
+ pthis->dir[--dirlen] = '\0';
+ } else {
+ phome = getenv("HOME");
+ if (phome == NULL || *phome == '\0') {
+ phome = ".";
+ }
+ dirlen = strlen(phome) + 5 + 1;
+ if ((pthis->dir = malloc(dirlen)) == NULL)
+ goto bail;
+
+ /* NOTE: no truncation possible */
+ snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
+ }
+
+ /* make sure config directory exists */
+ if (stat(pthis->dir, &st) != 0) {
+ if (errno != ENOENT ||
+ mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
+ goto bail;
+ } else {
+ if (!S_ISDIR(st.st_mode))
+ goto bail;
+ }
+
+ /* unveil(2), TODO: rework later */
+ /* TODO: permission depending on mode */
+ char listpath[PATH_MAX];
+ snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist");
+ if (unveil(listpath, "rw") == -1) {
+ perror("unveil()");
+ exit(2);
+ }
+ snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist");
+ if (unveil(listpath, "rw") == -1) {
+ perror("unveil()");
+ exit(2);
+ }
+ if (unveil(NULL, NULL) == -1) {
+ perror("unveil()");
+ exit(2);
+ }
+
+ return (dbh_t *)pthis;
+
+bail:
+ if (pthis) {
+ if (pthis->dir)
+ free(pthis->dir);
+ free(pthis);
+ }
+
+ return NULL;
+}
+
+static void
+dbtext_table_setsize(dbttext_t * pthis, uint nsize)
+{
+ uint nnewalloc;
+ rec_t *pnewitems;
+ uint n;
+
+ if (nsize <= pthis->nalloc)
+ return;
+
+ nnewalloc = pthis->nalloc * 2;
+ if (nnewalloc < nsize)
+ nnewalloc = nsize;
+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)…
+ if (pnewitems == NULL) {
+ exit(2);
+ }
+ for (n = pthis->nitems; n < nsize; n++) {
+ str_create(&pnewitems[n].w);
+ pnewitems[n].n = 0;
+ }
+ pthis->pitems = pnewitems;
+ pthis->nalloc = nnewalloc;
+}
+
+bool_t
+dbtext_db_close(dbhtext_t * pthis)
+{
+ free(pthis->dir);
+ pthis->dir = NULL;
+ return true;
+}
+
+dbt_t *
+dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
+{
+ dbttext_t *ptable = NULL;
+
+#ifndef NOLOCK
+ struct flock lock;
+
+#endif /* ndef NOLOCK */
+ char szpath[PATH_MAX];
+ int flags, ret;
+ struct stat st;
+ char *pbegin;
+ char *pend;
+ rec_t r;
+ uint pos;
+
+ if (pthis->dir == NULL)
+ goto bail;
+
+ if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
+ perror("malloc()");
+ goto bail;
+ }
+ ptable->close = dbtext_table_close;
+ ptable->mergeclose = dbtext_table_mergeclose;
+ ptable->unmergeclose = dbtext_table_unmergeclose;
+ ptable->getmsgcount = dbtext_table_getmsgcount;
+ ptable->getcount = dbtext_table_getcount;
+ ptable->fd = -1;
+ ptable->pbuf = NULL;
+ ptable->nmsgs = 0;
+ ptable->nalloc = 0;
+ ptable->nitems = 0;
+ ptable->pitems = NULL;
+
+ ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
+ if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
+ fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl…
+ goto bail;
+ }
+
+ flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
+ if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
+ perror("open()");
+ goto bail;
+ }
+
+#ifndef NOLOCK
+ memset(&lock, 0, sizeof(lock));
+ lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
+ lock.l_start = 0;
+ lock.l_whence = SEEK_SET;
+ lock.l_len = 0;
+ fcntl(ptable->fd, F_SETLKW, &lock);
+#endif /* ndef NOLOCK */
+
+ if (fstat(ptable->fd, &st) != 0) {
+ perror("fstat()");
+ goto bail_uc;
+ }
+ if (st.st_size == 0) {
+ return (dbt_t *) ptable;
+ }
+ ptable->pbuf = (char *) malloc(st.st_size);
+ if (ptable->pbuf == NULL) {
+ perror("malloc()");
+ goto bail_uc;
+ }
+ if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
+ perror("read()");
+ goto bail_fuc;
+ }
+ /* XXX: bogofilter compatibility */
+ if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
+ goto bail_fuc;
+ }
+ pbegin = ptable->pbuf;
+ while (*pbegin != '\n')
+ pbegin++;
+ pbegin++;
+
+ pos = 0;
+ while (pbegin < ptable->pbuf + st.st_size) {
+ pend = pbegin;
+ r.w.p = pbegin;
+ r.w.len = 0;
+ r.n = 0;
+
+ while (*pend != '\n') {
+ if (pend >= ptable->pbuf + st.st_size) {
+ goto bail_fuc;
+ }
+ *pend = tolower(*pend);
+ if (*pend == ' ') {
+ r.w.len = (pend - pbegin);
+ r.n = strtol(pend + 1, NULL, 10);
+ }
+ pend++;
+ }
+ if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
+ if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
+ fprintf(stderr, "dbh_loadfile: bad file format…
+ goto bail_fuc;
+ }
+ dbtext_table_setsize(ptable, pos + 1);
+ ptable->pitems[pos++] = r;
+ ptable->nitems = pos;
+ }
+ pbegin = pend + 1;
+ }
+
+ if (rdonly) {
+#ifndef NOLOCK
+ lock.l_type = F_UNLCK;
+ fcntl(ptable->fd, F_SETLKW, &lock);
+#endif /* ndef NOLOCK */
+ close(ptable->fd);
+ ptable->fd = -1;
+ }
+ return (dbt_t *) ptable;
+
+bail_fuc:
+ free(ptable->pbuf);
+
+bail_uc:
+#ifndef NOLOCK
+ lock.l_type = F_UNLCK;
+ fcntl(ptable->fd, F_SETLKW, &lock);
+#endif /* ndef NOLOCK */
+
+ close(ptable->fd);
+ ptable->fd = -1;
+
+bail:
+ free(ptable);
+ return NULL;
+}
+
+bool_t
+dbtext_table_close(dbttext_t * pthis)
+{
+ struct flock lockall;
+
+ free(pthis->pbuf);
+ pthis->pbuf = NULL;
+ free(pthis->pitems);
+ pthis->pitems = NULL;
+
+ if (pthis->fd != -1) {
+#ifndef NOLOCK
+ memset(&lockall, 0, sizeof(lockall));
+ lockall.l_type = F_UNLCK;
+ lockall.l_start = 0;
+ lockall.l_whence = SEEK_SET;
+ lockall.l_len = 0;
+ fcntl(pthis->fd, F_SETLKW, &lockall);
+#endif /* ndef NOLOCK */
+ close(pthis->fd);
+ pthis->fd = -1;
+ }
+ return true;
+}
+
+bool_t
+dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
+{
+ /* note that we require both vectors to be sorted */
+
+ uint pos;
+ rec_t *prec;
+ veciter_t msgiter;
+ str_t *pmsgstr;
+ uint count;
+ char iobuf[IOBUFSIZE];
+ char *p;
+
+ if (pthis->fd == -1) {
+ return false;
+ }
+ ftruncate(pthis->fd, 0);
+ lseek(pthis->fd, 0, SEEK_SET);
+
+ pthis->nmsgs++;
+
+ p = iobuf;
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
+
+ vec_first(pmsg, &msgiter);
+ pmsgstr = veciter_get(&msgiter);
+
+ pos = 0;
+ while (pos < pthis->nitems || pmsgstr != NULL) {
+ int cmp = 0;
+
+ prec = &pthis->pitems[pos];
+ if (pmsgstr != NULL && pos < pthis->nitems) {
+ cmp = str_casecmp(&prec->w, pmsgstr);
+ } else {
+ /* we exhausted one list or the other (but not both) */
+ cmp = (pos < pthis->nitems) ? -1 : 1;
+ }
+ if (cmp < 0) {
+ /* write existing str */
+ count = prec->n;
+ strncpylwr(p, prec->w.p, prec->w.len);
+ p += prec->w.len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ pos++;
+ } else if (cmp == 0) {
+ /* same str, merge and write sum */
+ count = db_getnewcount(&msgiter);
+ count += prec->n;
+ strncpylwr(p, prec->w.p, prec->w.len);
+ p += prec->w.len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ pos++;
+ veciter_next(&msgiter);
+ pmsgstr = veciter_get(&msgiter);
+ } else { /* cmp > 0 */
+ /* write new str */
+ count = db_getnewcount(&msgiter);
+ strncpylwr(p, pmsgstr->p, pmsgstr->len);
+ p += pmsgstr->len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ veciter_next(&msgiter);
+ pmsgstr = veciter_get(&msgiter);
+ }
+
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
+ write(pthis->fd, iobuf, p - iobuf);
+ p = iobuf;
+ }
+ }
+ if (p != iobuf) {
+ write(pthis->fd, iobuf, p - iobuf);
+ }
+ veciter_destroy(&msgiter);
+ return dbtext_table_close(pthis);
+}
+
+bool_t
+dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
+{
+ /* note that we require both vectors to be sorted */
+
+ uint pos;
+ rec_t *prec;
+ veciter_t msgiter;
+ str_t *pmsgstr;
+ uint count;
+ char iobuf[IOBUFSIZE];
+ char *p;
+
+ if (pthis->fd == -1) {
+ return false;
+ }
+ ftruncate(pthis->fd, 0);
+ lseek(pthis->fd, 0, SEEK_SET);
+
+ pthis->nmsgs--;
+
+ p = iobuf;
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
+
+ vec_first(pmsg, &msgiter);
+ pmsgstr = veciter_get(&msgiter);
+
+ pos = 0;
+ while (pos < pthis->nitems || pmsgstr != NULL) {
+ int cmp = 0;
+
+ prec = &pthis->pitems[pos];
+ if (pmsgstr != NULL && pos < pthis->nitems) {
+ cmp = str_casecmp(&prec->w, pmsgstr);
+ } else {
+ /* we exhausted one list or the other (but not both) */
+ cmp = (pos < pthis->nitems) ? -1 : 1;
+ }
+ if (cmp < 0) {
+ /* write existing str */
+ count = prec->n;
+ strncpylwr(p, prec->w.p, prec->w.len);
+ p += prec->w.len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ pos++;
+ } else if (cmp == 0) {
+ /* same str, merge and write difference */
+ count = db_getnewcount(&msgiter);
+ count = (prec->n > count) ? (prec->n - count) : 0;
+ strncpylwr(p, prec->w.p, prec->w.len);
+ p += prec->w.len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ pos++;
+ veciter_next(&msgiter);
+ pmsgstr = veciter_get(&msgiter);
+ } else { /* cmp > 0 */
+ /* this should not happen, so write with count=0 */
+ db_getnewcount(&msgiter);
+ count = 0;
+ strncpylwr(p, pmsgstr->p, pmsgstr->len);
+ p += pmsgstr->len;
+ *p++ = ' ';
+ p += sprintf(p, "%u\n", count);
+
+ veciter_next(&msgiter);
+ pmsgstr = veciter_get(&msgiter);
+ }
+
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
+ write(pthis->fd, iobuf, p - iobuf);
+ p = iobuf;
+ }
+ }
+ if (p != iobuf) {
+ write(pthis->fd, iobuf, p - iobuf);
+ }
+ veciter_destroy(&msgiter);
+ return dbtext_table_close(pthis);
+}
+
+uint
+dbtext_table_getmsgcount(dbttext_t * pthis)
+{
+ return pthis->nmsgs;
+}
+
+uint
+dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
+{
+ int lo, hi, mid;
+
+ if (pthis->nitems == 0) {
+ return 0;
+ }
+ hi = pthis->nitems - 1;
+ lo = -1;
+ while (hi - lo > 1) {
+ mid = (hi + lo) / 2;
+ if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0)
+ hi = mid;
+ else
+ lo = mid;
+ }
+
+ if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
+ return 0;
+ }
+ return pthis->pitems[hi].n;
}
diff --git a/dbh.h b/dbh.h
@@ -10,16 +10,14 @@
#ifndef _DBH_H
#define _DBH_H
-/* database formats */
-typedef enum {
- db_text /* flat text */
-} dbfmt_t;
+#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
+#define TEXTDB_MAXLINELEN (MAXWORDLEN+32)
/* record/field structure */
typedef struct _rec {
str_t w;
uint n;
-} rec_t;
+} rec_t;
/* database table */
typedef struct _dbt dbt_t;
@@ -38,11 +36,42 @@ struct _dbh {
dbt_t *(*opentable) (dbh_t *, cpchar, bool_t);
};
-dbh_t *dbh_open(dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, cp…
+typedef struct _dbttext dbttext_t;
+struct _dbttext
+{
+ bool_t (*close)(dbttext_t*);
+ bool_t (*mergeclose)(dbttext_t*,vec_t*);
+ bool_t (*unmergeclose)(dbttext_t*,vec_t*);
+ uint (*getmsgcount)(dbttext_t*);
+ uint (*getcount)(dbttext_t*,str_t*);
-#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
-#define TEXTDB_MAXLINELEN (MAXWORDLEN+32)
+ int fd; /* file descriptor, if currently open */
+ char* pbuf; /* data buffer, if currently open */
+ uint nmsgs; /* number of messages represented in list */
+ uint nalloc; /* items alloced in pitems */
+ uint nitems; /* items available */
+ rec_t* pitems; /* growing vector of items */
+};
+
+typedef struct _dbhtext dbhtext_t;
+struct _dbhtext
+{
+ bool_t (*close)(dbhtext_t*);
+ dbt_t* (*opentable)(dbhtext_t*,cpchar,bool_t);
+
+ char* dir;
+};
uint db_getnewcount(veciter_t * piter);
+dbh_t* dbtext_db_open(cpchar dbname);
+bool_t dbtext_db_close( dbhtext_t* pthis );
+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly );
+
+bool_t dbtext_table_close( dbttext_t* pthis );
+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg );
+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg );
+uint dbtext_table_getmsgcount( dbttext_t* pthis );
+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword );
+
#endif /* ndef _DBH_H */
diff --git a/dbtext.c b/dbtext.c
@@ -1,490 +0,0 @@
-/* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */
-
-/*
- * Copyright (c) 2002 Tom Marshall <[email protected]>
- *
- * This program is free software. It may be distributed under the terms
- * in the file LICENSE, found in the top level of the distribution.
- *
- * dbtext.c: flatfile database handler
- */
-
-#include "config.h"
-#include "dbg.h"
-#include "str.h"
-#include "lex.h"
-#include "vec.h"
-
-#include "dbh.h"
-#include "dbtext.h"
-
-static void
-dbtext_table_setsize(dbttext_t * pthis, uint nsize)
-{
- uint nnewalloc;
- rec_t *pnewitems;
- uint n;
-
- if (nsize <= pthis->nalloc)
- return;
-
- nnewalloc = pthis->nalloc * 2;
- if (nnewalloc < nsize)
- nnewalloc = nsize;
- pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)…
- if (pnewitems == NULL) {
- exit(2);
- }
- for (n = pthis->nitems; n < nsize; n++) {
- str_create(&pnewitems[n].w);
- pnewitems[n].n = 0;
- }
- pthis->pitems = pnewitems;
- pthis->nalloc = nnewalloc;
-}
-
-dbh_t *
-dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass)
-{
- dbhtext_t *pthis = NULL;
- uint dirlen;
- cpchar phome;
- struct stat st;
-
- if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
- perror("malloc()");
- goto bail;
- }
-
- pthis->close = dbtext_db_close;
- pthis->opentable = dbtext_db_opentable;
-
- if (dbname != NULL && *dbname != '\0') {
- dirlen = strlen(dbname);
- if ((pthis->dir = strdup(dbname)) == NULL) {
- perror("strdup()");
- goto bail;
- }
- if (dirlen && pthis->dir[dirlen - 1] == '/')
- pthis->dir[--dirlen] = '\0';
- } else {
- phome = getenv("HOME");
- if (phome == NULL || *phome == '\0') {
- phome = ".";
- }
- dirlen = strlen(phome) + 5 + 1;
- if ((pthis->dir = malloc(dirlen)) == NULL)
- goto bail;
-
- /* NOTE: no truncation possible */
- snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
- }
-
- /* make sure config directory exists */
- if (stat(pthis->dir, &st) != 0) {
- if (errno != ENOENT ||
- mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
- goto bail;
- } else {
- if (!S_ISDIR(st.st_mode))
- goto bail;
- }
-
- /* unveil(2), TODO: rework later */
- /* TODO: permission depending on mode */
- char listpath[PATH_MAX];
- snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist");
- if (unveil(listpath, "rw") == -1) {
- perror("unveil()");
- exit(2);
- }
- snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist");
- if (unveil(listpath, "rw") == -1) {
- perror("unveil()");
- exit(2);
- }
- if (unveil(NULL, NULL) == -1) {
- perror("unveil()");
- exit(2);
- }
-
- return (dbh_t *)pthis;
-
-bail:
- if (pthis) {
- if (pthis->dir)
- free(pthis->dir);
- free(pthis);
- }
-
- return NULL;
-}
-
-bool_t
-dbtext_db_close(dbhtext_t * pthis)
-{
- free(pthis->dir);
- pthis->dir = NULL;
- return true;
-}
-
-dbt_t *
-dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
-{
- dbttext_t *ptable = NULL;
-
-#ifndef NOLOCK
- struct flock lock;
-
-#endif /* ndef NOLOCK */
- char szpath[PATH_MAX];
- int flags, ret;
- struct stat st;
- char *pbegin;
- char *pend;
- rec_t r;
- uint pos;
-
- if (pthis->dir == NULL)
- goto bail;
-
- if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
- perror("malloc()");
- goto bail;
- }
- ptable->close = dbtext_table_close;
- ptable->mergeclose = dbtext_table_mergeclose;
- ptable->unmergeclose = dbtext_table_unmergeclose;
- ptable->getmsgcount = dbtext_table_getmsgcount;
- ptable->getcount = dbtext_table_getcount;
- ptable->fd = -1;
- ptable->pbuf = NULL;
- ptable->nmsgs = 0;
- ptable->nalloc = 0;
- ptable->nitems = 0;
- ptable->pitems = NULL;
-
- ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
- if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
- fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, tabl…
- goto bail;
- }
-
- flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
- if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
- perror("open()");
- goto bail;
- }
-
-#ifndef NOLOCK
- memset(&lock, 0, sizeof(lock));
- lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
- lock.l_start = 0;
- lock.l_whence = SEEK_SET;
- lock.l_len = 0;
- fcntl(ptable->fd, F_SETLKW, &lock);
-#endif /* ndef NOLOCK */
-
- if (fstat(ptable->fd, &st) != 0) {
- perror("fstat()");
- goto bail_uc;
- }
- if (st.st_size == 0) {
- return (dbt_t *) ptable;
- }
- ptable->pbuf = (char *) malloc(st.st_size);
- if (ptable->pbuf == NULL) {
- perror("malloc()");
- goto bail_uc;
- }
- if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
- perror("read()");
- goto bail_fuc;
- }
- /* XXX: bogofilter compatibility */
- if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
- goto bail_fuc;
- }
- pbegin = ptable->pbuf;
- while (*pbegin != '\n')
- pbegin++;
- pbegin++;
-
- pos = 0;
- while (pbegin < ptable->pbuf + st.st_size) {
- pend = pbegin;
- r.w.p = pbegin;
- r.w.len = 0;
- r.n = 0;
-
- while (*pend != '\n') {
- if (pend >= ptable->pbuf + st.st_size) {
- goto bail_fuc;
- }
- *pend = tolower(*pend);
- if (*pend == ' ') {
- r.w.len = (pend - pbegin);
- r.n = strtol(pend + 1, NULL, 10);
- }
- pend++;
- }
- if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
- if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
- fprintf(stderr, "dbh_loadfile: bad file format…
- goto bail_fuc;
- }
- dbtext_table_setsize(ptable, pos + 1);
- ptable->pitems[pos++] = r;
- ptable->nitems = pos;
- }
- pbegin = pend + 1;
- }
-
- if (rdonly) {
-#ifndef NOLOCK
- lock.l_type = F_UNLCK;
- fcntl(ptable->fd, F_SETLKW, &lock);
-#endif /* ndef NOLOCK */
- close(ptable->fd);
- ptable->fd = -1;
- }
- return (dbt_t *) ptable;
-
-bail_fuc:
- free(ptable->pbuf);
-
-bail_uc:
-#ifndef NOLOCK
- lock.l_type = F_UNLCK;
- fcntl(ptable->fd, F_SETLKW, &lock);
-#endif /* ndef NOLOCK */
-
- close(ptable->fd);
- ptable->fd = -1;
-
-bail:
- free(ptable);
- return NULL;
-}
-
-bool_t
-dbtext_table_close(dbttext_t * pthis)
-{
- struct flock lockall;
-
- free(pthis->pbuf);
- pthis->pbuf = NULL;
- free(pthis->pitems);
- pthis->pitems = NULL;
-
- if (pthis->fd != -1) {
-#ifndef NOLOCK
- memset(&lockall, 0, sizeof(lockall));
- lockall.l_type = F_UNLCK;
- lockall.l_start = 0;
- lockall.l_whence = SEEK_SET;
- lockall.l_len = 0;
- fcntl(pthis->fd, F_SETLKW, &lockall);
-#endif /* ndef NOLOCK */
- close(pthis->fd);
- pthis->fd = -1;
- }
- return true;
-}
-
-bool_t
-dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
-{
- /* note that we require both vectors to be sorted */
-
- uint pos;
- rec_t *prec;
- veciter_t msgiter;
- str_t *pmsgstr;
- uint count;
- char iobuf[IOBUFSIZE];
- char *p;
-
- if (pthis->fd == -1) {
- return false;
- }
- ftruncate(pthis->fd, 0);
- lseek(pthis->fd, 0, SEEK_SET);
-
- pthis->nmsgs++;
-
- p = iobuf;
- p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
-
- vec_first(pmsg, &msgiter);
- pmsgstr = veciter_get(&msgiter);
-
- pos = 0;
- while (pos < pthis->nitems || pmsgstr != NULL) {
- int cmp = 0;
-
- prec = &pthis->pitems[pos];
- if (pmsgstr != NULL && pos < pthis->nitems) {
- cmp = str_casecmp(&prec->w, pmsgstr);
- } else {
- /* we exhausted one list or the other (but not both) */
- cmp = (pos < pthis->nitems) ? -1 : 1;
- }
- if (cmp < 0) {
- /* write existing str */
- count = prec->n;
- strncpylwr(p, prec->w.p, prec->w.len);
- p += prec->w.len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- pos++;
- } else if (cmp == 0) {
- /* same str, merge and write sum */
- count = db_getnewcount(&msgiter);
- count += prec->n;
- strncpylwr(p, prec->w.p, prec->w.len);
- p += prec->w.len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- pos++;
- veciter_next(&msgiter);
- pmsgstr = veciter_get(&msgiter);
- } else { /* cmp > 0 */
- /* write new str */
- count = db_getnewcount(&msgiter);
- strncpylwr(p, pmsgstr->p, pmsgstr->len);
- p += pmsgstr->len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- veciter_next(&msgiter);
- pmsgstr = veciter_get(&msgiter);
- }
-
- if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
- write(pthis->fd, iobuf, p - iobuf);
- p = iobuf;
- }
- }
- if (p != iobuf) {
- write(pthis->fd, iobuf, p - iobuf);
- }
- veciter_destroy(&msgiter);
- return dbtext_table_close(pthis);
-}
-
-bool_t
-dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
-{
- /* note that we require both vectors to be sorted */
-
- uint pos;
- rec_t *prec;
- veciter_t msgiter;
- str_t *pmsgstr;
- uint count;
- char iobuf[IOBUFSIZE];
- char *p;
-
- if (pthis->fd == -1) {
- return false;
- }
- ftruncate(pthis->fd, 0);
- lseek(pthis->fd, 0, SEEK_SET);
-
- pthis->nmsgs--;
-
- p = iobuf;
- p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
-
- vec_first(pmsg, &msgiter);
- pmsgstr = veciter_get(&msgiter);
-
- pos = 0;
- while (pos < pthis->nitems || pmsgstr != NULL) {
- int cmp = 0;
-
- prec = &pthis->pitems[pos];
- if (pmsgstr != NULL && pos < pthis->nitems) {
- cmp = str_casecmp(&prec->w, pmsgstr);
- } else {
- /* we exhausted one list or the other (but not both) */
- cmp = (pos < pthis->nitems) ? -1 : 1;
- }
- if (cmp < 0) {
- /* write existing str */
- count = prec->n;
- strncpylwr(p, prec->w.p, prec->w.len);
- p += prec->w.len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- pos++;
- } else if (cmp == 0) {
- /* same str, merge and write difference */
- count = db_getnewcount(&msgiter);
- count = (prec->n > count) ? (prec->n - count) : 0;
- strncpylwr(p, prec->w.p, prec->w.len);
- p += prec->w.len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- pos++;
- veciter_next(&msgiter);
- pmsgstr = veciter_get(&msgiter);
- } else { /* cmp > 0 */
- /* this should not happen, so write with count=0 */
- db_getnewcount(&msgiter);
- count = 0;
- strncpylwr(p, pmsgstr->p, pmsgstr->len);
- p += pmsgstr->len;
- *p++ = ' ';
- p += sprintf(p, "%u\n", count);
-
- veciter_next(&msgiter);
- pmsgstr = veciter_get(&msgiter);
- }
-
- if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
- write(pthis->fd, iobuf, p - iobuf);
- p = iobuf;
- }
- }
- if (p != iobuf) {
- write(pthis->fd, iobuf, p - iobuf);
- }
- veciter_destroy(&msgiter);
- return dbtext_table_close(pthis);
-}
-
-uint
-dbtext_table_getmsgcount(dbttext_t * pthis)
-{
- return pthis->nmsgs;
-}
-
-uint
-dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
-{
- int lo, hi, mid;
-
- if (pthis->nitems == 0) {
- return 0;
- }
- hi = pthis->nitems - 1;
- lo = -1;
- while (hi - lo > 1) {
- mid = (hi + lo) / 2;
- if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0)
- hi = mid;
- else
- lo = mid;
- }
-
- if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
- return 0;
- }
- return pthis->pitems[hi].n;
-}
diff --git a/dbtext.h b/dbtext.h
@@ -1,49 +0,0 @@
-/* $Id: dbtext.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */
-
-/*
- * Copyright (c) 2002 Tom Marshall <[email protected]>
- *
- * This program is free software. It may be distributed under the terms
- * in the file LICENSE, found in the top level of the distribution.
- */
-
-#ifndef _DBTEXT_H
-#define _DBTEXT_H
-
-typedef struct _dbttext dbttext_t;
-struct _dbttext
-{
- bool_t (*close)(dbttext_t*);
- bool_t (*mergeclose)(dbttext_t*,vec_t*);
- bool_t (*unmergeclose)(dbttext_t*,vec_t*);
- uint (*getmsgcount)(dbttext_t*);
- uint (*getcount)(dbttext_t*,str_t*);
-
- int fd; /* file descriptor, if currently open */
- char* pbuf; /* data buffer, if currently open */
- uint nmsgs; /* number of messages represented in list */
- uint nalloc; /* items alloced in pitems */
- uint nitems; /* items available */
- rec_t* pitems; /* growing vector of items */
-};
-
-typedef struct _dbhtext dbhtext_t;
-struct _dbhtext
-{
- bool_t (*close)(dbhtext_t*);
- dbt_t* (*opentable)(dbhtext_t*,cpchar,bool_t);
-
- char* dir;
-};
-
-dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
-bool_t dbtext_db_close( dbhtext_t* pthis );
-dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly );
-
-bool_t dbtext_table_close( dbttext_t* pthis );
-bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg );
-bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg );
-uint dbtext_table_getmsgcount( dbttext_t* pthis );
-uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword );
-
-#endif /* ndef _DBTEXT_H */
diff --git a/filt.h b/filt.h
@@ -10,17 +10,21 @@
#ifndef _FILT_H
#define _FILT_H
+#include "lex.h"
+#include "str.h"
+#include "vec.h"
+
typedef struct
{
- str_t key;
- double prob;
+ str_t key;
+ double prob;
} discrim_t;
typedef struct
{
- double spamicity;
- uint keepers;
- discrim_t* extrema;
+ double spamicity;
+ uint keepers;
+ discrim_t* extrema;
} stats_t;
void statdump( stats_t* pstat, int fd );
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.