improve code-style - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches | |
git clone git://git.codemadness.org/bmf | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 0f11e5e148314939e59850ef2aaa607f2b06bc90 | |
parent cd31f403d6c7b3acf4a41365c063c4cefef34e83 | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sun, 23 Sep 2018 14:36:58 +0200 | |
improve code-style | |
Diffstat: | |
M dbdb.c | 74 +++++++++++++++--------------… | |
M dbg.c | 37 +++++++++++++++++------------… | |
M dbtext.c | 982 +++++++++++++++--------------… | |
M filt.c | 250 +++++++++++++++--------------… | |
M lex.c | 1296 +++++++++++++++--------------… | |
M str.c | 86 ++++++++++++++++-------------… | |
M vec.c | 469 +++++++++++++++--------------… | |
7 files changed, 1531 insertions(+), 1663 deletions(-) | |
--- | |
diff --git a/dbdb.c b/dbdb.c | |
@@ -18,53 +18,51 @@ | |
#include "dbh.h" | |
#include "dbdb.h" | |
-dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ) | |
+dbh_t * | |
+dbdb_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) | |
{ | |
- return NULL; | |
+ return NULL; | |
} | |
#ifdef UNIT_TEST | |
-int main( int argc, char** argv ) | |
+int | |
+main(int argc, char **argv) | |
{ | |
- dbh_t* pdb; | |
- veciter_t iter; | |
- str_t* pstr; | |
- uint n; | |
+ dbh_t *pdb; | |
+ veciter_t iter; | |
+ str_t *pstr; | |
+ uint n; | |
- if( argc != 2 ) | |
- { | |
- fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
- return 1; | |
- } | |
+ if (argc != 2) { | |
+ fprintf(stderr, "usage: %s <file>\n", argv[0]); | |
+ return 1; | |
+ } | |
+ for (n = 0; n < 100; n++) { | |
+ pdb = dbh_open("testlist", true); | |
- for( n = 0; n < 100; n++ ) | |
- { | |
- pdb = dbh_open( "testlist", true ); | |
+ vec_first(&db, &iter); | |
+ while ((pstr = veciter_get(&iter)) != NULL) { | |
+ char buf[MAXWORDLEN + 32]; | |
+ char *p; | |
- vec_first( &db, &iter ); | |
- while( (pstr = veciter_get( &iter )) != NULL ) | |
- { | |
- char buf[MAXWORDLEN+32]; | |
- char* p; | |
- if( pstr->len > 200 ) | |
- { | |
- fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
- break; | |
- } | |
- p = buf; | |
- strcpy( buf, "str: " ); | |
- p += 6; | |
- memcpy( p, pstr->p, pstr->len ); | |
- p += pstr->len; | |
- sprintf( p, " %u", pstr->count ); | |
- puts( buf ); | |
+ if (pstr->len > 200) { | |
+ fprintf(stderr, "str too long: %u chars\n", pstr->len); | |
+ break; | |
+ } | |
+ p = buf; | |
+ strcpy(buf, "str: "); | |
+ p += 6; | |
+ memcpy(p, pstr->p, pstr->len); | |
+ p += pstr->len; | |
+ sprintf(p, " %u", pstr->count); | |
+ puts(buf); | |
- veciter_next( &iter ); | |
- } | |
+ veciter_next(&iter); | |
+ } | |
- dbh_close( &db ); | |
- } | |
+ dbh_close(&db); | |
+ } | |
- return 0; | |
+ return 0; | |
} | |
-#endif /* def UNIT_TEST */ | |
+#endif /* def UNIT_TEST */ | |
diff --git a/dbg.c b/dbg.c | |
@@ -15,28 +15,31 @@ | |
uint g_verbose = 0; | |
-void verbose( int level, const char* fmt, ... ) | |
+void | |
+verbose(int level, const char *fmt,...) | |
{ | |
- if( g_verbose >= level ) | |
- { | |
- char str[4096]; | |
- va_list v; | |
- va_start( v, fmt ); | |
- vsnprintf( str, sizeof(str)-1, fmt, v ); | |
- str[sizeof(str)-1] = '\0'; | |
- | |
- fputs( str, stderr ); | |
- | |
- va_end( v ); | |
- } | |
+ if (g_verbose >= level) { | |
+ char str[4096]; | |
+ va_list v; | |
+ | |
+ va_start(v, fmt); | |
+ vsnprintf(str, sizeof(str) - 1, fmt, v); | |
+ str[sizeof(str) - 1] = '\0'; | |
+ | |
+ fputs(str, stderr); | |
+ | |
+ va_end(v); | |
+ } | |
} | |
-void dbgout( const char* fmt, ... ) | |
+void | |
+dbgout(const char *fmt,...) | |
{ | |
- /* empty */ | |
+ /* empty */ | |
} | |
-void dump_alloc_heap( void ) | |
+void | |
+dump_alloc_heap(void) | |
{ | |
- /* empty */ | |
+ /* empty */ | |
} | |
diff --git a/dbtext.c b/dbtext.c | |
@@ -18,574 +18,520 @@ | |
#include "dbh.h" | |
#include "dbtext.h" | |
-static void dbtext_table_setsize( dbttext_t* pthis, uint nsize ) | |
+static void | |
+dbtext_table_setsize(dbttext_t * pthis, uint nsize) | |
{ | |
- if( nsize > pthis->nalloc ) | |
- { | |
- uint nnewalloc; | |
- rec_t* pnewitems; | |
- uint n; | |
- | |
- nnewalloc = pthis->nalloc * 2; | |
- if( nnewalloc < nsize ) nnewalloc = nsize; | |
- pnewitems = (rec_t*)realloc( pthis->pitems, nnewalloc*sizeof(rec_t) ); | |
- if( pnewitems == NULL ) | |
- { | |
- exit( 2 ); | |
- } | |
- for( n = pthis->nitems; n < nsize; n++ ) | |
- { | |
- str_create( &pnewitems[n].w ); | |
- pnewitems[n].n = 0; | |
- } | |
- pthis->pitems = pnewitems; | |
- pthis->nalloc = nnewalloc; | |
- } | |
+ if (nsize > pthis->nalloc) { | |
+ uint nnewalloc; | |
+ rec_t *pnewitems; | |
+ uint n; | |
+ | |
+ nnewalloc = pthis->nalloc * 2; | |
+ if (nnewalloc < nsize) | |
+ nnewalloc = nsize; | |
+ pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t)); | |
+ if (pnewitems == NULL) { | |
+ exit(2); | |
+ } | |
+ for (n = pthis->nitems; n < nsize; n++) { | |
+ str_create(&pnewitems[n].w); | |
+ pnewitems[n].n = 0; | |
+ } | |
+ pthis->pitems = pnewitems; | |
+ pthis->nalloc = nnewalloc; | |
+ } | |
} | |
-dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass ) | |
+dbh_t * | |
+dbtext_db_open(cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass) | |
{ | |
- dbhtext_t* pthis; | |
- | |
- uint dirlen; | |
- cpchar phome; | |
- struct stat st; | |
- | |
- pthis = (dbhtext_t*)malloc( sizeof(dbhtext_t) ); | |
- if( pthis == NULL ) | |
- { | |
- goto bail; | |
- } | |
- pthis->close = dbtext_db_close; | |
- pthis->opentable = dbtext_db_opentable; | |
- if( dbname != NULL && *dbname != '\0' ) | |
- { | |
- dirlen = strlen( dbname ); | |
- pthis->dir = strdup( dbname ); | |
- if( pthis->dir[dirlen-1] == '/' ) | |
- { | |
- pthis->dir[dirlen-1] = '\0'; | |
- } | |
- } | |
- else | |
- { | |
- phome = getenv( "HOME" ); | |
- if( phome == NULL || *phome == '\0' ) | |
- { | |
- phome = "."; | |
- } | |
- pthis->dir = (char*)malloc( strlen(phome)+5+1 ); | |
- if( pthis->dir == NULL ) | |
- { | |
- goto bail; | |
- } | |
- sprintf( pthis->dir, "%s/.bmf", phome ); | |
- } | |
- | |
- /* ensure config directory exists */ | |
- if( stat( pthis->dir, &st ) != 0 ) | |
- { | |
- if( errno == ENOENT ) | |
- { | |
- if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 ) | |
- { | |
- goto bail; | |
- } | |
- } | |
- else | |
- { | |
- goto bail; | |
- } | |
- } | |
- else | |
- { | |
- if( !S_ISDIR( st.st_mode ) ) | |
- { | |
- goto bail; | |
- } | |
- } | |
- | |
- return (dbh_t*)pthis; | |
+ dbhtext_t *pthis; | |
+ | |
+ uint dirlen; | |
+ cpchar phome; | |
+ struct stat st; | |
+ | |
+ pthis = (dbhtext_t *) malloc(sizeof(dbhtext_t)); | |
+ if (pthis == NULL) { | |
+ goto bail; | |
+ } | |
+ pthis->close = dbtext_db_close; | |
+ pthis->opentable = dbtext_db_opentable; | |
+ if (dbname != NULL && *dbname != '\0') { | |
+ dirlen = strlen(dbname); | |
+ pthis->dir = strdup(dbname); | |
+ if (pthis->dir[dirlen - 1] == '/') { | |
+ pthis->dir[dirlen - 1] = '\0'; | |
+ } | |
+ } else { | |
+ phome = getenv("HOME"); | |
+ if (phome == NULL || *phome == '\0') { | |
+ phome = "."; | |
+ } | |
+ pthis->dir = (char *) malloc(strlen(phome) + 5 + 1); | |
+ if (pthis->dir == NULL) { | |
+ goto bail; | |
+ } | |
+ sprintf(pthis->dir, "%s/.bmf", phome); | |
+ } | |
+ | |
+ /* ensure config directory exists */ | |
+ if (stat(pthis->dir, &st) != 0) { | |
+ if (errno == ENOENT) { | |
+ if (mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0) { | |
+ goto bail; | |
+ } | |
+ } else { | |
+ goto bail; | |
+ } | |
+ } else { | |
+ if (!S_ISDIR(st.st_mode)) { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+ return (dbh_t *) pthis; | |
bail: | |
- return NULL; | |
+ return NULL; | |
} | |
-bool_t dbtext_db_close( dbhtext_t* pthis ) | |
+bool_t | |
+dbtext_db_close(dbhtext_t * pthis) | |
{ | |
- free( pthis->dir ); | |
- pthis->dir = NULL; | |
- return true; | |
+ free(pthis->dir); | |
+ pthis->dir = NULL; | |
+ return true; | |
} | |
-dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ) | |
+dbt_t * | |
+dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly) | |
{ | |
- dbttext_t* ptable = NULL; | |
+ dbttext_t *ptable = NULL; | |
#ifndef NOLOCK | |
- struct flock lock; | |
-#endif /* ndef NOLOCK */ | |
- char szpath[PATH_MAX]; | |
- int flags; | |
- struct stat st; | |
- | |
- char* pbegin; | |
- char* pend; | |
- rec_t r; | |
- uint pos; | |
- | |
- if( pthis->dir == NULL ) | |
- { | |
- goto bail; | |
- } | |
- | |
- ptable = (dbttext_t*)malloc( sizeof(dbttext_t) ); | |
- if( ptable == NULL ) | |
- { | |
- perror( "malloc()" ); | |
- goto bail; | |
- } | |
- ptable->close = dbtext_table_close; | |
- ptable->mergeclose = dbtext_table_mergeclose; | |
- ptable->unmergeclose = dbtext_table_unmergeclose; | |
- ptable->import = dbtext_table_import; | |
- ptable->export = dbtext_table_export; | |
- ptable->getmsgcount = dbtext_table_getmsgcount; | |
- ptable->getcount = dbtext_table_getcount; | |
- ptable->fd = -1; | |
- ptable->pbuf = NULL; | |
- ptable->nmsgs = 0; | |
- ptable->nalloc = 0; | |
- ptable->nitems = 0; | |
- ptable->pitems = NULL; | |
- | |
- sprintf( szpath, "%s/%s.txt", pthis->dir, table ); | |
- flags = (rdonly ? O_RDONLY|O_CREAT : O_RDWR|O_CREAT); | |
- ptable->fd = open( szpath, flags, 0644 ); | |
- if( ptable->fd == -1 ) | |
- { | |
- perror( "open()" ); | |
- goto bail; | |
- } | |
- | |
+ struct flock lock; | |
+ | |
+#endif /* ndef NOLOCK */ | |
+ char szpath[PATH_MAX]; | |
+ int flags; | |
+ struct stat st; | |
+ | |
+ char *pbegin; | |
+ char *pend; | |
+ rec_t r; | |
+ uint pos; | |
+ | |
+ if (pthis->dir == NULL) { | |
+ goto bail; | |
+ } | |
+ ptable = (dbttext_t *) malloc(sizeof(dbttext_t)); | |
+ if (ptable == NULL) { | |
+ perror("malloc()"); | |
+ goto bail; | |
+ } | |
+ ptable->close = dbtext_table_close; | |
+ ptable->mergeclose = dbtext_table_mergeclose; | |
+ ptable->unmergeclose = dbtext_table_unmergeclose; | |
+ ptable->import = dbtext_table_import; | |
+ ptable->export = dbtext_table_export; | |
+ ptable->getmsgcount = dbtext_table_getmsgcount; | |
+ ptable->getcount = dbtext_table_getcount; | |
+ ptable->fd = -1; | |
+ ptable->pbuf = NULL; | |
+ ptable->nmsgs = 0; | |
+ ptable->nalloc = 0; | |
+ ptable->nitems = 0; | |
+ ptable->pitems = NULL; | |
+ | |
+ sprintf(szpath, "%s/%s.txt", pthis->dir, table); | |
+ flags = (rdonly ? O_RDONLY | O_CREAT : O_RDWR | O_CREAT); | |
+ ptable->fd = open(szpath, flags, 0644); | |
+ if (ptable->fd == -1) { | |
+ perror("open()"); | |
+ goto bail; | |
+ } | |
#ifndef NOLOCK | |
- memset( &lock, 0, sizeof(lock) ); | |
- lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
- lock.l_start = 0; | |
- lock.l_whence = SEEK_SET; | |
- lock.l_len = 0; | |
- fcntl( ptable->fd, F_SETLKW, &lock ); | |
-#endif /* ndef NOLOCK */ | |
- | |
- if( fstat( ptable->fd, &st ) != 0 ) | |
- { | |
- perror( "fstat()" ); | |
- goto bail_uc; | |
- } | |
- | |
- if( st.st_size == 0 ) | |
- { | |
- return (dbt_t*)ptable; | |
- } | |
- | |
- ptable->pbuf = (char*)malloc( st.st_size ); | |
- if( ptable->pbuf == NULL ) | |
- { | |
- perror( "malloc()" ); | |
- goto bail_uc; | |
- } | |
- | |
- if( read( ptable->fd, ptable->pbuf, st.st_size ) != st.st_size ) | |
- { | |
- perror( "read()" ); | |
- goto bail_fuc; | |
- } | |
- | |
- /* XXX: bogofilter compatibility */ | |
- if( sscanf( ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs ) != 1 ) | |
- { | |
- goto bail_fuc; | |
- } | |
- pbegin = ptable->pbuf; | |
- while( *pbegin != '\n' ) pbegin++; | |
- pbegin++; | |
- | |
- pos = 0; | |
- while( pbegin < ptable->pbuf + st.st_size ) | |
- { | |
- pend = pbegin; | |
- r.w.p = pbegin; | |
- r.w.len = 0; | |
- r.n = 0; | |
- | |
- while( *pend != '\n' ) | |
- { | |
- if( pend >= ptable->pbuf + st.st_size ) | |
- { | |
- goto bail_fuc; | |
- } | |
- *pend = tolower(*pend); | |
- if( *pend == ' ' ) | |
- { | |
- r.w.len = (pend-pbegin); | |
- r.n = strtol( pend+1, NULL, 10 ); | |
- } | |
- pend++; | |
- } | |
- if( pend > pbegin && *pbegin != '#' && *pbegin != ';' ) | |
- { | |
- if( r.w.len == 0 || r.w.len > MAXWORDLEN ) | |
- { | |
- fprintf( stderr, "dbh_loadfile: bad file format\n" ); | |
- goto bail_fuc; | |
- } | |
- dbtext_table_setsize( ptable, pos+1 ); | |
- ptable->pitems[pos++] = r; | |
- ptable->nitems = pos; | |
- } | |
- pbegin = pend+1; | |
- } | |
- | |
- if( rdonly ) | |
- { | |
+ memset(&lock, 0, sizeof(lock)); | |
+ lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
+ lock.l_start = 0; | |
+ lock.l_whence = SEEK_SET; | |
+ lock.l_len = 0; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
+ | |
+ if (fstat(ptable->fd, &st) != 0) { | |
+ perror("fstat()"); | |
+ goto bail_uc; | |
+ } | |
+ if (st.st_size == 0) { | |
+ return (dbt_t *) ptable; | |
+ } | |
+ ptable->pbuf = (char *) malloc(st.st_size); | |
+ if (ptable->pbuf == NULL) { | |
+ perror("malloc()"); | |
+ goto bail_uc; | |
+ } | |
+ if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) { | |
+ perror("read()"); | |
+ goto bail_fuc; | |
+ } | |
+ /* XXX: bogofilter compatibility */ | |
+ if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) { | |
+ goto bail_fuc; | |
+ } | |
+ pbegin = ptable->pbuf; | |
+ while (*pbegin != '\n') | |
+ pbegin++; | |
+ pbegin++; | |
+ | |
+ pos = 0; | |
+ while (pbegin < ptable->pbuf + st.st_size) { | |
+ pend = pbegin; | |
+ r.w.p = pbegin; | |
+ r.w.len = 0; | |
+ r.n = 0; | |
+ | |
+ while (*pend != '\n') { | |
+ if (pend >= ptable->pbuf + st.st_size) { | |
+ goto bail_fuc; | |
+ } | |
+ *pend = tolower(*pend); | |
+ if (*pend == ' ') { | |
+ r.w.len = (pend - pbegin); | |
+ r.n = strtol(pend + 1, NULL, 10); | |
+ } | |
+ pend++; | |
+ } | |
+ if (pend > pbegin && *pbegin != '#' && *pbegin != ';') { | |
+ if (r.w.len == 0 || r.w.len > MAXWORDLEN) { | |
+ fprintf(stderr, "dbh_loadfile: bad file format\n"); | |
+ goto bail_fuc; | |
+ } | |
+ dbtext_table_setsize(ptable, pos + 1); | |
+ ptable->pitems[pos++] = r; | |
+ ptable->nitems = pos; | |
+ } | |
+ pbegin = pend + 1; | |
+ } | |
+ | |
+ if (rdonly) { | |
#ifndef NOLOCK | |
- lock.l_type = F_UNLCK; | |
- fcntl( ptable->fd, F_SETLKW, &lock ); | |
-#endif /* ndef NOLOCK */ | |
- close( ptable->fd ); | |
- ptable->fd = -1; | |
- } | |
- | |
- return (dbt_t*)ptable; | |
+ lock.l_type = F_UNLCK; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
+ close(ptable->fd); | |
+ ptable->fd = -1; | |
+ } | |
+ return (dbt_t *) ptable; | |
bail_fuc: | |
- free( ptable->pbuf ); | |
+ free(ptable->pbuf); | |
bail_uc: | |
#ifndef NOLOCK | |
- lock.l_type = F_UNLCK; | |
- fcntl( ptable->fd, F_SETLKW, &lock ); | |
-#endif /* ndef NOLOCK */ | |
+ lock.l_type = F_UNLCK; | |
+ fcntl(ptable->fd, F_SETLKW, &lock); | |
+#endif /* ndef NOLOCK */ | |
- close( ptable->fd ); | |
- ptable->fd = -1; | |
+ close(ptable->fd); | |
+ ptable->fd = -1; | |
bail: | |
- free( ptable ); | |
- return NULL; | |
+ free(ptable); | |
+ return NULL; | |
} | |
-bool_t dbtext_table_close( dbttext_t* pthis ) | |
+bool_t | |
+dbtext_table_close(dbttext_t * pthis) | |
{ | |
- struct flock lockall; | |
+ struct flock lockall; | |
- free( pthis->pbuf ); | |
- pthis->pbuf = NULL; | |
- free( pthis->pitems ); | |
- pthis->pitems = NULL; | |
+ free(pthis->pbuf); | |
+ pthis->pbuf = NULL; | |
+ free(pthis->pitems); | |
+ pthis->pitems = NULL; | |
- if( pthis->fd != -1 ) | |
- { | |
+ if (pthis->fd != -1) { | |
#ifndef NOLOCK | |
- memset( &lockall, 0, sizeof(lockall) ); | |
- lockall.l_type = F_UNLCK; | |
- lockall.l_start = 0; | |
- lockall.l_whence = SEEK_SET; | |
- lockall.l_len = 0; | |
- fcntl( pthis->fd, F_SETLKW, &lockall ); | |
-#endif /* ndef NOLOCK */ | |
- close( pthis->fd ); | |
- pthis->fd = -1; | |
- } | |
- | |
- return true; | |
+ memset(&lockall, 0, sizeof(lockall)); | |
+ lockall.l_type = F_UNLCK; | |
+ lockall.l_start = 0; | |
+ lockall.l_whence = SEEK_SET; | |
+ lockall.l_len = 0; | |
+ fcntl(pthis->fd, F_SETLKW, &lockall); | |
+#endif /* ndef NOLOCK */ | |
+ close(pthis->fd); | |
+ pthis->fd = -1; | |
+ } | |
+ return true; | |
} | |
-bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ) | |
+bool_t | |
+dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg) | |
{ | |
- /* note that we require both vectors to be sorted */ | |
- | |
- uint pos; | |
- rec_t* prec; | |
- veciter_t msgiter; | |
- str_t* pmsgstr; | |
- uint count; | |
- char iobuf[IOBUFSIZE]; | |
- char* p; | |
- | |
- if( pthis->fd == -1 ) | |
- { | |
- return false; | |
- } | |
- ftruncate( pthis->fd, 0 ); | |
- lseek( pthis->fd, 0, SEEK_SET ); | |
- | |
- pthis->nmsgs++; | |
- | |
- p = iobuf; | |
- p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
- | |
- vec_first( pmsg, &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- | |
- pos = 0; | |
- while( pos < pthis->nitems || pmsgstr != NULL ) | |
- { | |
- int cmp = 0; | |
- prec = &pthis->pitems[pos]; | |
- if( pmsgstr != NULL && pos < pthis->nitems ) | |
- { | |
- cmp = str_casecmp( &prec->w, pmsgstr ); | |
- } | |
- else | |
- { | |
- /* we exhausted one list or the other (but not both) */ | |
- cmp = (pos < pthis->nitems) ? -1 : 1; | |
- } | |
- if( cmp < 0 ) | |
- { | |
- /* write existing str */ | |
- assert( prec->w.p != NULL && prec->w.len > 0 ); | |
- assert( prec->w.len <= MAXWORDLEN ); | |
- count = prec->n; | |
- strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- pos++; | |
- } | |
- else if( cmp == 0 ) | |
- { | |
- /* same str, merge and write sum */ | |
- assert( prec->w.p != NULL && prec->w.len > 0 ); | |
- assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
- assert( prec->w.len <= MAXWORDLEN ); | |
- assert( pmsgstr->len <= MAXWORDLEN ); | |
- count = db_getnewcount( &msgiter ); | |
- count += prec->n; | |
- strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- pos++; | |
- veciter_next( &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- } | |
- else /* cmp > 0 */ | |
- { | |
- /* write new str */ | |
- assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
- assert( pmsgstr->len <= MAXWORDLEN ); | |
- count = db_getnewcount( &msgiter ); | |
- strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- veciter_next( &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- } | |
- | |
- if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
- { | |
- write( pthis->fd, iobuf, p-iobuf ); | |
- p = iobuf; | |
- } | |
- } | |
- if( p != iobuf ) | |
- { | |
- write( pthis->fd, iobuf, p-iobuf ); | |
- } | |
- | |
- veciter_destroy( &msgiter ); | |
- return dbtext_table_close( pthis ); | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t *prec; | |
+ veciter_t msgiter; | |
+ str_t *pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char *p; | |
+ | |
+ if (pthis->fd == -1) { | |
+ return false; | |
+ } | |
+ ftruncate(pthis->fd, 0); | |
+ lseek(pthis->fd, 0, SEEK_SET); | |
+ | |
+ pthis->nmsgs++; | |
+ | |
+ p = iobuf; | |
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
+ | |
+ vec_first(pmsg, &msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ | |
+ pos = 0; | |
+ while (pos < pthis->nitems || pmsgstr != NULL) { | |
+ int cmp = 0; | |
+ | |
+ prec = &pthis->pitems[pos]; | |
+ if (pmsgstr != NULL && pos < pthis->nitems) { | |
+ cmp = str_casecmp(&prec->w, pmsgstr); | |
+ } else { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if (cmp < 0) { | |
+ /* write existing str */ | |
+ assert(prec->w.p != NULL && prec->w.len > 0); | |
+ assert(prec->w.len <= MAXWORDLEN); | |
+ count = prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ } else if (cmp == 0) { | |
+ /* same str, merge and write sum */ | |
+ assert(prec->w.p != NULL && prec->w.len > 0); | |
+ assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
+ assert(prec->w.len <= MAXWORDLEN); | |
+ assert(pmsgstr->len <= MAXWORDLEN); | |
+ count = db_getnewcount(&msgiter); | |
+ count += prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } else { /* cmp > 0 */ | |
+ /* write new str */ | |
+ assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
+ assert(pmsgstr->len <= MAXWORDLEN); | |
+ count = db_getnewcount(&msgiter); | |
+ strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
+ p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } | |
+ | |
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if (p != iobuf) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ } | |
+ veciter_destroy(&msgiter); | |
+ return dbtext_table_close(pthis); | |
} | |
-bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ) | |
+bool_t | |
+dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg) | |
{ | |
- /* note that we require both vectors to be sorted */ | |
- | |
- uint pos; | |
- rec_t* prec; | |
- veciter_t msgiter; | |
- str_t* pmsgstr; | |
- uint count; | |
- char iobuf[IOBUFSIZE]; | |
- char* p; | |
- | |
- if( pthis->fd == -1 ) | |
- { | |
- return false; | |
- } | |
- ftruncate( pthis->fd, 0 ); | |
- lseek( pthis->fd, 0, SEEK_SET ); | |
- | |
- pthis->nmsgs--; | |
- | |
- p = iobuf; | |
- p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
- | |
- vec_first( pmsg, &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- | |
- pos = 0; | |
- while( pos < pthis->nitems || pmsgstr != NULL ) | |
- { | |
- int cmp = 0; | |
- prec = &pthis->pitems[pos]; | |
- if( pmsgstr != NULL && pos < pthis->nitems ) | |
- { | |
- cmp = str_casecmp( &prec->w, pmsgstr ); | |
- } | |
- else | |
- { | |
- /* we exhausted one list or the other (but not both) */ | |
- cmp = (pos < pthis->nitems) ? -1 : 1; | |
- } | |
- if( cmp < 0 ) | |
- { | |
- /* write existing str */ | |
- assert( prec->w.p != NULL && prec->w.len > 0 ); | |
- assert( prec->w.len <= MAXWORDLEN ); | |
- count = prec->n; | |
- strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- pos++; | |
- } | |
- else if( cmp == 0 ) | |
- { | |
- /* same str, merge and write difference */ | |
- assert( prec->w.p != NULL && prec->w.len > 0 ); | |
- assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
- assert( prec->w.len <= MAXWORDLEN ); | |
- assert( pmsgstr->len <= MAXWORDLEN ); | |
- count = db_getnewcount( &msgiter ); | |
- count = (prec->n > count) ? (prec->n - count) : 0; | |
- strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- pos++; | |
- veciter_next( &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- } | |
- else /* cmp > 0 */ | |
- { | |
- /* this should not happen, so write with count=0 */ | |
- assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
- assert( pmsgstr->len <= MAXWORDLEN ); | |
- db_getnewcount( &msgiter ); | |
- count = 0; | |
- strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; | |
- *p++ = ' '; | |
- p += sprintf( p, "%u\n", count ); | |
- | |
- veciter_next( &msgiter ); | |
- pmsgstr = veciter_get( &msgiter ); | |
- } | |
- | |
- if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
- { | |
- write( pthis->fd, iobuf, p-iobuf ); | |
- p = iobuf; | |
- } | |
- } | |
- if( p != iobuf ) | |
- { | |
- write( pthis->fd, iobuf, p-iobuf ); | |
- } | |
- | |
- veciter_destroy( &msgiter ); | |
- return dbtext_table_close( pthis ); | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t *prec; | |
+ veciter_t msgiter; | |
+ str_t *pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char *p; | |
+ | |
+ if (pthis->fd == -1) { | |
+ return false; | |
+ } | |
+ ftruncate(pthis->fd, 0); | |
+ lseek(pthis->fd, 0, SEEK_SET); | |
+ | |
+ pthis->nmsgs--; | |
+ | |
+ p = iobuf; | |
+ p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs); | |
+ | |
+ vec_first(pmsg, &msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ | |
+ pos = 0; | |
+ while (pos < pthis->nitems || pmsgstr != NULL) { | |
+ int cmp = 0; | |
+ | |
+ prec = &pthis->pitems[pos]; | |
+ if (pmsgstr != NULL && pos < pthis->nitems) { | |
+ cmp = str_casecmp(&prec->w, pmsgstr); | |
+ } else { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if (cmp < 0) { | |
+ /* write existing str */ | |
+ assert(prec->w.p != NULL && prec->w.len > 0); | |
+ assert(prec->w.len <= MAXWORDLEN); | |
+ count = prec->n; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ } else if (cmp == 0) { | |
+ /* same str, merge and write difference */ | |
+ assert(prec->w.p != NULL && prec->w.len > 0); | |
+ assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
+ assert(prec->w.len <= MAXWORDLEN); | |
+ assert(pmsgstr->len <= MAXWORDLEN); | |
+ count = db_getnewcount(&msgiter); | |
+ count = (prec->n > count) ? (prec->n - count) : 0; | |
+ strncpylwr(p, prec->w.p, prec->w.len); | |
+ p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ pos++; | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } else { /* cmp > 0 */ | |
+ /* this should not happen, so write with count=0 */ | |
+ assert(pmsgstr->p != NULL && pmsgstr->len > 0); | |
+ assert(pmsgstr->len <= MAXWORDLEN); | |
+ db_getnewcount(&msgiter); | |
+ count = 0; | |
+ strncpylwr(p, pmsgstr->p, pmsgstr->len); | |
+ p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf(p, "%u\n", count); | |
+ | |
+ veciter_next(&msgiter); | |
+ pmsgstr = veciter_get(&msgiter); | |
+ } | |
+ | |
+ if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if (p != iobuf) { | |
+ write(pthis->fd, iobuf, p - iobuf); | |
+ } | |
+ veciter_destroy(&msgiter); | |
+ return dbtext_table_close(pthis); | |
} | |
-bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename ) | |
+bool_t | |
+dbtext_table_import(dbttext_t * pthis, cpchar filename) | |
{ | |
- return false; | |
+ return false; | |
} | |
-bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename ) | |
+bool_t | |
+dbtext_table_export(dbttext_t * pthis, cpchar filename) | |
{ | |
- return false; | |
+ return false; | |
} | |
-uint dbtext_table_getmsgcount( dbttext_t* pthis ) | |
+uint | |
+dbtext_table_getmsgcount(dbttext_t * pthis) | |
{ | |
- return pthis->nmsgs; | |
+ return pthis->nmsgs; | |
} | |
-uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ) | |
+uint | |
+dbtext_table_getcount(dbttext_t * pthis, str_t * pword) | |
{ | |
- int lo, hi, mid; | |
- | |
- if( pthis->nitems == 0 ) | |
- { | |
- return 0; | |
- } | |
- | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while( hi-lo > 1 ) | |
- { | |
- mid = (hi+lo)/2; | |
- if( str_casecmp( pword, &pthis->pitems[mid].w ) <= 0 ) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- assert( hi >= 0 && hi < pthis->nitems ); | |
- | |
- if( str_casecmp( pword, &pthis->pitems[hi].w ) != 0 ) | |
- { | |
- return 0; | |
- } | |
- | |
- return pthis->pitems[hi].n; | |
+ int lo, hi, mid; | |
+ | |
+ if (pthis->nitems == 0) { | |
+ return 0; | |
+ } | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert(hi >= 0 && hi < pthis->nitems); | |
+ | |
+ if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) { | |
+ return 0; | |
+ } | |
+ return pthis->pitems[hi].n; | |
} | |
#ifdef UNIT_TEST | |
-int main( int argc, char** argv ) | |
+int | |
+main(int argc, char **argv) | |
{ | |
- dbh_t* pdb; | |
- veciter_t iter; | |
- str_t* pstr; | |
- uint n; | |
- | |
- if( argc != 2 ) | |
- { | |
- fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
- return 1; | |
- } | |
- | |
- for( n = 0; n < 100; n++ ) | |
- { | |
- pdb = dbh_open( "testlist", true ); | |
- | |
- vec_first( &db, &iter ); | |
- while( (pstr = veciter_get( &iter )) != NULL ) | |
- { | |
- char buf[MAXWORDLEN+32]; | |
- char* p; | |
- if( pstr->len > 200 ) | |
- { | |
- fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
- break; | |
- } | |
- p = buf; | |
- strcpy( buf, "str: " ); | |
- p += 6; | |
- memcpy( p, pstr->p, pstr->len ); | |
- p += pstr->len; | |
- sprintf( p, " %u", pstr->count ); | |
- puts( buf ); | |
- | |
- veciter_next( &iter ); | |
- } | |
- | |
- dbh_close( &db ); | |
- } | |
- | |
- return 0; | |
+ dbh_t *pdb; | |
+ veciter_t iter; | |
+ str_t *pstr; | |
+ uint n; | |
+ | |
+ if (argc != 2) { | |
+ fprintf(stderr, "usage: %s <file>\n", argv[0]); | |
+ return 1; | |
+ } | |
+ for (n = 0; n < 100; n++) { | |
+ pdb = dbh_open("testlist", true); | |
+ | |
+ vec_first(&db, &iter); | |
+ while ((pstr = veciter_get(&iter)) != NULL) { | |
+ char buf[MAXWORDLEN + 32]; | |
+ char *p; | |
+ | |
+ if (pstr->len > 200) { | |
+ fprintf(stderr, "str too long: %u chars\n", pstr->len); | |
+ break; | |
+ } | |
+ p = buf; | |
+ strcpy(buf, "str: "); | |
+ p += 6; | |
+ memcpy(p, pstr->p, pstr->len); | |
+ p += pstr->len; | |
+ sprintf(p, " %u", pstr->count); | |
+ puts(buf); | |
+ | |
+ veciter_next(&iter); | |
+ } | |
+ | |
+ dbh_close(&db); | |
+ } | |
+ | |
+ return 0; | |
} | |
-#endif /* def UNIT_TEST */ | |
+#endif /* def UNIT_TEST */ | |
diff --git a/filt.c b/filt.c | |
@@ -21,155 +21,143 @@ | |
#define DEVIATION(n) fabs((n)-0.5f) | |
/* Dump the contents of a statistics structure */ | |
-void statdump( stats_t* pstat, int fd ) | |
+void | |
+statdump(stats_t * pstat, int fd) | |
{ | |
- char iobuf[IOBUFSIZE]; | |
- char* p; | |
- discrim_t* pp; | |
- | |
- p = iobuf; | |
- p += sprintf( iobuf, "# Spamicity: %f\n", pstat->spamicity ); | |
- | |
- for (pp = pstat->extrema; pp < pstat->extrema + pstat->keepers; pp++) | |
- { | |
- if (pp->key.len) | |
- { | |
- strcpy( p, "# '" ); p += 3; | |
- strncpylwr( p, pp->key.p, pp->key.len ); p += pp->key.len; | |
- p += snprintf( p, 28, "' -> %f\n", pp->prob ); | |
- if( p+MAXWORDLEN+32 > (iobuf+1) ) | |
- { | |
- write( fd, iobuf, p-iobuf ); | |
- p = iobuf; | |
- } | |
- } | |
- } | |
- if( p != iobuf ) | |
- { | |
- write( fd, iobuf, p-iobuf ); | |
- } | |
+ char iobuf[IOBUFSIZE]; | |
+ char *p; | |
+ discrim_t *pp; | |
+ | |
+ p = iobuf; | |
+ p += sprintf(iobuf, "# Spamicity: %f\n", pstat->spamicity); | |
+ | |
+ for (pp = pstat->extrema; pp < pstat->extrema + pstat->keepers; pp++) { | |
+ if (pp->key.len) { | |
+ strcpy(p, "# '"); | |
+ p += 3; | |
+ strncpylwr(p, pp->key.p, pp->key.len); | |
+ p += pp->key.len; | |
+ p += snprintf(p, 28, "' -> %f\n", pp->prob); | |
+ if (p + MAXWORDLEN + 32 > (iobuf + 1)) { | |
+ write(fd, iobuf, p - iobuf); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ } | |
+ if (p != iobuf) { | |
+ write(fd, iobuf, p - iobuf); | |
+ } | |
} | |
-void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats ) | |
+void | |
+bayesfilt(dbt_t * pglist, dbt_t * pblist, vec_t * pmlist, stats_t * pstats) | |
{ | |
- veciter_t iter; | |
- str_t* pword; | |
+ veciter_t iter; | |
+ str_t *pword; | |
- double prob, product, invproduct, dev; | |
- double slotdev, hitdev; | |
+ double prob, product, invproduct, dev; | |
+ double slotdev, hitdev; | |
#ifdef NON_EQUIPROBABLE | |
- /* There is an argument that we should (go?) by number of *words* here. */ | |
- double msg_prob = ((double)pblist->nitems / (double)pglist->nitems); | |
+ /* There is an argument that we should (go?) by number of *words* | |
+ * here. */ | |
+ double msg_prob = ((double) pblist->nitems / (double) pglist->nitems); | |
+ | |
#endif | |
- discrim_t* pp; | |
- discrim_t* hit; | |
- | |
- for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
- { | |
- pp->key.p = NULL; | |
- pp->key.len = 0; | |
- pp->prob = 0.5f; | |
- } | |
- | |
- vec_first( pmlist, &iter ); | |
- while( (pword = veciter_get( &iter )) != NULL ) | |
- { | |
- double goodness = pglist->getcount( pglist, pword ); | |
- double spamness = pblist->getcount( pblist, pword ); | |
- uint goodtotal = pglist->getmsgcount( pglist ); | |
- uint spamtotal = pblist->getmsgcount( pblist ); | |
- | |
- if( goodness + spamness < MINIMUM_FREQ ) | |
- { | |
+ discrim_t *pp; | |
+ discrim_t *hit; | |
+ | |
+ for (pp = pstats->extrema; pp < pstats->extrema + pstats->keepers; pp+… | |
+ pp->key.p = NULL; | |
+ pp->key.len = 0; | |
+ pp->prob = 0.5f; | |
+ } | |
+ | |
+ vec_first(pmlist, &iter); | |
+ while ((pword = veciter_get(&iter)) != NULL) { | |
+ double goodness = pglist->getcount(pglist, pword); | |
+ double spamness = pblist->getcount(pblist, pword); | |
+ uint goodtotal = pglist->getmsgcount(pglist); | |
+ uint spamtotal = pblist->getmsgcount(pblist); | |
+ | |
+ if (goodness + spamness < MINIMUM_FREQ) { | |
#ifdef NON_EQUIPROBABLE | |
- /* | |
- * In the absence of evidence, the probability that a new word will | |
- * be spam is the historical ratio of spam words to nonspam words. | |
- */ | |
- prob = msg_prob; | |
+ /* | |
+ * In the absence of evidence, the probability that a … | |
+ * be spam is the historical ratio of spam words to no… | |
+ */ | |
+ prob = msg_prob; | |
#else | |
- prob = UNKNOWN_WORD; | |
+ prob = UNKNOWN_WORD; | |
#endif | |
- } | |
- else | |
- { | |
- double goodprob = goodtotal ? min( 1.0, (goodness / goodtotal) ) :… | |
- double spamprob = spamtotal ? min( 1.0, (spamness / spamtotal) ) :… | |
- assert( goodtotal > 0 || spamtotal > 0 ); | |
+ } else { | |
+ double goodprob = goodtotal ? min(1.0, (goodness / goo… | |
+ double spamprob = spamtotal ? min(1.0, (spamness / spa… | |
+ | |
+ assert(goodtotal > 0 || spamtotal > 0); | |
#ifdef NON_EQUIPROBABLE | |
- prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spa… | |
+ prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_p… | |
#else | |
- prob = spamprob / (goodprob + spamprob); | |
+ prob = spamprob / (goodprob + spamprob); | |
#endif | |
- prob = minmax( prob, 0.01, 0.99 ); | |
- } | |
- | |
- /* update the list of tokens with maximum deviation */ | |
- dev = DEVIATION(prob); | |
- hit = NULL; | |
- hitdev = 0; | |
- for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
- { | |
- /* don't allow duplicate tokens in the stats.extrema */ | |
- if( pp->key.len > 0 && str_casecmp( pword, &pp->key ) == 0 ) | |
- { | |
- hit = NULL; | |
- break; | |
- } | |
- | |
- slotdev = DEVIATION(pp->prob); | |
- if (dev>slotdev && dev>hitdev) | |
- { | |
- hit = pp; | |
- hitdev = slotdev; | |
- } | |
- } | |
- if (hit) | |
- { | |
- hit->prob = prob; | |
- hit->key = *pword; | |
- } | |
- | |
- veciter_next( &iter ); | |
- } | |
- veciter_destroy( &iter ); | |
- | |
- /* | |
- * Bayes' theorem. | |
- * For discussion, see <http://www.mathpages.com/home/kmath267.htm>. | |
- */ | |
- product = invproduct = 1.0f; | |
- for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
- { | |
- if( pp->prob == 0 ) | |
- { | |
- break; | |
- } | |
- else | |
- { | |
- product *= pp->prob; | |
- invproduct *= (1 - pp->prob); | |
- } | |
- } | |
- pstats->spamicity = product / (product + invproduct); | |
+ prob = minmax(prob, 0.01, 0.99); | |
+ } | |
+ | |
+ /* update the list of tokens with maximum deviation */ | |
+ dev = DEVIATION(prob); | |
+ hit = NULL; | |
+ hitdev = 0; | |
+ for (pp = pstats->extrema; pp < pstats->extrema + pstats->keep… | |
+ /* don't allow duplicate tokens in the stats.extrema */ | |
+ if (pp->key.len > 0 && str_casecmp(pword, &pp->key) ==… | |
+ hit = NULL; | |
+ break; | |
+ } | |
+ slotdev = DEVIATION(pp->prob); | |
+ if (dev > slotdev && dev > hitdev) { | |
+ hit = pp; | |
+ hitdev = slotdev; | |
+ } | |
+ } | |
+ if (hit) { | |
+ hit->prob = prob; | |
+ hit->key = *pword; | |
+ } | |
+ veciter_next(&iter); | |
+ } | |
+ veciter_destroy(&iter); | |
+ | |
+ /* | |
+ * Bayes' theorem. | |
+ * For discussion, see <http://www.mathpages.com/home/kmath267.htm>. | |
+ */ | |
+ product = invproduct = 1.0f; | |
+ for (pp = pstats->extrema; pp < pstats->extrema + pstats->keepers; pp+… | |
+ if (pp->prob == 0) { | |
+ break; | |
+ } else { | |
+ product *= pp->prob; | |
+ invproduct *= (1 - pp->prob); | |
+ } | |
+ } | |
+ pstats->spamicity = product / (product + invproduct); | |
} | |
-bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok ) | |
+bool_t | |
+bvec_loadmsg(vec_t * pthis, lex_t * plex, tok_t * ptok) | |
{ | |
- str_t w; | |
- | |
- lex_nexttoken( plex, ptok ); | |
- while( ptok->tt != eof && ptok->tt != from ) | |
- { | |
- w.p = ptok->p; | |
- w.len = ptok->len; | |
- vec_addtail( pthis, &w ); | |
- lex_nexttoken( plex, ptok ); | |
- } | |
- | |
- return true; | |
+ str_t w; | |
+ | |
+ lex_nexttoken(plex, ptok); | |
+ while (ptok->tt != eof && ptok->tt != from) { | |
+ w.p = ptok->p; | |
+ w.len = ptok->len; | |
+ vec_addtail(pthis, &w); | |
+ lex_nexttoken(plex, ptok); | |
+ } | |
+ | |
+ return true; | |
} | |
diff --git a/lex.c b/lex.c | |
@@ -16,772 +16,708 @@ | |
static cpchar g_htmltags[] = | |
{ | |
- "abbr", | |
- "above", | |
- "accesskey", | |
- "acronym", | |
- "align", | |
- "alink", | |
- "all", | |
- "alt", | |
- "applet", | |
- "archive", | |
- "axis", | |
- "basefont", | |
- "baseline", | |
- "below", | |
- "bgcolor", | |
- "big", | |
- "body", | |
- "border", | |
- "bottom", | |
- "box", | |
- "button", | |
- "cellpadding", | |
- "cellspacing", | |
- "center", | |
- "char", | |
- "charoff", | |
- "charset", | |
- "circle", | |
- "cite", | |
- "class", | |
- "classid", | |
- "clear", | |
- "codebase", | |
- "codetype", | |
- "color", | |
- "cols", | |
- "colspan", | |
- "compact", | |
- "content", | |
- "coords", | |
- "data", | |
- "datetime", | |
- "declare", | |
- "default", | |
- "defer", | |
- "dfn", | |
- "dir", | |
- "disabled", | |
- "face", | |
- "font", | |
- "frameborder", | |
- "groups", | |
- "head", | |
- "headers", | |
- "height", | |
- "href", | |
- "hreflang", | |
- "hsides", | |
- "hspace", | |
- "http-equiv", | |
- "iframe", | |
- "img", | |
- "input", | |
- "ismap", | |
- "justify", | |
- "kbd", | |
- "label", | |
- "lang", | |
- "language", | |
- "left", | |
- "lhs", | |
- "link", | |
- "longdesc", | |
- "map", | |
- "marginheight", | |
- "marginwidth", | |
- "media", | |
- "meta", | |
- "middle", | |
- "multiple", | |
- "name", | |
- "nohref", | |
- "none", | |
- "noresize", | |
- "noshade", | |
- "nowrap", | |
- "object", | |
- "onblur", | |
- "onchange", | |
- "onclick", | |
- "ondblclick", | |
- "onfocus", | |
- "onkeydown", | |
- "onkeypress", | |
- "onkeyup", | |
- "onload", | |
- "onmousedown", | |
- "onmousemove", | |
- "onmouseout", | |
- "onmouseover", | |
- "onmouseup", | |
- "onselect", | |
- "onunload", | |
- "param", | |
- "poly", | |
- "profile", | |
- "prompt", | |
- "readonly", | |
- "rect", | |
- "rel", | |
- "rev", | |
- "rhs", | |
- "right", | |
- "rows", | |
- "rowspan", | |
- "rules", | |
- "samp", | |
- "scheme", | |
- "scope", | |
- "script", | |
- "scrolling", | |
- "select", | |
- "selected", | |
- "shape", | |
- "size", | |
- "small", | |
- "span", | |
- "src", | |
- "standby", | |
- "strike", | |
- "strong", | |
- "style", | |
- "sub", | |
- "summary", | |
- "sup", | |
- "tabindex", | |
- "table", | |
- "target", | |
- "textarea", | |
- "title", | |
- "top", | |
- "type", | |
- "usemap", | |
- "valign", | |
- "value", | |
- "valuetype", | |
- "var", | |
- "vlink", | |
- "void", | |
- "vsides", | |
- "vspace", | |
- "width" | |
+ "abbr", | |
+ "above", | |
+ "accesskey", | |
+ "acronym", | |
+ "align", | |
+ "alink", | |
+ "all", | |
+ "alt", | |
+ "applet", | |
+ "archive", | |
+ "axis", | |
+ "basefont", | |
+ "baseline", | |
+ "below", | |
+ "bgcolor", | |
+ "big", | |
+ "body", | |
+ "border", | |
+ "bottom", | |
+ "box", | |
+ "button", | |
+ "cellpadding", | |
+ "cellspacing", | |
+ "center", | |
+ "char", | |
+ "charoff", | |
+ "charset", | |
+ "circle", | |
+ "cite", | |
+ "class", | |
+ "classid", | |
+ "clear", | |
+ "codebase", | |
+ "codetype", | |
+ "color", | |
+ "cols", | |
+ "colspan", | |
+ "compact", | |
+ "content", | |
+ "coords", | |
+ "data", | |
+ "datetime", | |
+ "declare", | |
+ "default", | |
+ "defer", | |
+ "dfn", | |
+ "dir", | |
+ "disabled", | |
+ "face", | |
+ "font", | |
+ "frameborder", | |
+ "groups", | |
+ "head", | |
+ "headers", | |
+ "height", | |
+ "href", | |
+ "hreflang", | |
+ "hsides", | |
+ "hspace", | |
+ "http-equiv", | |
+ "iframe", | |
+ "img", | |
+ "input", | |
+ "ismap", | |
+ "justify", | |
+ "kbd", | |
+ "label", | |
+ "lang", | |
+ "language", | |
+ "left", | |
+ "lhs", | |
+ "link", | |
+ "longdesc", | |
+ "map", | |
+ "marginheight", | |
+ "marginwidth", | |
+ "media", | |
+ "meta", | |
+ "middle", | |
+ "multiple", | |
+ "name", | |
+ "nohref", | |
+ "none", | |
+ "noresize", | |
+ "noshade", | |
+ "nowrap", | |
+ "object", | |
+ "onblur", | |
+ "onchange", | |
+ "onclick", | |
+ "ondblclick", | |
+ "onfocus", | |
+ "onkeydown", | |
+ "onkeypress", | |
+ "onkeyup", | |
+ "onload", | |
+ "onmousedown", | |
+ "onmousemove", | |
+ "onmouseout", | |
+ "onmouseover", | |
+ "onmouseup", | |
+ "onselect", | |
+ "onunload", | |
+ "param", | |
+ "poly", | |
+ "profile", | |
+ "prompt", | |
+ "readonly", | |
+ "rect", | |
+ "rel", | |
+ "rev", | |
+ "rhs", | |
+ "right", | |
+ "rows", | |
+ "rowspan", | |
+ "rules", | |
+ "samp", | |
+ "scheme", | |
+ "scope", | |
+ "script", | |
+ "scrolling", | |
+ "select", | |
+ "selected", | |
+ "shape", | |
+ "size", | |
+ "small", | |
+ "span", | |
+ "src", | |
+ "standby", | |
+ "strike", | |
+ "strong", | |
+ "style", | |
+ "sub", | |
+ "summary", | |
+ "sup", | |
+ "tabindex", | |
+ "table", | |
+ "target", | |
+ "textarea", | |
+ "title", | |
+ "top", | |
+ "type", | |
+ "usemap", | |
+ "valign", | |
+ "value", | |
+ "valuetype", | |
+ "var", | |
+ "vlink", | |
+ "void", | |
+ "vsides", | |
+ "vspace", | |
+ "width" | |
}; | |
-static const uint g_nhtmltags = sizeof(g_htmltags)/sizeof(cpchar); | |
+static const uint g_nhtmltags = sizeof(g_htmltags) / sizeof(cpchar); | |
static cpchar g_ignoredheaders[] = | |
{ | |
- "Date:", | |
- "Delivery-date:", | |
- "Message-ID:", | |
- "X-Sorted:", | |
- "X-Spam-" | |
+ "Date:", | |
+ "Delivery-date:", | |
+ "Message-ID:", | |
+ "X-Sorted:", | |
+ "X-Spam-" | |
}; | |
-static const uint g_nignoredheaders = sizeof(g_ignoredheaders)/sizeof(cpchar); | |
+static const uint g_nignoredheaders = sizeof(g_ignoredheaders) / sizeof(cpchar… | |
-static inline bool_t is_whitespace( int c ) | |
+static inline bool_t | |
+is_whitespace(int c) | |
{ | |
- return ( c == ' ' || c == '\t' || c == '\r' ); | |
+ return (c == ' ' || c == '\t' || c == '\r'); | |
} | |
-static inline bool_t is_base64char(c) | |
+static inline bool_t | |
+is_base64char(c) | |
{ | |
- return ( isalnum(c) || (c == '/' || c == '+') ); | |
+ return (isalnum(c) || (c == '/' || c == '+')); | |
} | |
-static inline bool_t is_wordmidchar(c) | |
+static inline bool_t | |
+is_wordmidchar(c) | |
{ | |
- return ( isalnum(c) || c == '$' || c == '\'' || c == '.' || c == '-' ); | |
+ return (isalnum(c) || c == '$' || c == '\'' || c == '.' || c == '-'); | |
} | |
-static inline bool_t is_wordendchar(c) | |
+static inline bool_t | |
+is_wordendchar(c) | |
{ | |
- return ( isalnum(c) || c == '$' ); | |
+ return (isalnum(c) || c == '$'); | |
} | |
-static inline bool_t is_htmltag( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_htmltag(cpchar p, uint len, uint * ptoklen) | |
{ | |
- int lo, hi, mid, minlen, cmp; | |
- | |
- *ptoklen = 0; | |
- | |
- hi = g_nhtmltags-1; | |
- lo = -1; | |
- while( hi-lo > 1 ) | |
- { | |
- mid = (hi+lo)/2; | |
- minlen = min( strlen(g_htmltags[mid]), len ); | |
- cmp = strncmp( g_htmltags[mid], p, minlen ); | |
- if( cmp > 0 || (cmp == 0 && minlen < len && !islower(p[minlen])) ) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- minlen = min( strlen(g_htmltags[hi]), len ); | |
- if( len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0 ) | |
- { | |
- return false; | |
- } | |
- | |
- /* check if is_word() will have a longer match */ | |
- if( is_wordendchar(p[minlen]) ) | |
- { | |
- return false; | |
- } | |
- if( is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen+1]) ) | |
- { | |
- return false; | |
- } | |
- | |
- *ptoklen = strlen(g_htmltags[hi]); | |
- | |
- return true; | |
+ int lo, hi, mid, minlen, cmp; | |
+ | |
+ *ptoklen = 0; | |
+ | |
+ hi = g_nhtmltags - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ minlen = min(strlen(g_htmltags[mid]), len); | |
+ cmp = strncmp(g_htmltags[mid], p, minlen); | |
+ if (cmp > 0 || (cmp == 0 && minlen < len && !islower(p[minlen]… | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ minlen = min(strlen(g_htmltags[hi]), len); | |
+ if (len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0) { | |
+ return false; | |
+ } | |
+ /* check if is_word() will have a longer match */ | |
+ if (is_wordendchar(p[minlen])) { | |
+ return false; | |
+ } | |
+ if (is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen + 1])) { | |
+ return false; | |
+ } | |
+ *ptoklen = strlen(g_htmltags[hi]); | |
+ | |
+ return true; | |
} | |
-static inline bool_t is_htmlcomment( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_htmlcomment(cpchar p, uint len, uint * ptoklen) | |
{ | |
- *ptoklen = 0; | |
- | |
- if( len >=4 && memcmp( p, "<!--", 4 ) == 0 ) | |
- { | |
- *ptoklen = 4; | |
- return true; | |
- } | |
- if( len >= 3 && memcmp( p, "-->", 3 ) == 0 ) | |
- { | |
- *ptoklen = 3; | |
- return true; | |
- } | |
- | |
- return false; | |
+ *ptoklen = 0; | |
+ | |
+ if (len >= 4 && memcmp(p, "<!--", 4) == 0) { | |
+ *ptoklen = 4; | |
+ return true; | |
+ } | |
+ if (len >= 3 && memcmp(p, "-->", 3) == 0) { | |
+ *ptoklen = 3; | |
+ return true; | |
+ } | |
+ return false; | |
} | |
-static inline bool_t is_base64( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_base64(cpchar p, uint len, uint * ptoklen) | |
{ | |
- *ptoklen = 0; | |
- while( len > 0 ) | |
- { | |
- if( *p != '\n' && *p != '\r' && !is_base64char(*p) ) | |
- { | |
- return false; | |
- } | |
- p++; | |
- len--; | |
- (*ptoklen)++; | |
- } | |
- return true; | |
+ *ptoklen = 0; | |
+ while (len > 0) { | |
+ if (*p != '\n' && *p != '\r' && !is_base64char(*p)) { | |
+ return false; | |
+ } | |
+ p++; | |
+ len--; | |
+ (*ptoklen)++; | |
+ } | |
+ return true; | |
} | |
-static inline bool_t is_mimeboundary( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_mimeboundary(cpchar p, uint len, uint * ptoklen) | |
{ | |
- *ptoklen = 0; | |
- | |
- if( len < 3 || p[0] != '-' || p[1] != '-' ) | |
- { | |
- return false; | |
- } | |
- p += 2; | |
- len -= 2; | |
- *ptoklen += 2; | |
- while( len > 0 ) | |
- { | |
- if( is_whitespace(*p) ) | |
- { | |
- return false; | |
- } | |
- if( *p == '\n' || *p == '\r' ) | |
- { | |
- break; | |
- } | |
- p++; | |
- len--; | |
- (*ptoklen)++; | |
- } | |
- return true; | |
+ *ptoklen = 0; | |
+ | |
+ if (len < 3 || p[0] != '-' || p[1] != '-') { | |
+ return false; | |
+ } | |
+ p += 2; | |
+ len -= 2; | |
+ *ptoklen += 2; | |
+ while (len > 0) { | |
+ if (is_whitespace(*p)) { | |
+ return false; | |
+ } | |
+ if (*p == '\n' || *p == '\r') { | |
+ break; | |
+ } | |
+ p++; | |
+ len--; | |
+ (*ptoklen)++; | |
+ } | |
+ return true; | |
} | |
-static inline bool_t is_ipaddr( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_ipaddr(cpchar p, uint len, uint * ptoklen) | |
{ | |
- uint noctets, ndigits; | |
- | |
- *ptoklen = 0; | |
- | |
- noctets = 0; | |
- while( len > 0 && noctets < 4 ) | |
- { | |
- ndigits = 0; | |
- while( len > 0 && isdigit(*p) ) | |
- { | |
- ndigits++; | |
- p++; | |
- len--; | |
- (*ptoklen)++; | |
- } | |
- if( ndigits == 0 || ndigits > 3 ) | |
- { | |
- return false; | |
- } | |
- noctets++; | |
- if( noctets < 4 ) | |
- { | |
- if( *p != '.' ) | |
- { | |
- return false; | |
- } | |
- p++; | |
- len--; | |
- (*ptoklen)++; | |
- } | |
- } | |
- if( noctets < 4 ) | |
- { | |
- return false; | |
- } | |
- return true; | |
+ uint noctets, ndigits; | |
+ | |
+ *ptoklen = 0; | |
+ | |
+ noctets = 0; | |
+ while (len > 0 && noctets < 4) { | |
+ ndigits = 0; | |
+ while (len > 0 && isdigit(*p)) { | |
+ ndigits++; | |
+ p++; | |
+ len--; | |
+ (*ptoklen)++; | |
+ } | |
+ if (ndigits == 0 || ndigits > 3) { | |
+ return false; | |
+ } | |
+ noctets++; | |
+ if (noctets < 4) { | |
+ if (*p != '.') { | |
+ return false; | |
+ } | |
+ p++; | |
+ len--; | |
+ (*ptoklen)++; | |
+ } | |
+ } | |
+ if (noctets < 4) { | |
+ return false; | |
+ } | |
+ return true; | |
} | |
-static inline bool_t is_word( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_word(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 3 ) | |
- { | |
- return false; | |
- } | |
- if( !(isalpha(*p) || *p == '$') ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = 1; | |
- p++; | |
- len--; | |
- while( len > 0 ) | |
- { | |
- if( !is_wordmidchar(*p) ) | |
- { | |
- break; | |
- } | |
- (*ptoklen)++; | |
- p++; | |
- len--; | |
- } | |
- while( *ptoklen >= 3 && !is_wordendchar(*(p-1)) ) | |
- { | |
- (*ptoklen)--; | |
- p--; | |
- len++; | |
- } | |
- if( *ptoklen < 3 ) | |
- { | |
- return false; | |
- } | |
- | |
- return true; | |
+ if (len < 3) { | |
+ return false; | |
+ } | |
+ if (!(isalpha(*p) || *p == '$')) { | |
+ return false; | |
+ } | |
+ *ptoklen = 1; | |
+ p++; | |
+ len--; | |
+ while (len > 0) { | |
+ if (!is_wordmidchar(*p)) { | |
+ break; | |
+ } | |
+ (*ptoklen)++; | |
+ p++; | |
+ len--; | |
+ } | |
+ while (*ptoklen >= 3 && !is_wordendchar(*(p - 1))) { | |
+ (*ptoklen)--; | |
+ p--; | |
+ len++; | |
+ } | |
+ if (*ptoklen < 3) { | |
+ return false; | |
+ } | |
+ return true; | |
} | |
-static inline bool_t is_ignoredheader( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_ignoredheader(cpchar p, uint len, uint * ptoklen) | |
{ | |
- int lo, hi, mid, minlen, cmp; | |
- | |
- hi = g_nignoredheaders-1; | |
- lo = -1; | |
- while( hi-lo > 1 ) | |
- { | |
- mid = (hi+lo)/2; | |
- minlen = min( strlen(g_ignoredheaders[mid]), len ); | |
- cmp = strncasecmp( g_ignoredheaders[mid], p, minlen ); | |
- if( cmp >= 0 ) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- minlen = min( strlen(g_ignoredheaders[hi]), len ); | |
- if( len == minlen || strncasecmp(g_ignoredheaders[hi], p, minlen) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = len; | |
- return true; | |
+ int lo, hi, mid, minlen, cmp; | |
+ | |
+ hi = g_nignoredheaders - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ minlen = min(strlen(g_ignoredheaders[mid]), len); | |
+ cmp = strncasecmp(g_ignoredheaders[mid], p, minlen); | |
+ if (cmp >= 0) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ minlen = min(strlen(g_ignoredheaders[hi]), len); | |
+ if (len == minlen || strncasecmp(g_ignoredheaders[hi], p, minlen) != 0… | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
} | |
-static inline bool_t is_mailerid( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_mailerid(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 4 || strncmp( p, "\tid ", 4 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = len; | |
- return true; | |
+ if (len < 4 || strncmp(p, "\tid ", 4) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
} | |
-static inline bool_t is_spamtext( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_spamtext(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 5 || strncmp( p, "SPAM:", 5 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = len; | |
- return true; | |
+ if (len < 5 || strncmp(p, "SPAM:", 5) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
} | |
-static inline bool_t is_smtpid( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_smtpid(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 8 || strncmp( p, "SMTP id ", 8 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = len; | |
- return true; | |
+ if (len < 8 || strncmp(p, "SMTP id ", 8) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
} | |
-static inline bool_t is_boundaryequal( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_boundaryequal(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 9 || strncmp( p, "boundary=", 9 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = len; | |
- return true; | |
+ if (len < 9 || strncmp(p, "boundary=", 9) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
} | |
-static inline bool_t is_nameequal( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_nameequal(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 6 || strncmp( p, "name=\"", 6 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = 6; | |
- return true; | |
+ if (len < 6 || strncmp(p, "name=\"", 6) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = 6; | |
+ return true; | |
} | |
-static inline bool_t is_filenameequal( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_filenameequal(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 10 || strncmp( p, "filename=\"", 10 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = 10; | |
- return true; | |
+ if (len < 10 || strncmp(p, "filename=\"", 10) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = 10; | |
+ return true; | |
} | |
-static inline bool_t is_from( cpchar p, uint len, uint* ptoklen ) | |
+static inline bool_t | |
+is_from(cpchar p, uint len, uint * ptoklen) | |
{ | |
- if( len < 5 || strncmp( p, "From ", 5 ) != 0 ) | |
- { | |
- return false; | |
- } | |
- *ptoklen = 5; | |
- return true; | |
+ if (len < 5 || strncmp(p, "From ", 5) != 0) { | |
+ return false; | |
+ } | |
+ *ptoklen = 5; | |
+ return true; | |
} | |
-/*****************************************************************************/ | |
- | |
-void lex_create( lex_t* pthis, mbox_t mboxtype ) | |
+void | |
+lex_create(lex_t * pthis, mbox_t mboxtype) | |
{ | |
- pthis->mboxtype = mboxtype; | |
- pthis->section = envelope; | |
- pthis->pos = 0; | |
- pthis->bom = 0; | |
- pthis->eom = 0; | |
- pthis->lineend = 0; | |
- pthis->buflen = 0; | |
- pthis->pbuf = NULL; | |
+ pthis->mboxtype = mboxtype; | |
+ pthis->section = envelope; | |
+ pthis->pos = 0; | |
+ pthis->bom = 0; | |
+ pthis->eom = 0; | |
+ pthis->lineend = 0; | |
+ pthis->buflen = 0; | |
+ pthis->pbuf = NULL; | |
} | |
-void lex_destroy( lex_t* pthis ) | |
+void | |
+lex_destroy(lex_t * pthis) | |
{ | |
- free( pthis->pbuf ); | |
+ free(pthis->pbuf); | |
} | |
-bool_t lex_load( lex_t* pthis, int fd ) | |
+bool_t | |
+lex_load(lex_t * pthis, int fd) | |
{ | |
- uint nalloc; | |
- ssize_t nread; | |
- | |
- nalloc = IOBUFSIZE; | |
- pthis->pbuf = (char*)malloc( IOBUFSIZE ); | |
- if( pthis->pbuf == NULL ) | |
- { | |
- return false; | |
- } | |
- | |
- while( (nread = read( fd, pthis->pbuf + pthis->buflen, nalloc - pthis->buf… | |
- { | |
- pthis->buflen += nread; | |
- if( pthis->buflen == nalloc ) | |
- { | |
- char* pnewbuf; | |
- nalloc += IOBUFSIZE; | |
- pnewbuf = (char*)realloc( pthis->pbuf, nalloc ); | |
- if( pnewbuf == NULL ) | |
- { | |
- free( pthis->pbuf ); | |
- pthis->pbuf = NULL; | |
- return false; | |
- } | |
- pthis->pbuf = pnewbuf; | |
- } | |
- } | |
- if( nread < 0 ) | |
- { | |
- free( pthis->pbuf ); | |
- pthis->pbuf = NULL; | |
- return false; | |
- } | |
- if( pthis->mboxtype == detect ) | |
- { | |
- if( pthis->buflen > 5 && memcmp( pthis->pbuf, "From ", 5 ) == 0 ) | |
- { | |
- verbose( 1, "Input looks like an mbox\n" ); | |
- pthis->mboxtype = mbox; | |
- } | |
- else | |
- { | |
- verbose( 1, "Input looks like a maildir\n" ); | |
- pthis->mboxtype = maildir; | |
- } | |
- } | |
- | |
- return true; | |
+ uint nalloc; | |
+ ssize_t nread; | |
+ | |
+ nalloc = IOBUFSIZE; | |
+ pthis->pbuf = (char *) malloc(IOBUFSIZE); | |
+ if (pthis->pbuf == NULL) { | |
+ return false; | |
+ } | |
+ while ((nread = read(fd, pthis->pbuf + pthis->buflen, nalloc - pthis->… | |
+ pthis->buflen += nread; | |
+ if (pthis->buflen == nalloc) { | |
+ char *pnewbuf; | |
+ | |
+ nalloc += IOBUFSIZE; | |
+ pnewbuf = (char *) realloc(pthis->pbuf, nalloc); | |
+ if (pnewbuf == NULL) { | |
+ free(pthis->pbuf); | |
+ pthis->pbuf = NULL; | |
+ return false; | |
+ } | |
+ pthis->pbuf = pnewbuf; | |
+ } | |
+ } | |
+ if (nread < 0) { | |
+ free(pthis->pbuf); | |
+ pthis->pbuf = NULL; | |
+ return false; | |
+ } | |
+ if (pthis->mboxtype == detect) { | |
+ if (pthis->buflen > 5 && memcmp(pthis->pbuf, "From ", 5) == 0)… | |
+ verbose(1, "Input looks like an mbox\n"); | |
+ pthis->mboxtype = mbox; | |
+ } else { | |
+ verbose(1, "Input looks like a maildir\n"); | |
+ pthis->mboxtype = maildir; | |
+ } | |
+ } | |
+ return true; | |
} | |
-static bool_t lex_nextline( lex_t* pthis ) | |
+static bool_t | |
+lex_nextline(lex_t * pthis) | |
{ | |
- cpchar pbuf; | |
- uint len; | |
- uint toklen; | |
+ cpchar pbuf; | |
+ uint len; | |
+ uint toklen; | |
again: | |
- /* XXX: use and update pthis->section */ | |
- pthis->pos = pthis->lineend; | |
- if( pthis->lineend == pthis->buflen ) | |
- { | |
- return false; | |
- } | |
- | |
- pbuf = pthis->pbuf + pthis->pos; | |
- len = 0; | |
- while( pthis->pos + len < pthis->buflen && pbuf[len] != '\n' ) | |
- { | |
- len++; | |
- } | |
- if( pthis->pos + len < pthis->buflen ) | |
- { | |
- len++; /* bump past the LF */ | |
- } | |
- | |
- pthis->lineend = pthis->pos + len; | |
- | |
- /* check beginning-of-line patterns */ | |
- if( is_base64( pbuf, len, &toklen ) || | |
- is_ignoredheader( pbuf, len, &toklen ) || | |
- is_mailerid( pbuf, len, &toklen ) || | |
- is_mimeboundary( pbuf, len, &toklen ) || | |
- is_spamtext( pbuf, len, &toklen ) ) | |
- { | |
- /* ignore line */ | |
- pthis->pos += toklen; | |
- goto again; | |
- } | |
- | |
- return true; | |
+ /* XXX: use and update pthis->section */ | |
+ pthis->pos = pthis->lineend; | |
+ if (pthis->lineend == pthis->buflen) { | |
+ return false; | |
+ } | |
+ pbuf = pthis->pbuf + pthis->pos; | |
+ len = 0; | |
+ while (pthis->pos + len < pthis->buflen && pbuf[len] != '\n') { | |
+ len++; | |
+ } | |
+ if (pthis->pos + len < pthis->buflen) { | |
+ len++; /* bump past the LF */ | |
+ } | |
+ pthis->lineend = pthis->pos + len; | |
+ | |
+ /* check beginning-of-line patterns */ | |
+ if (is_base64(pbuf, len, &toklen) || | |
+ is_ignoredheader(pbuf, len, &toklen) || | |
+ is_mailerid(pbuf, len, &toklen) || | |
+ is_mimeboundary(pbuf, len, &toklen) || | |
+ is_spamtext(pbuf, len, &toklen)) { | |
+ /* ignore line */ | |
+ pthis->pos += toklen; | |
+ goto again; | |
+ } | |
+ return true; | |
} | |
-void lex_nexttoken( lex_t* pthis, tok_t* ptok ) | |
+void | |
+lex_nexttoken(lex_t * pthis, tok_t * ptok) | |
{ | |
- cpchar pbuf; | |
- uint len; | |
- uint toklen; | |
- | |
- assert( pthis->pbuf != NULL ); | |
+ cpchar pbuf; | |
+ uint len; | |
+ uint toklen; | |
- if( pthis->pos == pthis->eom ) | |
- { | |
- pthis->bom = pthis->pos; | |
- } | |
+ assert(pthis->pbuf != NULL); | |
+ if (pthis->pos == pthis->eom) { | |
+ pthis->bom = pthis->pos; | |
+ } | |
again: | |
- /* skip whitespace between tokens */ | |
- while( pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis->po… | |
- { | |
- pthis->pos++; | |
- } | |
- | |
- pbuf = pthis->pbuf + pthis->pos; | |
- len = pthis->lineend - pthis->pos; | |
- | |
- /* possibilities: end-of-line, html-comment, ipaddr, word, junk */ | |
- | |
- if( pthis->pos == pthis->lineend ) | |
- { | |
- if( !lex_nextline( pthis ) ) | |
- { | |
- pthis->eom = pthis->pos; | |
- ptok->tt = eof; | |
- return; | |
- } | |
- | |
- pbuf = pthis->pbuf + pthis->pos; | |
- len = pthis->lineend - pthis->pos; | |
- | |
- if( pthis->mboxtype == mbox ) | |
- { | |
- if( is_from( pbuf, len, &toklen ) ) | |
- { | |
- pthis->eom = pthis->pos; | |
- ptok->tt = from; | |
- ptok->p = pthis->pbuf + pthis->pos; | |
- ptok->len = toklen; | |
- pthis->pos += toklen; | |
- return; | |
- } | |
- } | |
- | |
- goto again; /* skip lws */ | |
- } | |
- | |
- if( is_htmltag( pbuf, len, &toklen ) || | |
- is_htmlcomment( pbuf, len, &toklen ) || | |
- is_smtpid( pbuf, len, &toklen ) || | |
- is_boundaryequal( pbuf, len, &toklen ) || | |
- is_nameequal( pbuf, len, &toklen ) || | |
- is_filenameequal( pbuf, len, &toklen ) ) | |
- { | |
- /* ignore it */ | |
- pthis->pos += toklen; | |
- goto again; | |
- } | |
- | |
- if( is_ipaddr( pbuf, len, &toklen ) ) | |
- { | |
- ptok->tt = word; | |
- ptok->p = pthis->pbuf + pthis->pos; | |
- ptok->len = toklen; | |
- pthis->pos += toklen; | |
- return; | |
- } | |
- if( is_word( pbuf, len, &toklen ) ) | |
- { | |
- ptok->tt = word; | |
- ptok->p = pthis->pbuf + pthis->pos; | |
- ptok->len = toklen; | |
- pthis->pos += toklen; | |
- if( toklen > MAXWORDLEN ) | |
- { | |
- goto again; | |
- } | |
- return; | |
- } | |
- | |
- /* junk */ | |
- pthis->pos++; | |
- goto again; | |
+ /* skip whitespace between tokens */ | |
+ while (pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis… | |
+ pthis->pos++; | |
+ } | |
+ | |
+ pbuf = pthis->pbuf + pthis->pos; | |
+ len = pthis->lineend - pthis->pos; | |
+ | |
+ /* possibilities: end-of-line, html-comment, ipaddr, word, junk */ | |
+ | |
+ if (pthis->pos == pthis->lineend) { | |
+ if (!lex_nextline(pthis)) { | |
+ pthis->eom = pthis->pos; | |
+ ptok->tt = eof; | |
+ return; | |
+ } | |
+ pbuf = pthis->pbuf + pthis->pos; | |
+ len = pthis->lineend - pthis->pos; | |
+ | |
+ if (pthis->mboxtype == mbox) { | |
+ if (is_from(pbuf, len, &toklen)) { | |
+ pthis->eom = pthis->pos; | |
+ ptok->tt = from; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ return; | |
+ } | |
+ } | |
+ goto again; /* skip lws */ | |
+ } | |
+ if (is_htmltag(pbuf, len, &toklen) || | |
+ is_htmlcomment(pbuf, len, &toklen) || | |
+ is_smtpid(pbuf, len, &toklen) || | |
+ is_boundaryequal(pbuf, len, &toklen) || | |
+ is_nameequal(pbuf, len, &toklen) || | |
+ is_filenameequal(pbuf, len, &toklen)) { | |
+ /* ignore it */ | |
+ pthis->pos += toklen; | |
+ goto again; | |
+ } | |
+ if (is_ipaddr(pbuf, len, &toklen)) { | |
+ ptok->tt = word; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ return; | |
+ } | |
+ if (is_word(pbuf, len, &toklen)) { | |
+ ptok->tt = word; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ if (toklen > MAXWORDLEN) { | |
+ goto again; | |
+ } | |
+ return; | |
+ } | |
+ /* junk */ | |
+ pthis->pos++; | |
+ goto again; | |
} | |
/* SpamAssassin style passthru */ | |
-void lex_passthru( lex_t* pthis, bool_t is_spam, double hits ) | |
+void | |
+lex_passthru(lex_t * pthis, bool_t is_spam, double hits) | |
{ | |
- char szbuf[256]; | |
- bool_t in_headers = true; | |
- | |
- assert( pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen ); | |
- assert( pthis->bom <= pthis->eom ); | |
- | |
- pthis->pos = pthis->bom; | |
- if( is_spam ) | |
- { | |
- sprintf( szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n" | |
- "X-Spam-Flag: YES\n", | |
- hits, SPAM_CUTOFF ); | |
- } | |
- else | |
- { | |
- sprintf( szbuf, "X-Spam-Status: No, hits=%f required=%f\n", | |
- hits, SPAM_CUTOFF ); | |
- } | |
- | |
- /* existing headers */ | |
- while( in_headers && pthis->pos < pthis->eom ) | |
- { | |
- cpchar pbuf = pthis->pbuf + pthis->pos; | |
- uint len = 0; | |
- while( pthis->pos + len < pthis->buflen && pbuf[len] != '\n' ) | |
- { | |
- len++; | |
- } | |
- if( pthis->pos + len < pthis->buflen ) | |
- { | |
- len++; /* bump past the LF */ | |
- } | |
- | |
- /* check for end of headers */ | |
- if( pbuf[0] == '\n' || (pbuf[0] == '\r' && pbuf[1] == '\n') ) | |
- { | |
- /* end of headers */ | |
- break; | |
- } | |
- | |
- /* write header, ignoring existing spam headers */ | |
- if( strncasecmp( pbuf, "X-Spam-", 7 ) != 0 ) | |
- { | |
- write( STDOUT_FILENO, pbuf, len ); | |
- } | |
- | |
- pthis->pos += len; | |
- } | |
- | |
- /* new headers */ | |
- write( STDOUT_FILENO, szbuf, strlen(szbuf) ); | |
- | |
- /* remainder */ | |
- if( pthis->pos < pthis->eom ) | |
- { | |
- write( STDOUT_FILENO, pthis->pbuf+pthis->pos, pthis->eom-pthis->pos ); | |
- } | |
- pthis->bom = pthis->eom; | |
+ char szbuf[256]; | |
+ bool_t in_headers = true; | |
+ | |
+ assert(pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen); | |
+ assert(pthis->bom <= pthis->eom); | |
+ | |
+ pthis->pos = pthis->bom; | |
+ if (is_spam) { | |
+ sprintf(szbuf, "X-Spam-Status: Yes, hits=%f required=%f, tests… | |
+ "X-Spam-Flag: YES\n", | |
+ hits, SPAM_CUTOFF); | |
+ } else { | |
+ sprintf(szbuf, "X-Spam-Status: No, hits=%f required=%f\n", | |
+ hits, SPAM_CUTOFF); | |
+ } | |
+ | |
+ /* existing headers */ | |
+ while (in_headers && pthis->pos < pthis->eom) { | |
+ cpchar pbuf = pthis->pbuf + pthis->pos; | |
+ uint len = 0; | |
+ | |
+ while (pthis->pos + len < pthis->buflen && pbuf[len] != '\n') { | |
+ len++; | |
+ } | |
+ if (pthis->pos + len < pthis->buflen) { | |
+ len++; /* bump past the LF */ | |
+ } | |
+ /* check for end of headers */ | |
+ if (pbuf[0] == '\n' || (pbuf[0] == '\r' && pbuf[1] == '\n')) { | |
+ /* end of headers */ | |
+ break; | |
+ } | |
+ /* write header, ignoring existing spam headers */ | |
+ if (strncasecmp(pbuf, "X-Spam-", 7) != 0) { | |
+ write(STDOUT_FILENO, pbuf, len); | |
+ } | |
+ pthis->pos += len; | |
+ } | |
+ | |
+ /* new headers */ | |
+ write(STDOUT_FILENO, szbuf, strlen(szbuf)); | |
+ | |
+ /* remainder */ | |
+ if (pthis->pos < pthis->eom) { | |
+ write(STDOUT_FILENO, pthis->pbuf + pthis->pos, pthis->eom - pt… | |
+ } | |
+ pthis->bom = pthis->eom; | |
} | |
#ifdef UNIT_TEST | |
-int main( int argc, char** argv ) | |
+int | |
+main(int argc, char **argv) | |
{ | |
- int fd; | |
- lex_t lex; | |
- tok_t tok; | |
- | |
- fd = STDIN_FILENO; | |
- if( argc == 2 ) | |
- { | |
- fd = open( argv[1], O_RDONLY ); | |
- } | |
- | |
- lex_create( &lex ); | |
- if( ! lex_load( &lex, fd ) ) | |
- { | |
- fprintf( stderr, "cannot load file\n" ); | |
- exit( 1 ); | |
- } | |
- | |
- lex_nexttoken( &lex, &tok ); | |
- while( tok.tt != eof ) | |
- { | |
- char sztok[64]; | |
- if( tok.len > MAXWORDLEN ) | |
- { | |
- printf( "*** token too long! ***\n" ); | |
- exit( 1 ); | |
- } | |
- | |
- memcpy( sztok, tok.p, tok.len ); | |
- strlwr( sztok ); | |
- sztok[tok.len] = '\0'; | |
- printf( "get_token: %d '%s'\n", tok.tt, sztok ); | |
- | |
- lex_nexttoken( &lex, &tok ); | |
- } | |
- | |
- lex_destroy( &lex ); | |
- return 0; | |
+ int fd; | |
+ lex_t lex; | |
+ tok_t tok; | |
+ | |
+ fd = STDIN_FILENO; | |
+ if (argc == 2) { | |
+ fd = open(argv[1], O_RDONLY); | |
+ } | |
+ lex_create(&lex); | |
+ if (!lex_load(&lex, fd)) { | |
+ fprintf(stderr, "cannot load file\n"); | |
+ exit(1); | |
+ } | |
+ lex_nexttoken(&lex, &tok); | |
+ while (tok.tt != eof) { | |
+ char sztok[64]; | |
+ | |
+ if (tok.len > MAXWORDLEN) { | |
+ printf("*** token too long! ***\n"); | |
+ exit(1); | |
+ } | |
+ memcpy(sztok, tok.p, tok.len); | |
+ strlwr(sztok); | |
+ sztok[tok.len] = '\0'; | |
+ printf("get_token: %d '%s'\n", tok.tt, sztok); | |
+ | |
+ lex_nexttoken(&lex, &tok); | |
+ } | |
+ | |
+ lex_destroy(&lex); | |
+ return 0; | |
} | |
-#endif /* def UNIT_TEST */ | |
+#endif /* def UNIT_TEST */ | |
diff --git a/str.c b/str.c | |
@@ -11,68 +11,72 @@ | |
#include "dbg.h" | |
#include "str.h" | |
-void strlwr( char* s ) | |
+void | |
+strlwr(char *s) | |
{ | |
- while( *s != '\0' ) | |
- { | |
- *s = tolower(*s); | |
- s++; | |
- } | |
+ while (*s != '\0') { | |
+ *s = tolower(*s); | |
+ s++; | |
+ } | |
} | |
-void strcpylwr( char* d, const char* s ) | |
+void | |
+strcpylwr(char *d, const char *s) | |
{ | |
- while( *s != '\0' ) | |
- { | |
- *d++ = tolower(*s++); | |
- } | |
+ while (*s != '\0') { | |
+ *d++ = tolower(*s++); | |
+ } | |
} | |
-void strncpylwr( char* d, const char* s, int n ) | |
+void | |
+strncpylwr(char *d, const char *s, int n) | |
{ | |
- while( n-- ) | |
- { | |
- *d++ = tolower(*s++); | |
- } | |
+ while (n--) { | |
+ *d++ = tolower(*s++); | |
+ } | |
} | |
-void str_create( str_t* pstr ) | |
+void | |
+str_create(str_t * pstr) | |
{ | |
- pstr->p = NULL; | |
- pstr->len = 0; | |
+ pstr->p = NULL; | |
+ pstr->len = 0; | |
} | |
-void str_destroy( str_t* pstr ) | |
+void | |
+str_destroy(str_t * pstr) | |
{ | |
- /* empty */ | |
+ /* empty */ | |
} | |
-int str_cmp( const str_t* pthis, const str_t* pother ) | |
+int | |
+str_cmp(const str_t * pthis, const str_t * pother) | |
{ | |
- uint minlen = min( pthis->len, pother->len ); | |
- int cmp; | |
- assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); | |
+ uint minlen = min(pthis->len, pother->len); | |
+ int cmp; | |
- cmp = strncmp( pthis->p, pother->p, minlen ); | |
+ assert(pthis->p != NULL && pother->p != NULL && minlen != 0); | |
- if( cmp == 0 && pthis->len != pother->len ) | |
- { | |
- cmp = (pthis->len < pother->len) ? -1 : 1; | |
- } | |
- return cmp; | |
+ cmp = strncmp(pthis->p, pother->p, minlen); | |
+ | |
+ if (cmp == 0 && pthis->len != pother->len) { | |
+ cmp = (pthis->len < pother->len) ? -1 : 1; | |
+ } | |
+ return cmp; | |
} | |
-int str_casecmp( const str_t* pthis, const str_t* pother ) | |
+int | |
+str_casecmp(const str_t * pthis, const str_t * pother) | |
{ | |
- uint minlen = min( pthis->len, pother->len ); | |
- int cmp; | |
- assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); | |
+ uint minlen = min(pthis->len, pother->len); | |
+ int cmp; | |
+ | |
+ assert(pthis->p != NULL && pother->p != NULL && minlen != 0); | |
- cmp = strncasecmp( pthis->p, pother->p, minlen ); | |
+ cmp = strncasecmp(pthis->p, pother->p, minlen); | |
- if( cmp == 0 && pthis->len != pother->len ) | |
- { | |
- cmp = (pthis->len < pother->len) ? -1 : 1; | |
- } | |
- return cmp; | |
+ if (cmp == 0 && pthis->len != pother->len) { | |
+ cmp = (pthis->len < pother->len) ? -1 : 1; | |
+ } | |
+ return cmp; | |
} | |
diff --git a/vec.c b/vec.c | |
@@ -23,323 +23,316 @@ | |
* vector | |
*/ | |
-void vec_create( vec_t* pthis ) | |
+void | |
+vec_create(vec_t * pthis) | |
{ | |
- pthis->nalloc = VEC_INITIAL_SIZE; | |
- pthis->nitems = 0; | |
- pthis->pitems = (str_t*)malloc( VEC_INITIAL_SIZE*sizeof(str_t) ); | |
+ pthis->nalloc = VEC_INITIAL_SIZE; | |
+ pthis->nitems = 0; | |
+ pthis->pitems = (str_t *) malloc(VEC_INITIAL_SIZE * sizeof(str_t)); | |
} | |
-void vec_destroy( vec_t* pthis ) | |
+void | |
+vec_destroy(vec_t * pthis) | |
{ | |
- free( pthis->pitems ); | |
+ free(pthis->pitems); | |
} | |
-static void vec_setsize( vec_t* pthis, uint nsize ) | |
+static void | |
+vec_setsize(vec_t * pthis, uint nsize) | |
{ | |
- if( nsize > pthis->nalloc ) | |
- { | |
- uint nnewalloc; | |
- str_t* pnewitems; | |
- uint n; | |
- | |
- nnewalloc = pthis->nalloc * 2; | |
- if( nnewalloc < nsize ) nnewalloc = nsize; | |
- pnewitems = (str_t*)realloc( pthis->pitems, nnewalloc*sizeof(str_t) ); | |
- if( pnewitems == NULL ) | |
- { | |
- exit( 2 ); | |
- } | |
- for( n = pthis->nitems; n < nsize; n++ ) | |
- { | |
- str_create( &pnewitems[n] ); | |
- } | |
- pthis->pitems = pnewitems; | |
- pthis->nalloc = nnewalloc; | |
- } | |
+ if (nsize > pthis->nalloc) { | |
+ uint nnewalloc; | |
+ str_t *pnewitems; | |
+ uint n; | |
+ | |
+ nnewalloc = pthis->nalloc * 2; | |
+ if (nnewalloc < nsize) | |
+ nnewalloc = nsize; | |
+ pnewitems = (str_t *) realloc(pthis->pitems, nnewalloc * sizeo… | |
+ if (pnewitems == NULL) { | |
+ exit(2); | |
+ } | |
+ for (n = pthis->nitems; n < nsize; n++) { | |
+ str_create(&pnewitems[n]); | |
+ } | |
+ pthis->pitems = pnewitems; | |
+ pthis->nalloc = nnewalloc; | |
+ } | |
} | |
-void vec_addhead( vec_t* pthis, str_t* pstr ) | |
+void | |
+vec_addhead(vec_t * pthis, str_t * pstr) | |
{ | |
- assert( pstr->p != NULL && pstr->len > 0 ); | |
+ assert(pstr->p != NULL && pstr->len > 0); | |
- vec_setsize( pthis, pthis->nitems+1 ); | |
- memmove( &pthis->pitems[1], &pthis->pitems[0], pthis->nitems*sizeof(str_t)… | |
- pthis->pitems[0] = *pstr; | |
- pthis->nitems++; | |
+ vec_setsize(pthis, pthis->nitems + 1); | |
+ memmove(&pthis->pitems[1], &pthis->pitems[0], pthis->nitems * sizeof(s… | |
+ pthis->pitems[0] = *pstr; | |
+ pthis->nitems++; | |
} | |
-void vec_addtail( vec_t* pthis, str_t* pstr ) | |
+void | |
+vec_addtail(vec_t * pthis, str_t * pstr) | |
{ | |
- assert( pstr->p != NULL && pstr->len > 0 ); | |
+ assert(pstr->p != NULL && pstr->len > 0); | |
- vec_setsize( pthis, pthis->nitems+1 ); | |
- pthis->pitems[pthis->nitems] = *pstr; | |
- pthis->nitems++; | |
+ vec_setsize(pthis, pthis->nitems + 1); | |
+ pthis->pitems[pthis->nitems] = *pstr; | |
+ pthis->nitems++; | |
} | |
-void vec_delhead( vec_t* pthis ) | |
+void | |
+vec_delhead(vec_t * pthis) | |
{ | |
- assert( pthis->nitems > 0 ); | |
- pthis->nitems--; | |
- memmove( &pthis->pitems[0], &pthis->pitems[1], pthis->nitems*sizeof(str_t)… | |
+ assert(pthis->nitems > 0); | |
+ pthis->nitems--; | |
+ memmove(&pthis->pitems[0], &pthis->pitems[1], pthis->nitems * sizeof(s… | |
} | |
-void vec_deltail( vec_t* pthis ) | |
+void | |
+vec_deltail(vec_t * pthis) | |
{ | |
- assert( pthis->nitems > 0 ); | |
- pthis->nitems--; | |
+ assert(pthis->nitems > 0); | |
+ pthis->nitems--; | |
} | |
-void vec_first( vec_t* pthis, veciter_t* piter ) | |
+void | |
+vec_first(vec_t * pthis, veciter_t * piter) | |
{ | |
- piter->plist = pthis; | |
- piter->index = 0; | |
+ piter->plist = pthis; | |
+ piter->index = 0; | |
} | |
-void vec_last( vec_t* pthis, veciter_t* piter ) | |
+void | |
+vec_last(vec_t * pthis, veciter_t * piter) | |
{ | |
- piter->plist = pthis; | |
- piter->index = pthis->nitems; | |
+ piter->plist = pthis; | |
+ piter->index = pthis->nitems; | |
} | |
/***************************************************************************** | |
* sorted vector | |
*/ | |
-static int svec_compare( const void* p1, const void* p2 ) | |
+static int | |
+svec_compare(const void *p1, const void *p2) | |
{ | |
- return str_casecmp( (const str_t*)p1, (const str_t*)p2 ); | |
+ return str_casecmp((const str_t *) p1, (const str_t *) p2); | |
} | |
-void svec_add( vec_t* pthis, str_t* pstr ) | |
+void | |
+svec_add(vec_t * pthis, str_t * pstr) | |
{ | |
- int lo, hi, mid; | |
- veciter_t iter; | |
- | |
- if( pthis->nitems == 0 ) | |
- { | |
- vec_addtail( pthis, pstr ); | |
- return; | |
- } | |
- | |
- if( str_casecmp( pstr, &pthis->pitems[0] ) < 0 ) | |
- { | |
- vec_addhead( pthis, pstr ); | |
- return; | |
- } | |
- | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while( hi-lo > 1 ) | |
- { | |
- mid = (hi+lo)/2; | |
- if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- assert( hi < pthis->nitems ); | |
- | |
- iter.plist = pthis; | |
- iter.index = hi; | |
- | |
- if( str_casecmp( pstr, &pthis->pitems[hi] ) < 0 ) | |
- { | |
- veciter_addbefore( &iter, pstr ); | |
- } | |
- else | |
- { | |
- veciter_addafter( &iter, pstr ); | |
- } | |
+ int lo, hi, mid; | |
+ veciter_t iter; | |
+ | |
+ if (pthis->nitems == 0) { | |
+ vec_addtail(pthis, pstr); | |
+ return; | |
+ } | |
+ if (str_casecmp(pstr, &pthis->pitems[0]) < 0) { | |
+ vec_addhead(pthis, pstr); | |
+ return; | |
+ } | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert(hi < pthis->nitems); | |
+ | |
+ iter.plist = pthis; | |
+ iter.index = hi; | |
+ | |
+ if (str_casecmp(pstr, &pthis->pitems[hi]) < 0) { | |
+ veciter_addbefore(&iter, pstr); | |
+ } else { | |
+ veciter_addafter(&iter, pstr); | |
+ } | |
} | |
-str_t* svec_find( vec_t* pthis, str_t* pstr ) | |
+str_t * | |
+svec_find(vec_t * pthis, str_t * pstr) | |
{ | |
- int lo, hi, mid; | |
- | |
- if( pthis->nitems == 0 ) | |
- { | |
- return NULL; | |
- } | |
- | |
- hi = pthis->nitems - 1; | |
- lo = -1; | |
- while( hi-lo > 1 ) | |
- { | |
- mid = (hi+lo)/2; | |
- if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) | |
- hi = mid; | |
- else | |
- lo = mid; | |
- } | |
- assert( hi >= 0 && hi < pthis->nitems ); | |
- | |
- if( str_casecmp( pstr, &pthis->pitems[hi] ) != 0 ) | |
- { | |
- return NULL; | |
- } | |
- | |
- return &pthis->pitems[hi]; | |
+ int lo, hi, mid; | |
+ | |
+ if (pthis->nitems == 0) { | |
+ return NULL; | |
+ } | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while (hi - lo > 1) { | |
+ mid = (hi + lo) / 2; | |
+ if (str_casecmp(pstr, &pthis->pitems[mid]) <= 0) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert(hi >= 0 && hi < pthis->nitems); | |
+ | |
+ if (str_casecmp(pstr, &pthis->pitems[hi]) != 0) { | |
+ return NULL; | |
+ } | |
+ return &pthis->pitems[hi]; | |
} | |
-void svec_sort( vec_t* pthis ) | |
+void | |
+svec_sort(vec_t * pthis) | |
{ | |
- if( pthis->nitems > 1 ) | |
- { | |
- qsort( pthis->pitems, pthis->nitems, sizeof(str_t), svec_compare ); | |
- } | |
+ if (pthis->nitems > 1) { | |
+ qsort(pthis->pitems, pthis->nitems, sizeof(str_t), svec_compar… | |
+ } | |
} | |
/***************************************************************************** | |
* vector iterator | |
*/ | |
-void veciter_destroy( veciter_t* pthis ) | |
+void | |
+veciter_destroy(veciter_t * pthis) | |
{ | |
- /* empty */ | |
+ /* empty */ | |
} | |
-str_t* veciter_get( veciter_t* pthis ) | |
+str_t * | |
+veciter_get(veciter_t * pthis) | |
{ | |
- if( pthis->plist == NULL || pthis->index >= pthis->plist->nitems ) | |
- { | |
- return NULL; | |
- } | |
- | |
- return &pthis->plist->pitems[pthis->index]; | |
+ if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { | |
+ return NULL; | |
+ } | |
+ return &pthis->plist->pitems[pthis->index]; | |
} | |
-bool_t veciter_equal( veciter_t* pthis, veciter_t* pthat ) | |
+bool_t | |
+veciter_equal(veciter_t * pthis, veciter_t * pthat) | |
{ | |
- if( pthis->plist != pthat->plist || | |
- pthis->index != pthat->index ) | |
- { | |
- return false; | |
- } | |
- | |
- return true; | |
+ if (pthis->plist != pthat->plist || | |
+ pthis->index != pthat->index) { | |
+ return false; | |
+ } | |
+ return true; | |
} | |
-bool_t veciter_hasitem( veciter_t* pthis ) | |
+bool_t | |
+veciter_hasitem(veciter_t * pthis) | |
{ | |
- if( pthis->plist == NULL || pthis->index >= pthis->plist->nitems ) | |
- { | |
- return false; | |
- } | |
- return true; | |
+ if (pthis->plist == NULL || pthis->index >= pthis->plist->nitems) { | |
+ return false; | |
+ } | |
+ return true; | |
} | |
-bool_t veciter_prev( veciter_t* pthis ) | |
+bool_t | |
+veciter_prev(veciter_t * pthis) | |
{ | |
- if( pthis->index == 0 ) | |
- { | |
- return false; | |
- } | |
- pthis->index--; | |
- return true; | |
+ if (pthis->index == 0) { | |
+ return false; | |
+ } | |
+ pthis->index--; | |
+ return true; | |
} | |
-bool_t veciter_next( veciter_t* pthis ) | |
+bool_t | |
+veciter_next(veciter_t * pthis) | |
{ | |
- pthis->index++; | |
- if( pthis->index == pthis->plist->nitems ) | |
- { | |
- return false; | |
- } | |
- return true; | |
+ pthis->index++; | |
+ if (pthis->index == pthis->plist->nitems) { | |
+ return false; | |
+ } | |
+ return true; | |
} | |
-void veciter_addafter( veciter_t* pthis, str_t* pstr ) | |
+void | |
+veciter_addafter(veciter_t * pthis, str_t * pstr) | |
{ | |
- str_t* pitems; | |
- | |
- vec_setsize( pthis->plist, pthis->plist->nitems+1 ); | |
- assert( pthis->index < pthis->plist->nitems ); | |
- pitems = pthis->plist->pitems; | |
- | |
- if( pthis->index != pthis->plist->nitems-1 ) | |
- { | |
- memmove( &pitems[pthis->index+2], &pitems[pthis->index+1], | |
- (pthis->plist->nitems-pthis->index-1) * sizeof(str_t) ); | |
- } | |
- | |
- pitems[pthis->index+1] = *pstr; | |
- pthis->plist->nitems++; | |
+ str_t *pitems; | |
+ | |
+ vec_setsize(pthis->plist, pthis->plist->nitems + 1); | |
+ assert(pthis->index < pthis->plist->nitems); | |
+ pitems = pthis->plist->pitems; | |
+ | |
+ if (pthis->index != pthis->plist->nitems - 1) { | |
+ memmove(&pitems[pthis->index + 2], &pitems[pthis->index + 1], | |
+ (pthis->plist->nitems - pthis->index - 1) * sizeof(str_t)); | |
+ } | |
+ pitems[pthis->index + 1] = *pstr; | |
+ pthis->plist->nitems++; | |
} | |
-void veciter_addbefore( veciter_t* pthis, str_t* pstr ) | |
+void | |
+veciter_addbefore(veciter_t * pthis, str_t * pstr) | |
{ | |
- str_t* pitems; | |
+ str_t *pitems; | |
- vec_setsize( pthis->plist, pthis->plist->nitems+1 ); | |
- assert( pthis->index < pthis->plist->nitems ); | |
- pitems = pthis->plist->pitems; | |
+ vec_setsize(pthis->plist, pthis->plist->nitems + 1); | |
+ assert(pthis->index < pthis->plist->nitems); | |
+ pitems = pthis->plist->pitems; | |
- memmove( &pitems[pthis->index+1], &pitems[pthis->index], | |
- (pthis->plist->nitems-pthis->index) * sizeof(str_t) ); | |
+ memmove(&pitems[pthis->index + 1], &pitems[pthis->index], | |
+ (pthis->plist->nitems - pthis->index) * sizeof(str_t)); | |
- pitems[pthis->index] = *pstr; | |
- pthis->plist->nitems++; | |
+ pitems[pthis->index] = *pstr; | |
+ pthis->plist->nitems++; | |
} | |
-void veciter_del( veciter_t* pthis ) | |
+void | |
+veciter_del(veciter_t * pthis) | |
{ | |
- str_t* pitems; | |
- | |
- assert( pthis->plist->nitems > 0 ); | |
- pthis->plist->nitems--; | |
- if( pthis->index < pthis->plist->nitems ) | |
- { | |
- pitems = pthis->plist->pitems; | |
- memmove( &pitems[pthis->index], &pitems[pthis->index+1], | |
- (pthis->plist->nitems-pthis->index) * sizeof(str_t) ); | |
- } | |
+ str_t *pitems; | |
+ | |
+ assert(pthis->plist->nitems > 0); | |
+ pthis->plist->nitems--; | |
+ if (pthis->index < pthis->plist->nitems) { | |
+ pitems = pthis->plist->pitems; | |
+ memmove(&pitems[pthis->index], &pitems[pthis->index + 1], | |
+ (pthis->plist->nitems - pthis->index) * sizeof(str_t)); | |
+ } | |
} | |
#ifdef UNIT_TEST | |
-int main( int argc, char** argv ) | |
+int | |
+main(int argc, char **argv) | |
{ | |
- vec_t vl; | |
- veciter_t iter; | |
- str_t* pstr; | |
- uint n; | |
- | |
- if( argc != 2 ) | |
- { | |
- fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
- return 1; | |
- } | |
- | |
- for( n = 0; n < 100; n++ ) | |
- { | |
- vec_create( &vl ); | |
- vec_load( &vl, argv[1] ); | |
- | |
- vec_first( &vl, &iter ); | |
- while( (pstr = veciter_get( &iter )) != NULL ) | |
- { | |
- char buf[256]; | |
- char* p; | |
- if( pstr->len > 200 ) | |
- { | |
- fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
- break; | |
- } | |
- p = buf; | |
- strcpy( buf, "str: " ); | |
- p += 6; | |
- memcpy( p, pstr->p, pstr->len ); | |
- p += pstr->len; | |
- sprintf( p, " %u", pstr->count ); | |
- puts( buf ); | |
- | |
- veciter_next( &iter ); | |
- } | |
- | |
- vec_destroy( &vl ); | |
- } | |
- | |
- return 0; | |
+ vec_t vl; | |
+ veciter_t iter; | |
+ str_t *pstr; | |
+ uint n; | |
+ | |
+ if (argc != 2) { | |
+ fprintf(stderr, "usage: %s <file>\n", argv[0]); | |
+ return 1; | |
+ } | |
+ for (n = 0; n < 100; n++) { | |
+ vec_create(&vl); | |
+ vec_load(&vl, argv[1]); | |
+ | |
+ vec_first(&vl, &iter); | |
+ while ((pstr = veciter_get(&iter)) != NULL) { | |
+ char buf[256]; | |
+ char *p; | |
+ | |
+ if (pstr->len > 200) { | |
+ fprintf(stderr, "str too long: %u chars\n", ps… | |
+ break; | |
+ } | |
+ p = buf; | |
+ strcpy(buf, "str: "); | |
+ p += 6; | |
+ memcpy(p, pstr->p, pstr->len); | |
+ p += pstr->len; | |
+ sprintf(p, " %u", pstr->count); | |
+ puts(buf); | |
+ | |
+ veciter_next(&iter); | |
+ } | |
+ | |
+ vec_destroy(&vl); | |
+ } | |
+ | |
+ return 0; | |
} | |
-#endif /* def UNIT_TEST */ | |
+#endif /* def UNIT_TEST */ |