Introduction
Introduction Statistics Contact Development Disclaimer Help
bmf.c - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
git clone git://git.codemadness.org/bmf
Log
Files
Refs
README
LICENSE
---
bmf.c (9060B)
---
1 /* $Id: bmf.c,v 1.20 2002/10/20 18:19:17 tommy Exp $ */
2
3 /*
4 * Copyright (c) 2002 Tom Marshall <[email protected]>
5 *
6 * This program is free software. It may be distributed under the terms
7 * in the file LICENSE, found in the top level of the distribution.
8 *
9 * bmf.c: top level Bayesian mail filter app.
10 */
11
12 #include "config.h"
13 #include "dbg.h"
14 #include "str.h"
15 #include "lex.h"
16 #include "vec.h"
17 #include "dbh.h"
18 #include "filt.h"
19
20 #define PACKAGE "bmf"
21
22 /* modes of operation (mutually exclusive) */
/*
 * Run mode chosen by the command-line flags parsed in main().  main()
 * starts in mode_normal and every mode flag simply overwrites the
 * previous choice, so the last mode flag given is the one that is used.
 * Flag mapping in main(): -t mode_test, -s mode_reg_s, -n mode_reg_n,
 * -S mode_n_to_s, -N mode_s_to_n, -b mode_bulk.
 */
23 typedef enum {
24 mode_test, /* test and produce report */
25 mode_normal, /* test and register result */
26 mode_reg_s, /* register as spam */
27 mode_reg_n, /* register as non-spam */
28 mode_n_to_s, /* undo non-spam registration and re…
29 * spam */
30 mode_s_to_n, /* undo spam registration and regist…
31 * non-spam */
32 /* test and product report in bulk, read file list from stdin, o…
33 mode_bulk
34 } runmode_t;
35
/*
 * usage: print the command-line help text (modes of operation and other
 * options) to stdout, then terminate the process with exit status 2.
 * This function does not return to its caller.
 */
36 static void
37 usage(void)
38 {
39 printf("\n"
40 "Usage: " PACKAGE " [mode] [options]\n"
41 "\n"
42 "Modes of operation (mutually exclusive; the last one spe…
43 "\t\tRegister message using historical data if no mode is…
44 "\t-b\tBulk test mode, read file list from stdin, output …
45 "\t-n\tRegister message as non-spam.\n"
46 "\t-s\tRegister message as spam.\n"
47 "\t-N\tRegister message as non-spam and undo prior regist…
48 "\t-S\tRegister message as spam and undo prior registrati…
49 "\t-t\tTest mode, print report and do not save results.\n"
50 "\n"
51 "Other options:\n"
52 "\t-d db\tSpecify database or directory name.\n"
53 "\t-k n\tSpecify count of extrema to use (keepers), defau…
54 "\t-m type\t[DEPRECATED] Specify mail storage format (mbo…
55 "\t-p\tPassthrough mode, like SpamAssassin.\n"
56 "\t-v\tIncrease verbosity level.\n"
57 "\t-V\tShow version information and exit.\n"
58 "\t-h\tShow this message and exit.\n"
59 "\n");
60 exit(2);
61 }
62
/*
 * version: print the package name, the VERSION macro, the copyright line
 * and the warranty/licence notice to stdout, then terminate the process
 * with exit status 2.  This function does not return to its caller.
 *
 * NOTE(review): exit status 2 is the same status main() uses for errors;
 * confirm whether a successful -V request is meant to exit non-zero.
 */
63 static void
64 version(void)
65 {
66 printf("\n"
67 PACKAGE " version " VERSION " - a Bayesian mail filter\n"
68 "Copyright (c) 2002 Tom Marshall\n"
69 "\n"
70 PACKAGE " comes with ABSOLUTELY NO WARRANTY.\n"
71 "This is free software. You are welcome to redistribute …
72 "of the GNU General Public License. See the file LICENSE…
73 "distribution, or visit http://www.gnu.org/licenses/gpl.h…
74 "\n");
75 exit(2);
76 }
77
/*
 * main: program entry point.
 *
 * Parses the command-line flags, allocates the extrema array sized by
 * stats.keepers, opens the database with dbtext_db_open() and then takes
 * one of two paths:
 *
 *  - Bulk mode (-b): opens the spamlist and goodlist tables once, reads
 *    file names from stdin one per line, opens each file, runs bayesfilt()
 *    on every message lexed from it and prints the file name, a tab and
 *    the resulting spamicity.  Returns 0 when the file list is exhausted.
 *
 *  - All other modes: lexes the input from standard input (fd keeps its
 *    initial value STDIN_FILENO) and loops over the messages in it.
 *    In test mode the two tables are opened once before the loop, the
 *    pledge promises are then reduced to stdio only, bayesfilt() is run
 *    and statdump() writes the report to stdout.  In the other modes the
 *    tables are opened and closed again for every message:
 *      mode_normal  runs bayesfilt() and calls mergeclose() on the spam
 *                   table when spamicity > SPAM_CUTOFF, otherwise on the
 *                   good table;
 *      mode_reg_s   sets spamicity to 1.0 and calls mergeclose() on the
 *                   spam table;
 *      mode_reg_n   sets spamicity to 0.0 and calls mergeclose() on the
 *                   good table;
 *      mode_n_to_s  calls mergeclose() on the spam table and
 *                   unmergeclose() on the good table;
 *      mode_s_to_n  calls unmergeclose() on the spam table and
 *                   mergeclose() on the good table.
 *    With -p, lex_passthru() is called for every message with the spam
 *    verdict and the spamicity.
 *
 * Return value (non-bulk path): 0 when passthrough is enabled or the last
 * processed message was classified as spam, otherwise 1.
 *
 * Failures print a message to stderr and exit(2); failures of pledge()
 * and of open() in bulk mode go through err(1, ...).
 *
 * NOTE(review): the buffer that getline() allocates for the bulk file
 * list (line) is not freed before the bulk path returns.
 * NOTE(review): in the registering modes a table that was already passed
 * to mergeclose()/unmergeclose() still has close() called on it at the end
 * of the loop body; the semantics of those helpers are not visible here,
 * so confirm in the table implementation that this is safe.
 */
78 int
79 main(int argc, char **argv)
80 {
81 char *dbname = NULL;
82 bool_t rdonly;
83 runmode_t mode = mode_normal;
84 mbox_t mboxtype = detect;
85 bool_t do_passthru = false;
86 dbhtext_t *pdb;
87 dbt_t *pblist, *pglist, *ptable;
88 vec_t mlist;
89 stats_t stats;
90 lex_t lex;
91 tok_t tok;
92 bool_t is_spam = false;
93 int ch;
94 char *line = NULL;
95 size_t linesiz = 0;
96 ssize_t n;
97
98 int fd = STDIN_FILENO;
99
/* restrict the process to the listed pledge promises before any option is parsed */
100 if (pledge("stdio rpath wpath cpath flock unveil", NULL) == -1)
101 err(1, "pledge");
102
103 srand(time(NULL));
104
/* default keeper count; the -k option below may replace it */
105 stats.keepers = DEF_KEEPERS;
/* mode flags overwrite each other, so the last mode flag given is the one used */
106 while ((ch = getopt(argc, argv, "NSVd:hk:m:bnpstv")) != EOF) {
107 switch (ch) {
108 case 'b':
109 mode = mode_bulk;
110 break;
111 case 'N':
112 mode = mode_s_to_n;
113 break;
114 case 'S':
115 mode = mode_n_to_s;
116 break;
117 case 'V':
118 version();
119 break; /* NOTREACHED */
120 case 'd':
/* a repeated -d releases the previous copy before storing the new name */
121 free(dbname);
122 if (!(dbname = strdup(optarg))) {
123 perror("strdup()");
124 exit(2);
125 }
126 break;
127 case 'h':
128 usage();
129 break; /* NOTREACHED */
130 case 'k':
/* NOTE(review): the atoi result is not validated, yet it is the element count of the malloc below */
131 stats.keepers = atoi(optarg);
132 break;
133 case 'm':
134 if (strcasecmp(optarg, "mbox") == 0) {
135 mboxtype = mbox;
136 } else if (strcasecmp(optarg, "maildir") == 0) {
137 mboxtype = maildir;
138 } else {
139 usage();
140 }
141 break;
142 case 'n':
143 mode = mode_reg_n;
144 break;
145 case 'p':
146 do_passthru = true;
147 break;
148 case 's':
149 mode = mode_reg_s;
150 break;
151 case 't':
152 mode = mode_test;
/* the promise list used for test mode is the initial one without wpath */
153 if (pledge("stdio rpath cpath flock unveil", NUL…
154 err(1, "pledge");
155 break;
156 case 'v':
157 g_verbose++;
158 verbose(1, "Verbose level now %u\n", g_verbose);
159 break;
160 default:
161 usage();
162 }
163 }
/* NOTE(review): the malloc result is not checked before stats.extrema is used */
164 stats.extrema = (discrim_t *) malloc(stats.keepers * sizeof(disc…
165
/* only test mode opens the database and its tables read-only */
166 rdonly = (mode == mode_test) ? 1 : 0;
167
168 /* create directory if it doesn't exist yet, when dbname is NULL…
169 pdb = dbtext_db_open(dbname, rdonly);
170 if (pdb == NULL) {
171 fprintf(stderr, "%s: cannot open database\n", argv[0]);
172 exit(2);
173 }
174
175 /* bulk mode */
176 if (mode == mode_bulk) {
177 pblist = pdb->opentable(pdb, "spamlist", rdonly);
178 if (pblist == NULL) {
179 fprintf(stderr, "%s: cannot open spamlist\n", ar…
180 exit(2);
181 }
182 pglist = pdb->opentable(pdb, "goodlist", rdonly);
183 if (pglist == NULL) {
184 fprintf(stderr, "%s: cannot open goodlist\n", ar…
185 exit(2);
186 }
187
/* every line read from stdin is used as the path of a file to open */
188 while ((n = getline(&line, &linesiz, stdin)) > 0) {
/* strip the trailing newline so the line can be passed to open() */
189 if (line[n - 1] == '\n')
190 line[--n] = '\0';
191
/* a file that cannot be opened ends the whole run through err() */
192 if ((fd = open(line, O_RDONLY)) == -1)
193 err(1, "open: %s", line);
194
/* clear the extrema array before this file is processed */
195 memset(stats.extrema, 0, stats.keepers * sizeof(…
196
197 lex_create(&lex, mboxtype);
198 if (!lex_load(&lex, fd)) {
199 fprintf(stderr, "%s: cannot read input\n…
200 exit(2);
201 }
202 lex_nexttoken(&lex, &tok);
203 if (tok.tt == eof) {
204 fprintf(stderr, "%s: no input available\…
205 exit(2);
206 }
207
208 while (tok.tt != eof) {
209 /* TODO: vec_create at top, vec->nitems …
210 vec_create(&mlist);
211
212 bvec_loadmsg(&mlist, &lex, &tok);
213 bayesfilt(pglist, pblist, &mlist, &stats…
214
215 vec_destroy(&mlist);
216
217 printf("%s\t%f\n", line, stats.spamicity…
218 }
219
220 lex_destroy(&lex);
221
222 close(fd);
223 }
224
225 pglist->close(pglist);
226 free(pglist);
227 pblist->close(pblist);
228 free(pblist);
229
230 pdb->close(pdb);
231 free(pdb);
232
233 free(stats.extrema);
234
235 return 0;
236 }
237
238 lex_create(&lex, mboxtype);
239 if (!lex_load(&lex, fd)) {
240 fprintf(stderr, "%s: cannot read input\n", argv[0]);
241 exit(2);
242 }
243 lex_nexttoken(&lex, &tok);
244 if (tok.tt == eof) {
245 fprintf(stderr, "%s: no input available\n", argv[0]);
246 exit(2);
247 }
248
249 if (mode == mode_test) {
250 pblist = pdb->opentable(pdb, "spamlist", rdonly);
251 if (pblist == NULL) {
252 fprintf(stderr, "%s: cannot open spamlist\n", ar…
253 exit(2);
254 }
255 pglist = pdb->opentable(pdb, "goodlist", rdonly);
256 if (pglist == NULL) {
257 fprintf(stderr, "%s: cannot open goodlist\n", ar…
258 exit(2);
259 }
260 if (pledge("stdio", NULL) == -1)
261 err(1, "pledge");
262 }
263 while (tok.tt != eof) {
264 if (mboxtype == mbox && tok.tt != from) {
265 fprintf(stderr, "%s: input does not look like an…
266 exit(2);
267 }
268 if (mode != mode_test) {
269 pblist = pdb->opentable(pdb, "spamlist", rdonly);
270 if (pblist == NULL) {
271 fprintf(stderr, "%s: cannot open spamlis…
272 exit(2);
273 }
274 pglist = pdb->opentable(pdb, "goodlist", rdonly);
275 if (pglist == NULL) {
276 fprintf(stderr, "%s: cannot open goodlis…
277 exit(2);
278 }
279 }
280 vec_create(&mlist);
281 bvec_loadmsg(&mlist, &lex, &tok);
282
283 switch (mode) {
284 case mode_test:
285 bayesfilt(pglist, pblist, &mlist, &stats);
286 is_spam = (stats.spamicity > SPAM_CUTOFF);
287 break;
288 case mode_normal:
289 bayesfilt(pglist, pblist, &mlist, &stats);
290 is_spam = (stats.spamicity > SPAM_CUTOFF);
291 ptable = (is_spam ? pblist : pglist);
292 svec_sort(&mlist);
293 if (!ptable->mergeclose(ptable, &mlist)) {
294 fprintf(stderr, "%s: cannot merge/save l…
295 exit(2);
296 }
297 break;
298 case mode_reg_s:
299 stats.spamicity = 1.0;
300 is_spam = true;
301 svec_sort(&mlist);
302 if (!pblist->mergeclose(pblist, &mlist)) {
303 fprintf(stderr, "%s: cannot merge/save l…
304 exit(2);
305 }
306 break;
307 case mode_reg_n:
308 stats.spamicity = 0.0;
309 is_spam = false;
310 svec_sort(&mlist);
311 if (!pglist->mergeclose(pglist, &mlist)) {
312 fprintf(stderr, "%s: cannot merge/save l…
313 exit(2);
314 }
315 break;
316 case mode_n_to_s:
317 stats.spamicity = 1.0;
318 is_spam = true;
319 svec_sort(&mlist);
320 if (!pblist->mergeclose(pblist, &mlist) ||
321 !pglist->unmergeclose(pglist, &mlist)) {
322 fprintf(stderr, "%s: cannot merge/save l…
323 exit(2);
324 }
325 break;
326 case mode_s_to_n:
327 stats.spamicity = 0.0;
328 is_spam = false;
329 svec_sort(&mlist);
330 if (!pblist->unmergeclose(pblist, &mlist) ||
331 !pglist->mergeclose(pglist, &mlist)) {
332 fprintf(stderr, "%s: cannot merge/save l…
333 exit(2);
334 }
335 break;
336 default:
337 usage();
338 }
339
340 if (mode == mode_test) {
341 statdump(&stats, stdout);
342 }
343 if (do_passthru) {
344 lex_passthru(&lex, is_spam, stats.spamicity);
345 }
346 vec_destroy(&mlist);
347
348 if (mode != mode_test) {
349 pglist->close(pglist);
350 free(pglist);
351 pblist->close(pblist);
352 free(pblist);
353 }
354 }
355
356 if (mode == mode_test) {
357 pglist->close(pglist);
358 free(pglist);
359 pblist->close(pblist);
360 free(pblist);
361 }
362 lex_destroy(&lex);
363
364 pdb->close(pdb);
365 free(pdb);
366
367 free(stats.extrema);
368
369 return ((do_passthru || is_spam) ? 0 : 1);
370 }
You are viewing proxied material from codemadness.org. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.