import bmf 0.9.4 - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches | |
git clone git://git.codemadness.org/bmf | |
Log | |
Files | |
Refs | |
README | |
LICENSE | |
--- | |
commit 0983b0f64c3e1bf7fa03f2a4060e6f25e9e79cef | |
Author: Hiltjo Posthuma <[email protected]> | |
Date: Sat, 22 Sep 2018 17:46:14 +0200 | |
import bmf 0.9.4 | |
Diffstat: | |
A AUTHORS | 4 ++++ | |
A ChangeLog | 95 ++++++++++++++++++++++++++++++ | |
A LICENSE | 340 +++++++++++++++++++++++++++++… | |
A Makefile.in | 81 ++++++++++++++++++++++++++++++ | |
A README | 130 +++++++++++++++++++++++++++++… | |
A TODO | 4 ++++ | |
A bmf.1 | 148 +++++++++++++++++++++++++++++… | |
A bmf.c | 339 +++++++++++++++++++++++++++++… | |
A bmf.spec.in | 64 +++++++++++++++++++++++++++++… | |
A bmfconv.1 | 81 ++++++++++++++++++++++++++++++ | |
A bmfconv.c | 169 +++++++++++++++++++++++++++++… | |
A config.h | 80 +++++++++++++++++++++++++++++… | |
A configure | 354 +++++++++++++++++++++++++++++… | |
A dbdb.c | 684 +++++++++++++++++++++++++++++… | |
A dbdb.h | 61 +++++++++++++++++++++++++++++… | |
A dbg.c | 302 +++++++++++++++++++++++++++++… | |
A dbg.h | 35 +++++++++++++++++++++++++++++… | |
A dbh.c | 74 +++++++++++++++++++++++++++++… | |
A dbh.h | 56 +++++++++++++++++++++++++++++… | |
A dbmysql.c | 545 +++++++++++++++++++++++++++++… | |
A dbmysql.h | 60 +++++++++++++++++++++++++++++… | |
A dbtext.c | 591 +++++++++++++++++++++++++++++… | |
A dbtext.h | 53 ++++++++++++++++++++++++++++++ | |
A filt.c | 175 +++++++++++++++++++++++++++++… | |
A filt.h | 31 +++++++++++++++++++++++++++++… | |
A lex.c | 787 +++++++++++++++++++++++++++++… | |
A lex.h | 44 +++++++++++++++++++++++++++++… | |
A str.c | 78 +++++++++++++++++++++++++++++… | |
A str.h | 30 ++++++++++++++++++++++++++++++ | |
A vec.c | 345 +++++++++++++++++++++++++++++… | |
A vec.h | 58 ++++++++++++++++++++++++++++++ | |
31 files changed, 5898 insertions(+), 0 deletions(-) | |
--- | |
diff --git a/AUTHORS b/AUTHORS | |
@@ -0,0 +1,4 @@ | |
+# $Id: AUTHORS,v 1.1.1.1 2002/09/30 21:08:29 tommy Exp $ | |
+ | |
+Tom Marshall <[email protected]> | |
+ Initial version | |
diff --git a/ChangeLog b/ChangeLog | |
@@ -0,0 +1,95 @@ | |
+Revision history for bmf: | |
+ | |
+0.9.4: 20 Oct 2002 | |
+ * Remove X-RBL-Warning from ignored headers. | |
+ | |
+0.9.4pre7: 20 Oct 2002 | |
+ * Update documentation. | |
+ | |
+0.9.4pre6: 20 Oct 2002 | |
+ * Move Bayes stuff into its own file. | |
+ | |
+0.9.4pre5: 20 Oct 2002 | |
+ * Fix NaN exception: if list is empty, use zero for probability. | |
+ * Make extrema array (keepers) variable size. Needs more work. | |
+ | |
+0.9.4pre4: 19 Oct 2002 | |
+ * Add configure section for Darwin. | |
+ * Don't use file locking on Darwin, it is not supported. | |
+ | |
+0.9.4pre3: 19 Oct 2002 | |
+ * Fixup configure script for OSF1. | |
+ * which(1) always returns 0 on OSF1, use type(1) instead. | |
+ * Add SYSLIBS to the makefile. | |
+ * Fix gcc-ism in dbg.c (ptr arithmetic on void*). | |
+ * Fix off-by-one in html tag check. | |
+ * Fix unaligned access in libdb. | |
+ | |
+0.9.4pre2: 18 Oct 2002 | |
+ * Fix bug in -d handling for text and libdb. | |
+ (Found by Björn Kalkbrenn) | |
+ | |
+0.9.4pre1: 17 Oct 2002 | |
+ * Autodetect mailbox type and deprecate the -m option. | |
+ | |
+0.9.3: 14 Oct 2002 | |
+ * Ditch the builtin libdb locks, use fcntl instead. | |
+ * Fix memory leak in dbtext. | |
+ * Fix some trivial issues with the lexer: | |
+ - Be more strict about recognizing IP addresses. | |
+ - Do case-insensitive header name comparisons. | |
+ * Fix multiple database closure with mbox format. | |
+ * Fix a bogus assert in passthrough. | |
+ * Add verbose flag (no functionality yet). | |
+ * Add heap checking in debug mode. | |
+ * Fix bug in -N mode which made it act the same as -S. | |
+ * Add X-RBL-Warning to ignored headers. | |
+ * Support maildir style folders. | |
+ | |
+0.9.2: 12 Oct 2002 | |
+ * Fix bug in multiple message registration. | |
+ | |
+0.9.1: 12 Oct 2002 | |
+ * Improve error reporting and clarify some messages. | |
+ * Package preformatted manpage instead of XML. | |
+ * Remove single message per invocation restriction. | |
+ | |
+0.84: 09 Oct 2002 | |
+ * Fix linker flags for autodetected libdb 4.1 in /usr/local on BSD. | |
+ | |
+0.84pre3: 07 Oct 2002 | |
+ * Yet another libdb api fix. DB->open() in 4.0 is the same as 3.x. | |
+ | |
+0.84pre2: 07 Oct 2002 | |
+ * Fix bug in dbdb (v1 only) that prevented file locking on FreeBSD. | |
+ | |
+0.84pre1: 07 Oct 2002 | |
+ * Fix bug in dbtext that caused segfault searching an empty list. | |
+ | |
+0.83: 07 Oct 2002 | |
+ * Fix bug preventing creation of libdb files when using -n or -s. | |
+ * Fix bug in libdb unmergeclose function logic. Users are strongly | |
+ encouraged to delete and rebuild word lists if possible. If not | |
+ possible, at least export to text and remove entries | |
+ that represent unsigned underflow (eg. 4.2 billion). | |
+ | |
+0.82: 06 Oct 2002 | |
+ * Support GNU style --with-package=path options. | |
+ (copied from autoconf output) | |
+ * Use mysql_config in configure script. | |
+ * Support libdb v1 and v4. | |
+ * Find and use BerkeleyDB 4.1 in *BSD. | |
+ (thanks to [email protected]) | |
+ * Clarify and robustify argument handling (fixes pr618875). | |
+ | |
+0.81: 03 Oct 2002 | |
+ * Add configure script and conditionally compile libdb and mysql. | |
+ * Add manpage for bmfconv. | |
+ * Cleanup manpage for bmf. | |
+ * Rearrange makefile a bit. | |
+ * Remove -f text options in bmfconv, it is not supported. | |
+ * Fix mysql typo that prevented linking. | |
+ * Tweak X-Spam headers a bit. | |
+ | |
+0.80: 02 Oct 2002 | |
+ * Initial release. | |
diff --git a/LICENSE b/LICENSE | |
@@ -0,0 +1,340 @@ | |
+ GNU GENERAL PUBLIC LICENSE | |
+ Version 2, June 1991 | |
+ | |
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. | |
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
+ Everyone is permitted to copy and distribute verbatim copies | |
+ of this license document, but changing it is not allowed. | |
+ | |
+ Preamble | |
+ | |
+ The licenses for most software are designed to take away your | |
+freedom to share and change it. By contrast, the GNU General Public | |
+License is intended to guarantee your freedom to share and change free | |
+software--to make sure the software is free for all its users. This | |
+General Public License applies to most of the Free Software | |
+Foundation's software and to any other program whose authors commit to | |
+using it. (Some other Free Software Foundation software is covered by | |
+the GNU Library General Public License instead.) You can apply it to | |
+your programs, too. | |
+ | |
+ When we speak of free software, we are referring to freedom, not | |
+price. Our General Public Licenses are designed to make sure that you | |
+have the freedom to distribute copies of free software (and charge for | |
+this service if you wish), that you receive source code or can get it | |
+if you want it, that you can change the software or use pieces of it | |
+in new free programs; and that you know you can do these things. | |
+ | |
+ To protect your rights, we need to make restrictions that forbid | |
+anyone to deny you these rights or to ask you to surrender the rights. | |
+These restrictions translate to certain responsibilities for you if you | |
+distribute copies of the software, or if you modify it. | |
+ | |
+ For example, if you distribute copies of such a program, whether | |
+gratis or for a fee, you must give the recipients all the rights that | |
+you have. You must make sure that they, too, receive or can get the | |
+source code. And you must show them these terms so they know their | |
+rights. | |
+ | |
+ We protect your rights with two steps: (1) copyright the software, and | |
+(2) offer you this license which gives you legal permission to copy, | |
+distribute and/or modify the software. | |
+ | |
+ Also, for each author's protection and ours, we want to make certain | |
+that everyone understands that there is no warranty for this free | |
+software. If the software is modified by someone else and passed on, we | |
+want its recipients to know that what they have is not the original, so | |
+that any problems introduced by others will not reflect on the original | |
+authors' reputations. | |
+ | |
+ Finally, any free program is threatened constantly by software | |
+patents. We wish to avoid the danger that redistributors of a free | |
+program will individually obtain patent licenses, in effect making the | |
+program proprietary. To prevent this, we have made it clear that any | |
+patent must be licensed for everyone's free use or not licensed at all. | |
+ | |
+ The precise terms and conditions for copying, distribution and | |
+modification follow. | |
+ | |
+ GNU GENERAL PUBLIC LICENSE | |
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | |
+ | |
+ 0. This License applies to any program or other work which contains | |
+a notice placed by the copyright holder saying it may be distributed | |
+under the terms of this General Public License. The "Program", below, | |
+refers to any such program or work, and a "work based on the Program" | |
+means either the Program or any derivative work under copyright law: | |
+that is to say, a work containing the Program or a portion of it, | |
+either verbatim or with modifications and/or translated into another | |
+language. (Hereinafter, translation is included without limitation in | |
+the term "modification".) Each licensee is addressed as "you". | |
+ | |
+Activities other than copying, distribution and modification are not | |
+covered by this License; they are outside its scope. The act of | |
+running the Program is not restricted, and the output from the Program | |
+is covered only if its contents constitute a work based on the | |
+Program (independent of having been made by running the Program). | |
+Whether that is true depends on what the Program does. | |
+ | |
+ 1. You may copy and distribute verbatim copies of the Program's | |
+source code as you receive it, in any medium, provided that you | |
+conspicuously and appropriately publish on each copy an appropriate | |
+copyright notice and disclaimer of warranty; keep intact all the | |
+notices that refer to this License and to the absence of any warranty; | |
+and give any other recipients of the Program a copy of this License | |
+along with the Program. | |
+ | |
+You may charge a fee for the physical act of transferring a copy, and | |
+you may at your option offer warranty protection in exchange for a fee. | |
+ | |
+ 2. You may modify your copy or copies of the Program or any portion | |
+of it, thus forming a work based on the Program, and copy and | |
+distribute such modifications or work under the terms of Section 1 | |
+above, provided that you also meet all of these conditions: | |
+ | |
+ a) You must cause the modified files to carry prominent notices | |
+ stating that you changed the files and the date of any change. | |
+ | |
+ b) You must cause any work that you distribute or publish, that in | |
+ whole or in part contains or is derived from the Program or any | |
+ part thereof, to be licensed as a whole at no charge to all third | |
+ parties under the terms of this License. | |
+ | |
+ c) If the modified program normally reads commands interactively | |
+ when run, you must cause it, when started running for such | |
+ interactive use in the most ordinary way, to print or display an | |
+ announcement including an appropriate copyright notice and a | |
+ notice that there is no warranty (or else, saying that you provide | |
+ a warranty) and that users may redistribute the program under | |
+ these conditions, and telling the user how to view a copy of this | |
+ License. (Exception: if the Program itself is interactive but | |
+ does not normally print such an announcement, your work based on | |
+ the Program is not required to print an announcement.) | |
+ | |
+These requirements apply to the modified work as a whole. If | |
+identifiable sections of that work are not derived from the Program, | |
+and can be reasonably considered independent and separate works in | |
+themselves, then this License, and its terms, do not apply to those | |
+sections when you distribute them as separate works. But when you | |
+distribute the same sections as part of a whole which is a work based | |
+on the Program, the distribution of the whole must be on the terms of | |
+this License, whose permissions for other licensees extend to the | |
+entire whole, and thus to each and every part regardless of who wrote it. | |
+ | |
+Thus, it is not the intent of this section to claim rights or contest | |
+your rights to work written entirely by you; rather, the intent is to | |
+exercise the right to control the distribution of derivative or | |
+collective works based on the Program. | |
+ | |
+In addition, mere aggregation of another work not based on the Program | |
+with the Program (or with a work based on the Program) on a volume of | |
+a storage or distribution medium does not bring the other work under | |
+the scope of this License. | |
+ | |
+ 3. You may copy and distribute the Program (or a work based on it, | |
+under Section 2) in object code or executable form under the terms of | |
+Sections 1 and 2 above provided that you also do one of the following: | |
+ | |
+ a) Accompany it with the complete corresponding machine-readable | |
+ source code, which must be distributed under the terms of Sections | |
+ 1 and 2 above on a medium customarily used for software interchange; or, | |
+ | |
+ b) Accompany it with a written offer, valid for at least three | |
+ years, to give any third party, for a charge no more than your | |
+ cost of physically performing source distribution, a complete | |
+ machine-readable copy of the corresponding source code, to be | |
+ distributed under the terms of Sections 1 and 2 above on a medium | |
+ customarily used for software interchange; or, | |
+ | |
+ c) Accompany it with the information you received as to the offer | |
+ to distribute corresponding source code. (This alternative is | |
+ allowed only for noncommercial distribution and only if you | |
+ received the program in object code or executable form with such | |
+ an offer, in accord with Subsection b above.) | |
+ | |
+The source code for a work means the preferred form of the work for | |
+making modifications to it. For an executable work, complete source | |
+code means all the source code for all modules it contains, plus any | |
+associated interface definition files, plus the scripts used to | |
+control compilation and installation of the executable. However, as a | |
+special exception, the source code distributed need not include | |
+anything that is normally distributed (in either source or binary | |
+form) with the major components (compiler, kernel, and so on) of the | |
+operating system on which the executable runs, unless that component | |
+itself accompanies the executable. | |
+ | |
+If distribution of executable or object code is made by offering | |
+access to copy from a designated place, then offering equivalent | |
+access to copy the source code from the same place counts as | |
+distribution of the source code, even though third parties are not | |
+compelled to copy the source along with the object code. | |
+ | |
+ 4. You may not copy, modify, sublicense, or distribute the Program | |
+except as expressly provided under this License. Any attempt | |
+otherwise to copy, modify, sublicense or distribute the Program is | |
+void, and will automatically terminate your rights under this License. | |
+However, parties who have received copies, or rights, from you under | |
+this License will not have their licenses terminated so long as such | |
+parties remain in full compliance. | |
+ | |
+ 5. You are not required to accept this License, since you have not | |
+signed it. However, nothing else grants you permission to modify or | |
+distribute the Program or its derivative works. These actions are | |
+prohibited by law if you do not accept this License. Therefore, by | |
+modifying or distributing the Program (or any work based on the | |
+Program), you indicate your acceptance of this License to do so, and | |
+all its terms and conditions for copying, distributing or modifying | |
+the Program or works based on it. | |
+ | |
+ 6. Each time you redistribute the Program (or any work based on the | |
+Program), the recipient automatically receives a license from the | |
+original licensor to copy, distribute or modify the Program subject to | |
+these terms and conditions. You may not impose any further | |
+restrictions on the recipients' exercise of the rights granted herein. | |
+You are not responsible for enforcing compliance by third parties to | |
+this License. | |
+ | |
+ 7. If, as a consequence of a court judgment or allegation of patent | |
+infringement or for any other reason (not limited to patent issues), | |
+conditions are imposed on you (whether by court order, agreement or | |
+otherwise) that contradict the conditions of this License, they do not | |
+excuse you from the conditions of this License. If you cannot | |
+distribute so as to satisfy simultaneously your obligations under this | |
+License and any other pertinent obligations, then as a consequence you | |
+may not distribute the Program at all. For example, if a patent | |
+license would not permit royalty-free redistribution of the Program by | |
+all those who receive copies directly or indirectly through you, then | |
+the only way you could satisfy both it and this License would be to | |
+refrain entirely from distribution of the Program. | |
+ | |
+If any portion of this section is held invalid or unenforceable under | |
+any particular circumstance, the balance of the section is intended to | |
+apply and the section as a whole is intended to apply in other | |
+circumstances. | |
+ | |
+It is not the purpose of this section to induce you to infringe any | |
+patents or other property right claims or to contest validity of any | |
+such claims; this section has the sole purpose of protecting the | |
+integrity of the free software distribution system, which is | |
+implemented by public license practices. Many people have made | |
+generous contributions to the wide range of software distributed | |
+through that system in reliance on consistent application of that | |
+system; it is up to the author/donor to decide if he or she is willing | |
+to distribute software through any other system and a licensee cannot | |
+impose that choice. | |
+ | |
+This section is intended to make thoroughly clear what is believed to | |
+be a consequence of the rest of this License. | |
+ | |
+ 8. If the distribution and/or use of the Program is restricted in | |
+certain countries either by patents or by copyrighted interfaces, the | |
+original copyright holder who places the Program under this License | |
+may add an explicit geographical distribution limitation excluding | |
+those countries, so that distribution is permitted only in or among | |
+countries not thus excluded. In such case, this License incorporates | |
+the limitation as if written in the body of this License. | |
+ | |
+ 9. The Free Software Foundation may publish revised and/or new versions | |
+of the General Public License from time to time. Such new versions will | |
+be similar in spirit to the present version, but may differ in detail to | |
+address new problems or concerns. | |
+ | |
+Each version is given a distinguishing version number. If the Program | |
+specifies a version number of this License which applies to it and "any | |
+later version", you have the option of following the terms and conditions | |
+either of that version or of any later version published by the Free | |
+Software Foundation. If the Program does not specify a version number of | |
+this License, you may choose any version ever published by the Free Software | |
+Foundation. | |
+ | |
+ 10. If you wish to incorporate parts of the Program into other free | |
+programs whose distribution conditions are different, write to the author | |
+to ask for permission. For software which is copyrighted by the Free | |
+Software Foundation, write to the Free Software Foundation; we sometimes | |
+make exceptions for this. Our decision will be guided by the two goals | |
+of preserving the free status of all derivatives of our free software and | |
+of promoting the sharing and reuse of software generally. | |
+ | |
+ NO WARRANTY | |
+ | |
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY | |
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN | |
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES | |
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED | |
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS | |
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE | |
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, | |
+REPAIR OR CORRECTION. | |
+ | |
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | |
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR | |
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, | |
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING | |
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED | |
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY | |
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER | |
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE | |
+POSSIBILITY OF SUCH DAMAGES. | |
+ | |
+ END OF TERMS AND CONDITIONS | |
+ | |
+ How to Apply These Terms to Your New Programs | |
+ | |
+ If you develop a new program, and you want it to be of the greatest | |
+possible use to the public, the best way to achieve this is to make it | |
+free software which everyone can redistribute and change under these terms. | |
+ | |
+ To do so, attach the following notices to the program. It is safest | |
+to attach them to the start of each source file to most effectively | |
+convey the exclusion of warranty; and each file should have at least | |
+the "copyright" line and a pointer to where the full notice is found. | |
+ | |
+ <one line to give the program's name and a brief idea of what it does.> | |
+ Copyright (C) <year> <name of author> | |
+ | |
+ This program is free software; you can redistribute it and/or modify | |
+ it under the terms of the GNU General Public License as published by | |
+ the Free Software Foundation; either version 2 of the License, or | |
+ (at your option) any later version. | |
+ | |
+ This program is distributed in the hope that it will be useful, | |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of | |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
+ GNU General Public License for more details. | |
+ | |
+ You should have received a copy of the GNU General Public License | |
+ along with this program; if not, write to the Free Software | |
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
+ | |
+ | |
+Also add information on how to contact you by electronic and paper mail. | |
+ | |
+If the program is interactive, make it output a short notice like this | |
+when it starts in an interactive mode: | |
+ | |
+ Gnomovision version 69, Copyright (C) year name of author | |
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. | |
+ This is free software, and you are welcome to redistribute it | |
+ under certain conditions; type `show c' for details. | |
+ | |
+The hypothetical commands `show w' and `show c' should show the appropriate | |
+parts of the General Public License. Of course, the commands you use may | |
+be called something other than `show w' and `show c'; they could even be | |
+mouse-clicks or menu items--whatever suits your program. | |
+ | |
+You should also get your employer (if you work as a programmer) or your | |
+school, if any, to sign a "copyright disclaimer" for the program, if | |
+necessary. Here is a sample; alter the names: | |
+ | |
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program | |
+ `Gnomovision' (which makes passes at compilers) written by James Hacker. | |
+ | |
+ <signature of Ty Coon>, 1 April 1989 | |
+ Ty Coon, President of Vice | |
+ | |
+This General Public License does not permit incorporating your program into | |
+proprietary programs. If your program is a subroutine library, you may | |
+consider it more useful to permit linking proprietary applications with the | |
+library. If this is what you want to do, use the GNU Library General | |
+Public License instead of this License. | |
diff --git a/Makefile.in b/Makefile.in | |
@@ -0,0 +1,81 @@ | |
+# Makefile for bmf | |
+ | |
+BINDIR=/usr/bin | |
+MANDIR=/usr/share/man | |
+ | |
+VERSION=0.9.4 | |
+ | |
+CC=@CC@ | |
+CFLAGS=@CFLAGS@ | |
+LDFLAGS=@LDFLAGS@ | |
+SYSLIBS=@SYSLIBS@ | |
+ | |
+# For creating rpm packages | |
+RPMROOT=/usr/src/rpm | |
+RPM = rpm | |
+RPMFLAGS = -ba | |
+ARCH=`arch|sed 's/i[4-9]86/i386/'` | |
+ | |
+all: bmf bmfconv | |
+ | |
+bmf: bmf.o filt.o dbmysql.o dbdb.o dbtext.o dbh.o lex.o vec.o str.o dbg.o | |
+ $(CC) -o $@ bmf.o filt.o dbmysql.o dbdb.o dbtext.o dbh.o lex.o vec.o s… | |
+ | |
+bmf.o: bmf.c | |
+ $(CC) $(CFLAGS) -DPACKAGE=\"bmf\" -DVERSION=\"$(VERSION)\" -c $< | |
+ | |
+bmfconv: bmfconv.o dbmysql.o dbdb.o dbtext.o dbh.o vec.o str.o dbg.o | |
+ $(CC) -o $@ bmfconv.o dbmysql.o dbdb.o dbtext.o dbh.o vec.o str.o dbg.… | |
+ | |
+bmfconv.o: bmfconv.c | |
+ $(CC) $(CFLAGS) -DPACKAGE=\"bmfconv\" -DVERSION=\"$(VERSION)\" -c $< | |
+ | |
+install: checkroot bmf bmf.1 bmfconv bmfconv.1 | |
+ [ -d $(DESTDIR)$(BINDIR) ] || mkdir -p $(DESTDIR)$(BINDIR) | |
+ [ -d $(DESTDIR)$(MANDIR)/man1 ] || mkdir -p $(DESTDIR)$(MANDIR)/man1 | |
+ cp bmf $(DESTDIR)$(BINDIR) | |
+ cp bmf.1 $(DESTDIR)$(MANDIR)/man1 | |
+ cp bmfconv $(DESTDIR)$(BINDIR) | |
+ cp bmfconv.1 $(DESTDIR)$(MANDIR)/man1 | |
+ | |
+uninstall: checkroot | |
+ rm -f $(DESTDIR)$(BINDIR)/bmf | |
+ rm -f $(DESTDIR)$(MANDIR)/man1/bmf.1 | |
+ rm -f $(DESTDIR)$(BINDIR)/bmfconv | |
+ rm -f $(DESTDIR)$(MANDIR)/man1/bmfconv.1 | |
+ | |
+clean: | |
+ rm -f core *.o bmf bmfconv | |
+ | |
+distclean: clean | |
+ rm -f Makefile | |
+ | |
+dist: tarball rpmpkg debpkg | |
+ | |
+tarball: distclean | |
+ (cd ..; \ | |
+ cp -ar bmf bmf-$(VERSION); \ | |
+ tar czvf bmf-$(VERSION).tar.gz `find bmf-$(VERSION) -type f | egrep -… | |
+ rm -rf bmf-$(VERSION); \ | |
+ cd $(PWD)) | |
+ | |
+debpkg: checkroot | |
+ debian/rules binary | |
+ rm -rf debian/tmp | |
+ | |
+rpmpkg: checkroot | |
+ (mkdir -p $(RPMROOT); \ | |
+ mkdir -p $(RPMROOT)/SOURCES; \ | |
+ mkdir -p $(RPMROOT)/SPECS; \ | |
+ mkdir -p $(RPMROOT)/RPMS; \ | |
+ mkdir -p $(RPMROOT)/SRPMS; \ | |
+ cp ../bmf-$(VERSION).tar.gz $(RPMROOT)/SOURCES; \ | |
+ cat bmf.spec.in | sed 's/VERSION/$(VERSION)/' > $(RPMROOT)/SPECS/bmf.… | |
+ cd $(RPMROOT)/SPECS; \ | |
+ $(RPM) $(RPMFLAGS) bmf.spec; \ | |
+ cp $(RPMROOT)/RPMS/$(ARCH)/bmf-$(VERSION)-*.rpm $(PWD)/..; \ | |
+ cp $(RPMROOT)/SRPMS/bmf-$(VERSION)-*.src.rpm $(PWD)/..; \ | |
+ cd $(PWD)) | |
+ | |
+checkroot: | |
+ [ "`whoami`" = root ] || (echo Need root; exit 1) | |
diff --git a/README b/README | |
@@ -0,0 +1,130 @@ | |
+ bmf -- Bayesian Mail Filter | |
+ | |
+About bmf | |
+========= | |
+ | |
+This is a mail filter which uses the Bayes algorithm as explained in Paul | |
+Graham's article "A Plan for Spam". It aims to be faster, smaller, and more | |
+versatile than similar applications. Implementation is ANSI C and uses POSIX | |
+functions. Supported platforms are (in theory) all POSIX systems. Support | |
+for win32 is undecided. | |
+ | |
+This project provides features which are not available in other filters: | |
+ | |
+(1) Independence from external programs and libraries. Tokens are stored in | |
+memory using simple vectors which require no heavyweight external data | |
+structure libraries. Multiple token database formats are supported, | |
+including flat files, libdb, and mysql. Conversion between formats will | |
+always be possible with the included import/export utility and flat files | |
+will always remain an option. | |
+ | |
+(2) Efficient processing. Input data is parsed by a handcrafted parser | |
+which weighs in under 3% of the equivalent code generated by flex. No | |
+portion of the input is ever copied and all i/o and memory allocation are | |
+done in large chunks. Updated token lists are merged and written in one | |
+step. Hashing is being considered for the next version to improve lookup | |
+speed. | |
+ | |
+(3) Simple and elegant implementation. No heavyweight, copy-intensive mime | |
+decoding routines are used. Decoding of quoted-printable text for selected | |
+mime types is being considered for the next version. | |
+ | |
+Note: the core filter function is from esr's bogofilter v0.6 (available at | |
+http://sourceforge.net/projects/bogofilter/) with bugfix updates. | |
+ | |
+For the most recent version of this software, see: | |
+ | |
+ http://sourceforge.net/projects/bmf/ | |
+ | |
+How to integrate bmf | |
+==================== | |
+ | |
+The following procmail recipes will invoke bmf for each incoming email and | |
+place spam into $MAILDIR/spam. The first sample invokes bmf in its normal | |
+mode of operation and the second invokes bmf as a filter. | |
+ | |
+ ### begin sample one ### | |
+ # Invoke bmf and use return code to filter spam in one step | |
+ :0HB | |
+ * ? bmf | |
+ | formail -A"X-Spam-Status: Yes, tests=bmf" >>$MAILDIR/spam | |
+ | |
+ ### begin sample two ### | |
+ # Invoke bmf as a filter | |
+ :0 fw | |
+ | bmf -p | |
+ | |
+ # Filter spam | |
+ :0: | |
+ ^X-Spam-Status: Yes | |
+ $MAILDIR/spam | |
+ | |
+The following maildrop equivalents are suggested by Christian Kurz. | |
+ | |
+ ### begin sample one ### | |
+ # Invoke bmf and use return code to filter spam in one step | |
+ exception { | |
+ `bmf` | |
+ if ( $RETURNCODE == 0 ) | |
+ to $MAILDIR/spam | |
+ } | |
+ | |
+ ### begin sample two ### | |
+ # Invoke bmf as a filter | |
+ exception { | |
+ xfilter "bmf -p" | |
+ if (/^X-Spam-Status: Yes/) | |
+ to $MAILDIR/spam | |
+ } | |
+ | |
+ | |
+If you put bmf in your procmail or maildrop scripts as suggested above, it | |
+will always register an email as either spam or non-spam. To reverse this | |
+registration and train bmf, the following mutt macros may be useful: | |
+ | |
+ macro index \ed "<enter-command>unset wait_key\n<pipe-entry>bmf -S\n<enter-c… | |
+ macro index \et "<enter-command>unset wait_key\n<pipe-entry>bmf -t\n<enter-c… | |
+ macro index \eu "<enter-command>unset wait_key\n<pipe-entry>bmf -N\n<enter-c… | |
+ | |
+These macros will override the following key bindings: | |
+ | |
+ <Esc>d = de-register as non-spam, register as spam, and move to spam folder. | |
+ <Esc>t = test for spamicity. | |
+ <Esc>u = de-register as spam, register as non-spam, and move to inbox folder. | |
+ | |
+How to train bmf | |
+================ | |
+ | |
+First, please keep in mind that bmf "learns" how to recognize spam from the | |
+input that you give it. It works best if you give it exactly the email that | |
+you receive, or have received in the recent past. | |
+ | |
+Here are some good techniques for training bmf: | |
+ | |
+ - If you keep a history of email that you have received, use your current | |
+ and/or saved emails. It is fairly easy to create a small shell script | |
+ that will pass all of your normal email to "bmf -n" and all of your spam | |
+ to "bmf -s". Note that if you do not use the mbox storage format, you | |
+ MUST invoke bmf exactly once per email. Using "cat * | bmf -n" will NOT | |
+ work properly because bmf sees the entire input as one big email. | |
+ | |
+ - If you already use spamassassin, you can use it to train bmf for a | |
+ couple of days or weeks. If spamassassin tags it as spam, run it | |
+ through "bmf -s". If not, run it through "bmf -n". This can be | |
+ automated with procmail or maildrop recipes. | |
+ | |
+Here are some things that you should NOT do: | |
+ | |
+ - Get impatient with the training process and repeatedly pass one email | |
+ through "bmf -s". | |
+ | |
+ - Manually move words around between lists and/or adjust the word counts. | |
+ | |
+Final words | |
+=========== | |
+ | |
+Thanks for trying bmf. If you have any problems, comments, or suggestions, | |
+please direct them to the bmf mailing list, [email protected]. | |
+ | |
+ Tom Marshall | |
+ 20 Oct 2002 | |
diff --git a/TODO b/TODO | |
@@ -0,0 +1,4 @@ | |
+* Make extrema size configurable, and default to ~5% of tokens. | |
+* Teach lexer about multiline MIME headers and case (in)sensitivity. | |
+* Teach lexer about MIME quoted-printable and base64 encodings. | |
+* Make a pop3 proxy (?) | |
diff --git a/bmf.1 b/bmf.1 | |
@@ -0,0 +1,148 @@ | |
+.\"Generated by db2man.xsl. Don't modify this, modify the source. | |
+.de Sh \" Subsection | |
+.br | |
+.if t .Sp | |
+.ne 5 | |
+.PP | |
+\fB\\$1\fR | |
+.PP | |
+.. | |
+.de Sp \" Vertical space (when we can't use .PP) | |
+.if t .sp .5v | |
+.if n .sp | |
+.. | |
+.de Ip \" List item | |
+.br | |
+.ie \\n(.$>=3 .ne \\$3 | |
+.el .ne 3 | |
+.IP "\\$1" \\$2 | |
+.. | |
+.TH "BMF" 1 "" "" "" | |
+.SH NAME | |
+bmf \- efficient Bayesian mail filter | |
+.SH "SYNOPSIS" | |
+ | |
+.nf | |
+\fBbmf\fR [-t] [-n] [-s] [-N] [-S] [-f fmt] [-d db] [-i file] [-k n] [-m type]… | |
+ [-v] [-V] [-h] | |
+.fi | |
+ | |
+.SH "DESCRIPTION" | |
+ | |
+.PP | |
+bmf is a Bayesian mail filter. In its normal mode of operation, it takes an em… | |
+ | |
+.PP | |
+bmf supports both mbox and maildir mail storage formats. It will automatically… | |
+ | |
+.SH "OPTIONS" | |
+ | |
+.PP | |
+Without command-line options, bmf processes the input, registers it as either … | |
+ | |
+.PP | |
+\fB-t\fR Test to see if the input is spam. The word lists are not updated. A r… | |
+ | |
+.PP | |
+\fB-n\fR Register the input as non-spam. | |
+ | |
+.PP | |
+\fB-s\fR Register the input as spam. | |
+ | |
+.PP | |
+\fB-N\fR Register the input as non-spam and undo a prior registration as spam. | |
+ | |
+.PP | |
+\fB-S\fR Register the input as spam and undo a prior registration as non-spam. | |
+ | |
+.PP | |
+\fB-f fmt\fR Specify database format. Valid formats are text, db, and mysql. T… | |
+ | |
+.PP | |
+\fB-d db\fR Specify database or directory for loading and saving word lists. T… | |
+ | |
+.PP | |
+\fB-i file\fR Use file for input instead of stdin. | |
+ | |
+.PP | |
+\fB-k n\fR Specify the number of extrema (keepers) to use in the Bayes calcula… | |
+ | |
+.PP | |
+\fB-m type\fR Specify mail storage format. Valid formats are mbox and maildir. … | |
+ | |
+.PP | |
+\fB-p\fR Copy the input to the output (passthrough) and insert spam headers in… | |
+ | |
+.PP | |
+\fB-v\fR Be more verbose. This option is not well supported yet. | |
+ | |
+.PP | |
+\fB-V\fR Display version information. | |
+ | |
+.PP | |
+\fB-h\fR Display usage information. | |
+ | |
+.SH "THEORY OF OPERATION" | |
+ | |
+.PP | |
+bmf treats its input as a bag of tokens. Each token is checked against "good" … | |
+ | |
+.PP | |
+While this method sounds crude compared to the more usual pattern-matching app… | |
+ | |
+.PP | |
+bmf improves on Paul's proposal by doing smarter lexical analysis. In particul… | |
+ | |
+.PP | |
+MIME and other attachments are not decoded. Experience from watching the token… | |
+ | |
+.SH "INTEGRATION WITH OTHER TOOLS" | |
+ | |
+.PP | |
+Please see the README for samples and suggestions. | |
+ | |
+.SH "RETURN VALUES" | |
+ | |
+.PP | |
+In passthrough mode: zero for success, nonzero for failure. | |
+ | |
+.PP | |
+In non-passthrough mode: 0 for spam; 1 for non-spam; 2 for I/O or other errors. | |
+ | |
+.SH "FILES" | |
+ | |
+.TP | |
+\fI~/.bmf/goodlist.txt\fR | |
+List of good tokens for text mode. | |
+ | |
+.TP | |
+\fI~/.bmf/spamlist.txt\fR | |
+List of bad tokens for text mode. | |
+ | |
+.TP | |
+\fI~/.bmf/goodlist.db\fR | |
+List of good tokens for libdb mode. | |
+ | |
+.TP | |
+\fI~/.bmf/spamlist.db\fR | |
+List of bad tokens for libdb mode. | |
+ | |
+.SH "BUGS" | |
+ | |
+.PP | |
+The lexer should recognize multiline headers. | |
+ | |
+.PP | |
+The lexer should recognize MIME attachments. | |
+ | |
+.PP | |
+Content-Transfer-Encoding is not decoded. | |
+ | |
+.SH "AUTHOR" | |
+ | |
+.PP | |
+Tom Marshall <[email protected]>. | |
+ | |
+.PP | |
+The Bayes algorithm is from bogofilter by Eric S. Raymond <[email protected]>. b… | |
+ | |
diff --git a/bmf.c b/bmf.c | |
@@ -0,0 +1,339 @@ | |
+/* $Id: bmf.c,v 1.20 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * bmf.c: top level Bayesian mail filter app. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+#include "dbh.h" | |
+#include "filt.h" | |
+ | |
+/* | |
+ * modes of operation (mutually exclusive) | |
+ * | |
+ * main() starts in mode_normal; the -t, -s, -n, -S and -N options select | |
+ * the other modes, and the last one given on the command line wins. | |
+ */ | |
+typedef enum | |
+{ | |
+ mode_test, /* test and produce report */ | |
+ mode_normal, /* test and register result */ | |
+ mode_reg_s, /* register as spam */ | |
+ mode_reg_n, /* register as non-spam */ | |
+ mode_n_to_s, /* undo non-spam registration and register as spam */ | |
+ mode_s_to_n /* undo spam registration and register as non-spam */ | |
+} runmode_t; | |
+ | |
+/* | |
+ * usage: print the command-line help text to stdout and terminate the | |
+ * process with exit status 2.  Does not return. | |
+ */ | |
+static void usage( void ) | |
+{ | |
+ printf( "\n" | |
+ "Usage: " PACKAGE " [mode] [options]\n" | |
+ "\n" | |
+ "Modes of operation (mutually exclusive; the last one specified is… | |
+ "\t\tRegister message using historical data if no mode is specifie… | |
+ "\t-n\tRegister message as non-spam.\n" | |
+ "\t-s\tRegister message as spam.\n" | |
+ "\t-N\tRegister message as non-spam and undo prior registration as… | |
+ "\t-S\tRegister message as spam and undo prior registration as non… | |
+ "\t-t\tTest mode, print report and do not save results.\n" | |
+ "\n" | |
+ "Other options:\n" | |
+ "\t-f fmt\tSpecify database format (text|db|mysql).\n" | |
+ "\t-d db\tSpecify database or directory name.\n" | |
+ "\t-i file\tSpecify file to read instead of stdin.\n" | |
+ "\t-k n\tSpecify count of extrema to use (keepers), default is 15.… | |
+ "\t-m type\t[DEPRECATED] Specify mail storage format (mbox|maildir… | |
+ "\t-p\tPassthrough mode, like SpamAssassin.\n" | |
+ "\t-v\tIncrease verbosity level.\n" | |
+ "\t-V\tShow version information and exit.\n" | |
+ "\t-h\tShow this message and exit.\n" | |
+ "\n" ); | |
+ exit( 2 ); | |
+} | |
+ | |
+/* | |
+ * version: print the package name, version and license notice to stdout | |
+ * and terminate the process with exit status 2.  Does not return. | |
+ */ | |
+static void version( void ) | |
+{ | |
+ printf( "\n" | |
+ PACKAGE " version " VERSION " - a Bayesian mail filter\n" | |
+ "Copyright (c) 2002 Tom Marshall\n" | |
+ "\n" | |
+ PACKAGE " comes with ABSOLUTELY NO WARRANTY.\n" | |
+ "This is free software. You are welcome to redistribute it under … | |
+ "of the GNU General Public License. See the file LICENSE in the s… | |
+ "distribution, or visit http://www.gnu.org/licenses/gpl.html\n" | |
+ "\n" ); | |
+ exit( 2 ); | |
+} | |
+ | |
+/* | |
+ * main: parse the command line, load the input from stdin or the -i file, | |
+ * then test and/or register each message against the "spamlist" and | |
+ * "goodlist" tables according to the selected mode. | |
+ * | |
+ * Returns 0 in passthrough mode or when the last message processed is | |
+ * spam, 1 otherwise.  All error paths terminate through exit( 2 ). | |
+ */ | |
+int main( int argc, char** argv ) | |
+{ | |
+ int ch; | |
+ long nkeep; /* -k argument as parsed by strtol */ | |
+ char* endp; /* first character strtol did not consume */ | |
+ dbfmt_t dbfmt = db_db; | |
+ char* dbname = NULL; | |
+ bool_t rdonly; | |
+ | |
+ runmode_t mode = mode_normal; | |
+ mbox_t mboxtype = detect; | |
+ bool_t do_passthru = false; | |
+ | |
+ dbh_t* pdb; | |
+ dbt_t* pblist; | |
+ dbt_t* pglist; | |
+ dbt_t* ptable; | |
+ vec_t mlist; | |
+ stats_t stats; | |
+ lex_t lex; | |
+ tok_t tok; | |
+ bool_t is_spam = false; /* always set in the message loop; initialized for safety */ | |
+ | |
+ int fd = STDIN_FILENO; | |
+ char* infile = NULL; | |
+ | |
+ srand(time(NULL)); | |
+ atexit( dump_alloc_heap ); | |
+ | |
+ /* default database format depends on whether libdb support was built in */ | |
+#ifdef HAVE_LIBDB | |
+ dbfmt = db_db; | |
+#else | |
+ dbfmt = db_text; | |
+#endif | |
+ | |
+ stats.keepers = DEF_KEEPERS; | |
+ while( (ch = getopt( argc, argv, "NSVd:f:i:hk:m:npstv" )) != EOF ) | |
+ { | |
+ switch( ch ) | |
+ { | |
+ case 'N': | |
+ mode = mode_s_to_n; | |
+ break; | |
+ case 'S': | |
+ mode = mode_n_to_s; | |
+ break; | |
+ case 'V': | |
+ version(); | |
+ break; /* notreached */ | |
+ case 'd': | |
+ free( dbname ); | |
+ dbname = strdup( optarg ); | |
+ break; | |
+ case 'f': | |
+ if( strcasecmp( optarg, "text" ) == 0 ) | |
+ { | |
+ dbfmt = db_text; | |
+ } | |
+ else if( strcasecmp( optarg, "db" ) == 0 ) | |
+ { | |
+ dbfmt = db_db; | |
+ } | |
+ else if( strcasecmp( optarg, "mysql" ) == 0 ) | |
+ { | |
+ dbfmt = db_mysql; | |
+ } | |
+ else | |
+ { | |
+ usage(); | |
+ } | |
+ break; | |
+ case 'h': | |
+ usage(); | |
+ break; /* notreached */ | |
+ case 'i': | |
+ free( infile ); | |
+ infile = strdup( optarg ); | |
+ break; | |
+ case 'k': | |
+ /* the count sizes the extrema array allocated below, so reject | |
+ * junk, zero and negative values instead of trusting atoi() */ | |
+ errno = 0; | |
+ nkeep = strtol( optarg, &endp, 10 ); | |
+ if( errno != 0 || endp == optarg || *endp != '\0' || | |
+ nkeep <= 0 || nkeep > INT_MAX ) | |
+ { | |
+ fprintf( stderr, "%s: invalid keeper count '%s'\n", argv[0], optarg ); | |
+ exit( 2 ); | |
+ } | |
+ stats.keepers = nkeep; | |
+ break; | |
+ case 'm': | |
+ if( strcasecmp( optarg, "mbox" ) == 0 ) | |
+ { | |
+ mboxtype = mbox; | |
+ } | |
+ else if( strcasecmp( optarg, "maildir" ) == 0 ) | |
+ { | |
+ mboxtype = maildir; | |
+ } | |
+ else | |
+ { | |
+ usage(); | |
+ } | |
+ break; | |
+ case 'n': | |
+ mode = mode_reg_n; | |
+ break; | |
+ case 'p': | |
+ do_passthru = true; | |
+ break; | |
+ case 's': | |
+ mode = mode_reg_s; | |
+ break; | |
+ case 't': | |
+ mode = mode_test; | |
+ break; | |
+ case 'v': | |
+ g_verbose++; | |
+ verbose( 1, "Verbose level now %u\n", g_verbose ); | |
+ break; | |
+ default: | |
+ usage(); | |
+ } | |
+ } | |
+ stats.extrema = (discrim_t*)malloc( stats.keepers*sizeof(discrim_t) ); | |
+ if( stats.extrema == NULL ) | |
+ { | |
+ fprintf( stderr, "%s: out of memory\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ | |
+ if( infile != NULL ) | |
+ { | |
+ fd = open( infile, O_RDONLY ); | |
+ if( fd == -1 ) | |
+ { | |
+ fprintf( stderr, "%s: cannot open input file '%s': %s\n", | |
+ argv[0], infile, strerror(errno) ); | |
+ exit( 2 ); | |
+ } | |
+ } | |
+ | |
+ pdb = dbh_open( dbfmt, "localhost", dbname, DB_USER, DB_PASS ); | |
+ if( pdb == NULL ) | |
+ { | |
+ fprintf( stderr, "%s: cannot open database\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ | |
+ lex_create( &lex, mboxtype ); | |
+ if( !lex_load( &lex, fd ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot read input\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ lex_nexttoken( &lex, &tok ); | |
+ if( tok.tt == eof ) | |
+ { | |
+ fprintf( stderr, "%s: no input available\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ | |
+ /* one iteration per message; runs at least once because eof was ruled out above */ | |
+ while( tok.tt != eof ) | |
+ { | |
+ if( mboxtype == mbox && tok.tt != from ) | |
+ { | |
+ fprintf( stderr, "%s: input does not look like an mbox message\n",… | |
+ exit( 2 ); | |
+ } | |
+ | |
+ /* a table is opened read-only when the selected mode never writes to it */ | |
+ rdonly = (mode == mode_test || mode == mode_reg_n); | |
+ pblist = pdb->opentable( pdb, "spamlist", rdonly ); | |
+ if( pblist == NULL ) | |
+ { | |
+ fprintf( stderr, "%s: cannot open spamlist\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ | |
+ rdonly = (mode == mode_test || mode == mode_reg_s); | |
+ pglist = pdb->opentable( pdb, "goodlist", rdonly ); | |
+ if( pglist == NULL ) | |
+ { | |
+ fprintf( stderr, "%s: cannot open goodlist\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ | |
+ vec_create( &mlist ); | |
+ bvec_loadmsg( &mlist, &lex, &tok ); | |
+ | |
+ switch( mode ) | |
+ { | |
+ case mode_test: | |
+ bayesfilt( pglist, pblist, &mlist, &stats ); | |
+ is_spam = (stats.spamicity > SPAM_CUTOFF); | |
+ break; | |
+ case mode_normal: | |
+ bayesfilt( pglist, pblist, &mlist, &stats ); | |
+ is_spam = (stats.spamicity > SPAM_CUTOFF); | |
+ ptable = (is_spam ? pblist : pglist); | |
+ svec_sort( &mlist ); | |
+ if( !ptable->mergeclose( ptable, &mlist ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot merge/save list\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ break; | |
+ case mode_reg_s: | |
+ stats.spamicity = 1.0; | |
+ is_spam = true; | |
+ svec_sort( &mlist ); | |
+ if( !pblist->mergeclose( pblist, &mlist ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot merge/save list\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ break; | |
+ case mode_reg_n: | |
+ stats.spamicity = 0.0; | |
+ is_spam = false; | |
+ svec_sort( &mlist ); | |
+ if( !pglist->mergeclose( pglist, &mlist ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot merge/save list\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ break; | |
+ case mode_n_to_s: | |
+ stats.spamicity = 1.0; | |
+ is_spam = true; | |
+ svec_sort( &mlist ); | |
+ if( !pblist->mergeclose( pblist, &mlist ) || | |
+ !pglist->unmergeclose( pglist, &mlist ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot merge/save list\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ break; | |
+ case mode_s_to_n: | |
+ stats.spamicity = 0.0; | |
+ is_spam = false; | |
+ svec_sort( &mlist ); | |
+ if( !pblist->unmergeclose( pblist, &mlist ) || | |
+ !pglist->mergeclose( pglist, &mlist ) ) | |
+ { | |
+ fprintf( stderr, "%s: cannot merge/save list\n", argv[0] ); | |
+ exit( 2 ); | |
+ } | |
+ break; | |
+ default: | |
+ usage(); | |
+ } | |
+ | |
+ if( mode == mode_test ) | |
+ { | |
+ statdump( &stats, STDOUT_FILENO ); | |
+ } | |
+ | |
+ if( do_passthru ) | |
+ { | |
+ lex_passthru( &lex, is_spam, stats.spamicity ); | |
+ } | |
+ | |
+ vec_destroy( &mlist ); | |
+ | |
+ pglist->close( pglist ); | |
+ free( pglist ); | |
+ pblist->close( pblist ); | |
+ free( pblist ); | |
+ } | |
+ | |
+ lex_destroy( &lex ); | |
+ | |
+ pdb->close( pdb ); | |
+ free( pdb ); | |
+ /* released only after the database handle is gone, in case it kept the pointer */ | |
+ free( dbname ); | |
+ | |
+ if( infile != NULL ) | |
+ { | |
+ free( infile ); | |
+ close( fd ); | |
+ } | |
+ free( stats.extrema ); | |
+ | |
+ return ( (do_passthru || is_spam) ? 0 : 1 ); | |
+} | |
diff --git a/bmf.spec.in b/bmf.spec.in | |
@@ -0,0 +1,64 @@ | |
+Name: bmf | |
+Version: VERSION | |
+Release: 1 | |
+URL: http://www.sourceforge.net/projects/bmf | |
+Source0: %{name}-%{version}.tar.gz | |
+License: GPL | |
+Group: Applications/Internet | |
+Summary: fast anti-spam filtering by Bayesian statistical analysis | |
+Buildroot: %{_tmppath}/%{name}-%{version}-root | |
+ | |
+%description | |
+bmf is a Bayesian mail filter. It takes an email message or other text on | |
+stdin, does a statistical check against lists of "good" and "spam" words, | |
+and returns a status code indicating whether or not the message is spam. | |
+bmf is efficient, small, and self-contained. | |
+ | |
+%prep | |
+ | |
+%setup | |
+ | |
+%build | |
+./configure --with-libdb --without-mysql | |
+make | |
+ | |
+%install | |
+[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT | |
+make DESTDIR=${RPM_BUILD_ROOT} install | |
+gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.? | |
+ | |
+ | |
+%files | |
+%{_bindir}/bmf | |
+%{_mandir}/man1/bmf.1.gz | |
+%{_bindir}/bmfconv | |
+%{_mandir}/man1/bmfconv.1.gz | |
+%doc README LICENSE | |
+ | |
+%changelog | |
+* Mon Oct 14 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.9.3. | |
+ | |
+* Sat Oct 12 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.9.2. | |
+ | |
+* Sat Oct 12 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.9.1. | |
+ | |
+* Wed Oct 09 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.84. | |
+ | |
+* Mon Oct 07 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.83. | |
+ | |
+* Sat Oct 05 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.82. | |
+ | |
+* Thu Oct 03 2002 Tom Marshall <[email protected]> | |
+- Update to version 0.81. | |
+- Add bmfconv. | |
+- Use new configure script. | |
+ | |
+* Fri Sep 27 2002 Tom Marshall <[email protected]> | |
+- Initial build. | |
+ | |
diff --git a/bmfconv.1 b/bmfconv.1 | |
@@ -0,0 +1,81 @@ | |
+.\"Generated by db2man.xsl. Don't modify this, modify the source. | |
+.de Sh \" Subsection | |
+.br | |
+.if t .Sp | |
+.ne 5 | |
+.PP | |
+\fB\\$1\fR | |
+.PP | |
+.. | |
+.de Sp \" Vertical space (when we can't use .PP) | |
+.if t .sp .5v | |
+.if n .sp | |
+.. | |
+.de Ip \" List item | |
+.br | |
+.ie \\n(.$>=3 .ne \\$3 | |
+.el .ne 3 | |
+.IP "\\$1" \\$2 | |
+.. | |
+.TH "BMFCONV" 1 "" "" "" | |
+.SH NAME | |
+bmfconv \- Database converter for bmf | |
+.SH "SYNOPSIS" | |
+ | |
+.nf | |
+\fBbmfconv\fR [-f fmt] [-d db] [-e] [-i] [-v] [-h] | |
+.fi | |
+ | |
+.SH "DESCRIPTION" | |
+ | |
+.PP | |
+bmfconv converts bmf token databases between the supported formats. It can imp… | |
+ | |
+.PP | |
+PLEASE NOTE that the text files used in import and export operations are read … | |
+ | |
+.SH "OPTIONS" | |
+ | |
+.PP | |
+\fB-f fmt\fR Specify database format. Supported formats are "db" for libdb and… | |
+ | |
+.PP | |
+\fB-d db\fR Specify database name. | |
+ | |
+.PP | |
+\fB-e\fR Export the database to text files. | |
+ | |
+.PP | |
+\fB-i\fR Import the database from text files. | |
+ | |
+.PP | |
+\fB-v\fR Display version information. | |
+ | |
+.PP | |
+\fB-h\fR Display usage information. | |
+ | |
+.SH "RETURN VALUES" | |
+ | |
+.PP | |
+0 if conversion succeeds, nonzero if conversion fails. | |
+ | |
+.SH "FILES" | |
+ | |
+.TP | |
+\fIgoodlist.txt\fR | |
+Text file for import or export of good tokens. | |
+ | |
+.TP | |
+\fIspamlist.txt\fR | |
+Text file for import or export of spam tokens. | |
+ | |
+.SH "BUGS" | |
+ | |
+.PP | |
+Should be more robust. | |
+ | |
+.SH "AUTHOR" | |
+ | |
+.PP | |
+Tom Marshall <[email protected]>. bmfconv is a part of the bmf package. | |
+ | |
diff --git a/bmfconv.c b/bmfconv.c | |
@@ -0,0 +1,169 @@ | |
+/* $Id: bmfconv.c,v 1.9 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * bmfconv.c: bmf database converter | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "vec.h" | |
+#include "dbh.h" | |
+ | |
+/* conversion direction, selected on the command line in main() */ | |
+typedef enum | |
+{ | |
+ none, /* no direction given yet; main() prints usage and exits */ | |
+ db2text, /* -e: export the database tables to text files */ | |
+ text2db /* -i: import the text files into the database tables */ | |
+} dir_t; | |
+ | |
+/* | |
+ * usage: print the command-line help text to stdout and terminate the | |
+ * process with exit status 2.  Does not return. | |
+ */ | |
+static void usage( void ) | |
+{ | |
+ printf( "\n" | |
+ "Usage: " PACKAGE " [options]\n" | |
+ "\t-f fmt\tSpecify database format (db|mysql).\n" | |
+ "\t-d db\tSpecify database or directory name.\n" | |
+ "\t-e\tExport to text files goodlist.txt and spamlist.txt.\n" | |
+ "\t-i\tImport from text files goodlist.txt and spamlist.txt.\n" | |
+ "\t-v\tShow version information and exit\n" | |
+ "\t-h\tShow this message and exit\n" | |
+ "\n" ); | |
+ exit( 2 ); | |
+} | |
+ | |
+/* | |
+ * version: print the package name, version and license notice to stdout | |
+ * and terminate the process with exit status 2.  Does not return. | |
+ */ | |
+static void version( void ) | |
+{ | |
+ printf( "\n" | |
+ PACKAGE " version " VERSION " - a Bayesian mail filter\n" | |
+ "Copyright (c) 2002 Tom Marshall\n" | |
+ "\n" | |
+ PACKAGE " comes with ABSOLUTELY NO WARRANTY.\n" | |
+ "This is free software. You are welcome to redistribute it under … | |
+ "of the GNU General Public License. See the file LICENSE in the s… | |
+ "distribution, or visit http://www.gnu.org/licenses/gpl.html\n" | |
+ "\n" ); | |
+ exit( 2 ); | |
+} | |
+ | |
+/* | |
+ * main: parse the command line, open the database, then export (-e) or | |
+ * import (-i) first the "spamlist" table and then the "goodlist" table | |
+ * to/from spamlist.txt and goodlist.txt. | |
+ * | |
+ * Returns 0 on success.  Database, table and conversion failures exit | |
+ * with status 1; usage errors exit with status 2 through usage(). | |
+ */ | |
+int main( int argc, char** argv ) | |
+{ | |
+ int ch; | |
+ dbfmt_t dbfmt = db_db; | |
+ char* dbname = NULL; | |
+ bool_t rdonly; | |
+ | |
+ dbh_t* pdb; | |
+ dbt_t* ptable; | |
+ dir_t dir = none; | |
+ | |
+ while( (ch = getopt( argc, argv, "d:ef:ihv" )) != EOF ) | |
+ { | |
+ switch( ch ) | |
+ { | |
+ case 'd': | |
+ free( dbname ); | |
+ dbname = strdup( optarg ); | |
+ break; | |
+ case 'e': | |
+ dir = db2text; | |
+ break; | |
+ case 'f': | |
+ if( strcasecmp( optarg, "db" ) == 0 ) | |
+ { | |
+ dbfmt = db_db; | |
+ } | |
+ else if( strcasecmp( optarg, "mysql" ) == 0 ) | |
+ { | |
+ dbfmt = db_mysql; | |
+ } | |
+ else | |
+ { | |
+ usage(); | |
+ } | |
+ break; | |
+ case 'h': | |
+ usage(); | |
+ break; /* notreached */ | |
+ case 'i': | |
+ dir = text2db; | |
+ break; | |
+ case 'v': | |
+ version(); | |
+ break; /* notreached */ | |
+ default: | |
+ usage(); | |
+ } | |
+ } | |
+ /* one of -e or -i is mandatory */ | |
+ if( dir == none ) | |
+ { | |
+ usage(); | |
+ } | |
+ | |
+ pdb = dbh_open( dbfmt, "localhost", dbname, DB_USER, DB_PASS ); | |
+ if( pdb == NULL ) | |
+ { | |
+ fprintf( stderr, "cannot open database\n" ); | |
+ exit( 1 ); | |
+ } | |
+ /* exporting only reads the tables, so open them read-only in that case */ | |
+ rdonly = (dir == db2text ? true : false); | |
+ | |
+ /* first pass: the spam table */ | |
+ ptable = pdb->opentable( pdb, "spamlist", rdonly ); | |
+ if( ptable == NULL ) | |
+ { | |
+ fprintf( stderr, "cannot open spamlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ if( dir == db2text ) | |
+ { | |
+ if( !ptable->export( ptable, "spamlist.txt" ) ) | |
+ { | |
+ fprintf( stderr, "cannot export spamlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ } | |
+ else | |
+ { | |
+ if( !ptable->import( ptable, "spamlist.txt" ) ) | |
+ { | |
+ fprintf( stderr, "cannot import spamlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ } | |
+ ptable->close( ptable ); | |
+ free( ptable ); | |
+ | |
+ /* second pass: the same steps for the good table */ | |
+ ptable = pdb->opentable( pdb, "goodlist", rdonly ); | |
+ if( ptable == NULL ) | |
+ { | |
+ fprintf( stderr, "cannot open goodlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ if( dir == db2text ) | |
+ { | |
+ if( !ptable->export( ptable, "goodlist.txt" ) ) | |
+ { | |
+ fprintf( stderr, "cannot export goodlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ } | |
+ else | |
+ { | |
+ if( !ptable->import( ptable, "goodlist.txt" ) ) | |
+ { | |
+ fprintf( stderr, "cannot import goodlist\n" ); | |
+ exit( 1 ); | |
+ } | |
+ } | |
+ ptable->close( ptable ); | |
+ free( ptable ); | |
+ | |
+ pdb->close( pdb ); | |
+ free( pdb ); | |
+ | |
+ return 0; | |
+} | |
diff --git a/config.h b/config.h | |
@@ -0,0 +1,80 @@ | |
+/* $Id: config.h,v 1.8 2002/10/20 07:16:57 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _CONFIG_H | |
+#define _CONFIG_H | |
+ | |
+/************************************** | |
+ * Standard headers | |
+ */ | |
+#include <stdlib.h> | |
+#include <stdio.h> | |
+#include <string.h> | |
+#include <errno.h> | |
+#include <math.h> | |
+#include <ctype.h> | |
+#include <assert.h> | |
+ | |
+/************************************** | |
+ * System headers | |
+ */ | |
+#include <sys/types.h> | |
+#include <limits.h> | |
+#include <unistd.h> | |
+#include <sys/stat.h> | |
+#include <fcntl.h> | |
+#include <sys/file.h> | |
+#include <time.h> | |
+ | |
+/************************************** | |
+ * For convenience | |
+ */ | |
+typedef unsigned char byte; | |
+typedef const char* cpchar; | |
+typedef const byte* cpbyte; | |
+typedef const void* cpvoid; | |
+/* home-grown boolean: false == 0, true == 1 */ | |
+typedef enum { false, true } bool_t; | |
+ | |
+/* | |
+ * NOTE: these are function-like macros, so an argument can be evaluated | |
+ * more than once; do not pass expressions with side effects. | |
+ * minmax(v,a,b) clamps v to the range [a,b]. | |
+ */ | |
+#define min(a,b) ( (a)<(b) ? (a) : (b) ) | |
+#define max(a,b) ( (a)<(b) ? (b) : (a) ) | |
+#define minmax(v,a,b) ( (v)<(a)?(a) : (v)>(b)?(b) : (v) ) | |
+ | |
+/* XXX: need to figure out MH and any others (MMDF?) */ | |
+typedef enum { detect, mbox, maildir } mbox_t; | |
+ | |
+/************************************** | |
+ * Tweakables | |
+ */ | |
+ | |
+/* If you have the mysql client libs installed and wish to use them... */ | |
+/* #define HAVE_MYSQL */ | |
+ | |
+#define MSGCOUNT_KEY ".MSGCOUNT" | |
+#define MSGCOUNT_KEY_LEN (sizeof(MSGCOUNT_KEY)-1) | |
+ | |
+#define DB_USER "username" | |
+#define DB_PASS "password" | |
+ | |
+#define IOBUFSIZE 4096 /* chunk size for file buffers */ | |
+#define MAXWORDLEN 20 /* max word length, inclusive */ | |
+#define MAXFREQ 4 /* max times to count word per email */ | |
+#define GOOD_BIAS 2.0 /* give good words more weight */ | |
+#define DEF_KEEPERS 15 /* how many extrema to keep by default */ | |
+#define MINIMUM_FREQ 5 /* min word count for consideration in filter … | |
+#define UNKNOWN_WORD 0.4 /* odds that unknown word is spammish */ | |
+#define SPAM_CUTOFF 0.9 /* if it's spammier than this... */ | |
+ | |
+/* | |
+ * If NON_EQUIPROBABLE is defined, use ratio of spamcount/goodcount instead | |
+ * of UNKNOWN_WORD, and as a factor in the known word calculation. This is | |
+ * merely copied from bogofilter. I didn't write it and I cannot explain the | |
+ * relative merits of using it or not. Please don't ask. :-) | |
+ */ | |
+ | |
+#endif /* ndef _CONFIG_H */ | |
diff --git a/configure b/configure | |
@@ -0,0 +1,354 @@ | |
+#!/bin/sh | |
+ | |
+echo "" | |
+ | |
+# defaults | |
+DEBUG=no | |
+with_libdb=test | |
+with_mysql=test | |
+ | |
+# parse options | |
+# --with/--without parsing copied from autoconf's output | |
+while [ $# -gt 0 ]; do | |
+ case $1 in | |
+ --with-*) | |
+ ac_option=$1 | |
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` | |
+ ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` | |
+ # Reject names that are not valid shell variable names. | |
+ expr "x$ac_package" : ".*[^A-Za-z0-9_-]" >/dev/null && | |
+ { echo "error: invalid package name: $ac_package"; exit 1; } | |
+ ac_package=`echo $ac_package | sed 's/-/_/g'` | |
+ case $ac_option in | |
+ *=*) | |
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` | |
+ ;; | |
+ *) | |
+ ac_optarg=yes | |
+ ;; | |
+ esac | |
+ eval "with_$ac_package='$ac_optarg'" | |
+ ;; | |
+ --without-*) | |
+ ac_option=$1 | |
+ ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` | |
+ ac_package=`expr "x$ac_option" : 'x-*without-\([^=]*\)'` | |
+ # Reject names that are not valid shell variable names. | |
+ expr "x$ac_package" : ".*[^A-Za-z0-9_-]" >/dev/null && | |
+ { echo "error: invalid package name: $ac_package"; exit 1; } | |
+ ac_package=`echo $ac_package | sed 's/-/_/g'` | |
+ eval "with_$ac_package=no" | |
+ ;; | |
+ --debug=yes|--debug|-d) | |
+ echo "Debug mode enabled." | |
+ with_debug=yes | |
+ ;; | |
+ --debug=no) | |
+ echo "Debug mode disabled." | |
+ with_debug=no | |
+ ;; | |
+ --help|-help|-h) | |
+ echo "usage: $0 [ options ]" | |
+ echo "available options:" | |
+ echo " --debug=yes (or -d) Enable debugging support." | |
+ echo " --debug=no Disable debugging support." | |
+ echo " --with-package Enable support for package in default lo… | |
+ echo " --with-package=path Enable support for package installed in … | |
+ echo " --without-package Disable support for package." | |
+ echo " --help (or -h) Show this message." | |
+ echo "" | |
+ echo "relevant packages:" | |
+ echo " libdb = BerkeleyDB" | |
+ echo " mysql = MySQL database" | |
+ exit 1 | |
+ ;; | |
+ *) | |
+ echo "Unknown option '$1', try -h for help" | |
+ exit 1 | |
+ ;; | |
+ esac | |
+ shift | |
+done | |
+ | |
+echo "Examining system setup..." | |
+ | |
+# Some known configs: | |
+# | |
+# uname -s uname -r uname -m uname -p | |
+# ======== =========== ======== ======== | |
+# SunOS 5.6 sun4u sparc | |
+# Linux 2.2.17 i686 unknown | |
+# FreeBSD 4.1-RELEASE i386 i386 | |
+ | |
+UNAME_S=`uname -s` | |
+UNAME_R=`uname -r` | |
+ | |
+case "$UNAME_S" in | |
+ Linux) | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=gcc | |
+ CFLAGS="$CCDBG -D_UNIX -D_LINUX -Wall" | |
+ CXX=g++ | |
+ CXXFLAGS=${CFLAGS} | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=gcc | |
+ LDFLAGS="$LDDBG" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="-ldb" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ FreeBSD) | |
+ case "$UNAME_R" in | |
+ 2.*) | |
+ OSVER=20 | |
+ ;; | |
+ 3.*) | |
+ OSVER=30 | |
+ ;; | |
+ 4.*) | |
+ OSVER=40 | |
+ ;; | |
+ *) | |
+ # Assume 5.0 + | |
+ OSVER=50 | |
+ ;; | |
+ esac | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=gcc | |
+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -Wall" | |
+ CXX=g++ | |
+ CXXFLAGS=${CFLAGS} | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=gcc | |
+ LDFLAGS="$LDDBG" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ OpenBSD) | |
+ # I'm guessing OpenBSD looks mostly like FreeBSD 4.x | |
+ OSVER=40 | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=gcc | |
+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -Wall" | |
+ CXX=g++ | |
+ CXXFLAGS=${CFLAGS} | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=gcc | |
+ LDFLAGS="$LDDBG" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ Darwin) | |
+ # I'm guessing Darwin looks mostly like FreeBSD 4.x | |
+ OSVER=40 | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=cc | |
+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -DNOLOCK -Wall" | |
+ CXX=c++ | |
+ CXXFLAGS=${CFLAGS} | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=cc | |
+ LDFLAGS="$LDDBG" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ SunOS) | |
+ # Map the SunOS release to the value passed to the compiler as -D_SOLARIS | |
+ case "$UNAME_R" in | |
+ 5.6) | |
+ OSVER=56 | |
+ ;; | |
+ 5.7) | |
+ OSVER=57 | |
+ ;; | |
+ 5.8) | |
+ OSVER=58 | |
+ ;; | |
+ *) | |
+ # Assume SunOS 5.9 (Solaris 9) or later | |
+ OSVER=59 | |
+ ;; | |
+ esac | |
+ # Prefer gcc to the native cc here because I haven't tested with the | |
+ # native compiler yet. | |
+ type gcc >/dev/null 2>&1 | |
+ if [ $? -eq 0 ]; then | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=gcc | |
+ CFLAGS="$CCDBG -D_UNIX -D_SOLARIS=$OSVER -Wall" | |
+ CXX=g++ | |
+ CXXFLAGS=${CFLAGS} | |
+ LD=gcc | |
+ LDFLAGS="$LDDBG" | |
+ else | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=cc | |
+ # Honour the debug setting computed above instead of always using -DNDEBUG | |
+ CFLAGS="$CCDBG -D_UNIX -D_SOLARIS=$OSVER" | |
+ CXX=c++ | |
+ CXXFLAGS=${CFLAGS} | |
+ LD=ld | |
+ LDFLAGS="" | |
+ fi | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="-ldb" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ OSF1) | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g2" | |
+ LDDBG="-g2" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=cc | |
+ CFLAGS="$CCDBG -D_UNIX -D_OSF" | |
+ CXX=cxx | |
+ CXXFLAGS="-noexceptions ${CFLAGS}" | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=ld | |
+ LDFLAGS="" | |
+ SYSLIBS="-lm" | |
+ LIBDB_LIB="-ldb" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+ *) | |
+ # Unknown platform: fall back to generic cc/ld settings | |
+ if [ "$with_debug" = "yes" ]; then | |
+ CCDBG="-g" | |
+ LDDBG="-g" | |
+ else | |
+ CCDBG="-DNDEBUG" | |
+ LDDBG="" | |
+ fi | |
+ CC=cc | |
+ # CCDBG already carries -DNDEBUG for non-debug builds; adding it | |
+ # unconditionally would disable assertions in debug builds too | |
+ CFLAGS="$CCDBG -D_UNIX" | |
+ CXX=c++ | |
+ CXXFLAGS=${CFLAGS} | |
+ AR=ar | |
+ ARFLAGS="-rc" | |
+ LD=ld | |
+ LDFLAGS="$LDDBG" | |
+ SYSLIBS="" | |
+ LIBDB_LIB="-ldb" | |
+ MYSQL_LIB="-lmysqlclient" | |
+ ;; | |
+esac | |
+ | |
+echo -n "Looking for compiler... " | |
+type $CC >/dev/null 2>&1 | |
+if [ $? -ne 0 ]; then | |
+ echo "compiler '$CC' not found!" | |
+ exit 1 | |
+else | |
+ echo "$CC is executable." | |
+fi | |
+ | |
+echo -n "Checking for BerkeleyDB... " | |
+if [ "$with_libdb" = "test" ]; then | |
+ if [ -e /usr/local/BerkeleyDB.4.1/include/db.h ]; then | |
+ with_libdb=/usr/local/BerkeleyDB.4.1 | |
+ LIBDB_LIB="-ldb" | |
+ elif [ -e /usr/include/db.h -o \ | |
+ -e /usr/local/include/db.h ]; then | |
+ with_libdb=yes | |
+ else | |
+ with_libdb=no | |
+ fi | |
+fi | |
+if [ "$with_libdb" = "no" ]; then | |
+ echo "disabled." | |
+else | |
+ if [ "$with_libdb" != "yes" ]; then | |
+ CFLAGS="$CFLAGS -I$with_libdb/include" | |
+ LDFLAGS="$LDFLAGS -L$with_libdb/lib" | |
+ fi | |
+ CFLAGS="$CFLAGS -DHAVE_LIBDB" | |
+ LDFLAGS="$LDFLAGS $LIBDB_LIB" | |
+ echo "enabled." | |
+fi | |
+ | |
+echo -n "Checking for MySQL... " | |
+if [ "$with_mysql" = "test" ]; then | |
+ type mysql_config >/dev/null 2>&1 | |
+ if [ $? -eq 0 ]; then | |
+ with_mysql=yes | |
+ CFLAGS="$CFLAGS `mysql_config --cflags`" | |
+ LDFLAGS="$LDFLAGS `mysql_config --libs`" | |
+ # mysql_config will add -lmysqlclient, don't add it twice | |
+ MYSQL_LIB="" | |
+ else | |
+ with_mysql=no | |
+ fi | |
+fi | |
+if [ "$with_mysql" = "no" ]; then | |
+ echo "disabled." | |
+else | |
+ if [ "$with_mysql" != "yes" ]; then | |
+ CFLAGS="$CFLAGS -I$with_mysql/include" | |
+ LDFLAGS="$LDFLAGS -L$with_mysql/lib" | |
+ fi | |
+ CFLAGS="$CFLAGS -DHAVE_MYSQL" | |
+ LDFLAGS="$LDFLAGS $MYSQL_LIB" | |
+ echo "enabled." | |
+fi | |
+ | |
+for D in .; do | |
+ cat $D/Makefile.in | sed "s%@CC@%${CC}%g | |
+ s%@CXX@%${CXX}%g | |
+ s%@AR@%${AR}%g | |
+ s%@LD@%${LD}%g | |
+ s%@CFLAGS@%${CFLAGS}%g | |
+ s%@CXXFLAGS@%${CXXFLAGS}%g | |
+ s%@ARFLAGS@%${ARFLAGS}%g | |
+ s%@LDFLAGS@%${LDFLAGS}%g | |
+ s%@SYSLIBS@%${SYSLIBS}%" > $D/Makefile || exit 1 | |
+done | |
+ | |
+echo "Configuration successful." | |
+echo "Now run 'make all' and 'make install'." | |
+echo "" | |
diff --git a/dbdb.c b/dbdb.c | |
@@ -0,0 +1,684 @@ | |
+/* $Id: dbdb.c,v 1.22 2002/10/19 09:59:35 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * dbdb.c: berkeley database handler | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+ | |
+#include "dbh.h" | |
+#include "dbdb.h" | |
+ | |
+#ifdef HAVE_LIBDB | |
+ | |
+#define DBT_init( pdbt ) memset( pdbt, 0, sizeof(DBT) ) | |
+ | |
+#if !defined(DB_VERSION_MAJOR) /* v1 */ | |
+#define dbx_get(dbp,kp,vp) dbp->get( dbp, kp, vp, 0 ) | |
+#define dbx_put(dbp,kp,vp) dbp->put( dbp, kp, vp, 0 ) | |
+#define dbx_fd(dbp,fd) fd = dbp->fd( dbp ) | |
+#else /* v2+ */ | |
+#define dbx_get(dbp,kp,vp) dbp->get( dbp, NULL, kp, vp, 0 ) | |
+#define dbx_put(dbp,kp,vp) dbp->put( dbp, NULL, kp, vp, 0 ) | |
+#define dbx_fd(dbp,fd) dbp->fd( dbp, &fd ) | |
+#endif /* DB_VERSION_MAJOR */ | |
+ | |
+#if !defined(DB_VERSION_MAJOR) /* v1 */ | |
+typedef DB DBC; /* no separate cursor type */ | |
+#define dbx_createcursor(dbp,dbcp) ((dbcp = dbp) ? 0 : -1) | |
+#define dbx_destroycursor(dbcp) (dbcp = NULL) | |
+#define dbx_first(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_FIRST) | |
+#define dbx_next(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_NEXT) | |
+#define dbx_prev(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_PREV) | |
+#define dbx_last(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_LAST) | |
+#elif DB_VERSION_MAJOR == 2 | |
+#define dbx_createcursor(dbp,dbcp) dbp->cursor(dbp,NULL,&csrp) | |
+#define dbx_destroycursor(dbcp) dbcp->c_close(dbcp) | |
+#define dbx_first(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_FIRST) | |
+#define dbx_next(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_NEXT) | |
+#define dbx_prev(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_PREV) | |
+#define dbx_last(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_LAST) | |
+#else /* v3+ */ | |
+#define dbx_createcursor(dbp,dbcp) dbp->cursor(dbp,NULL,&csrp,0) | |
+#define dbx_destroycursor(dbcp) dbcp->c_close(dbcp) | |
+#define dbx_first(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_FIRST) | |
+#define dbx_next(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_NEXT) | |
+#define dbx_prev(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_PREV) | |
+#define dbx_last(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_LAST) | |
+#endif /* DB_VERSION_MAJOR */ | |
+ | |
+static void char2DBT( DBT* pdbt, char* p ) | |
+{ | |
+ pdbt->data = p; | |
+ pdbt->size = strlen(p); | |
+} | |
+ | |
+static void uint2DBT( DBT* pdbt, uint* p ) | |
+{ | |
+ pdbt->data = p; | |
+ pdbt->size = sizeof(uint); | |
+} | |
+ | |
+static uint DBT2uint( DBT* pdbt ) | |
+{ | |
+ uint n; | |
+ memcpy( &n, pdbt->data, sizeof(n) ); | |
+ return n; | |
+} | |
+ | |
+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpas… | |
+{ | |
+ dbhdb_t* pthis; | |
+ | |
+ uint dirlen; | |
+ cpchar phome; | |
+ struct stat st; | |
+ | |
+ pthis = (dbhdb_t*)malloc( sizeof(dbhdb_t) ); | |
+ if( pthis == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pthis->close = dbdb_db_close; | |
+ pthis->opentable = dbdb_db_opentable; | |
+ if( dbname != NULL && *dbname != '\0' ) | |
+ { | |
+ dirlen = strlen( dbname ); | |
+ pthis->dir = strdup( dbname ); | |
+ if( pthis->dir[dirlen-1] == '/' ) | |
+ { | |
+ pthis->dir[dirlen-1] = '\0'; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ phome = getenv( "HOME" ); | |
+ if( phome == NULL || *phome == '\0' ) | |
+ { | |
+ phome = "."; | |
+ } | |
+ pthis->dir = (char*)malloc( strlen(phome)+5+1 ); | |
+ if( pthis->dir == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ sprintf( pthis->dir, "%s/.bmf", phome ); | |
+ } | |
+ | |
+ /* ensure config directory exists */ | |
+ if( stat( pthis->dir, &st ) != 0 ) | |
+ { | |
+ if( errno == ENOENT ) | |
+ { | |
+ if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ if( !S_ISDIR( st.st_mode ) ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+#if !defined(DB_VERSION_MAJOR) || DB_VERSION_MAJOR < 3 | |
+ /* no initialization */ | |
+#else /* DB_VERSION_MAJOR >= 3 */ | |
+ if( db_env_create( &pthis->envp, 0 ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( pthis->envp->open( pthis->envp, pthis->dir, DB_INIT_LOCK|DB_INIT_MPOOL… | |
+ { | |
+ goto bail; | |
+ } | |
+#endif /* DB_VERSION_MAJOR */ | |
+ | |
+ return (dbh_t*)pthis; | |
+ | |
+bail: | |
+ free( pthis ); | |
+ return NULL; | |
+} | |
+ | |
+bool_t dbdb_db_close( dbhdb_t* pthis ) | |
+{ | |
+#if !defined(DB_VERSION_MAJOR) || DB_VERSION_MAJOR < 3 | |
+ /* no cleanup */ | |
+#else /* DB_VERSION_MAJOR >= 3 */ | |
+ pthis->envp->close( pthis->envp, 0 ); | |
+#endif /* DB_VERSION_MAJOR */ | |
+ | |
+ free( pthis->dir ); | |
+ pthis->dir = NULL; | |
+ | |
+ return true; | |
+} | |
+ | |
+dbt_t* dbdb_db_opentable( dbhdb_t* pthis, cpchar table, bool_t rdonly ) | |
+{ | |
+ dbtdb_t* ptable; | |
+ DB* dbp; | |
+ DBT key; | |
+ DBT val; | |
+ | |
+ char szpath[PATH_MAX]; | |
+ | |
+ ptable = (dbtdb_t*)malloc( sizeof(dbtdb_t) ); | |
+ if( ptable == NULL ) | |
+ { | |
+ return NULL; | |
+ } | |
+ ptable->close = dbdb_table_close; | |
+ ptable->mergeclose = dbdb_table_mergeclose; | |
+ ptable->unmergeclose = dbdb_table_unmergeclose; | |
+ ptable->import = dbdb_table_import; | |
+ ptable->export = dbdb_table_export; | |
+ ptable->getmsgcount = dbdb_table_getmsgcount; | |
+ ptable->getcount = dbdb_table_getcount; | |
+ ptable->dbp = NULL; | |
+ | |
+ sprintf( szpath, "%s/%s.db", pthis->dir, table ); | |
+#if !defined(DB_VERSION_MAJOR) | |
+ if( (dbp = dbopen( szpath, O_CREAT|O_RDWR, 0644, DB_BTREE, NULL)) == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+#elif DB_VERSION_MAJOR == 2 | |
+ if( db_open( szpath, DB_BTREE, DB_CREATE, 0644, NULL, NULL, &dbp ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+#elif (DB_VERSION_MAJOR == 3) || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR ==… | |
+ ptable->envp = pthis->envp; | |
+ if( db_create( &dbp, NULL, 0 ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( dbp->open( dbp, szpath, NULL, DB_BTREE, DB_CREATE, 0644 ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+#else /* v4.1+ */ | |
+ ptable->envp = pthis->envp; | |
+ if( db_create( &dbp, NULL, 0 ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( dbp->open( dbp, NULL, szpath, NULL, DB_BTREE, DB_CREATE, 0644 ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+#endif /* DB_VERSION_MAJOR */ | |
+ ptable->dbp = dbp; | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ ptable->nmsgs = 0; | |
+ char2DBT( &key, MSGCOUNT_KEY ); | |
+ if( dbx_get( dbp, &key, &val ) == 0 ) | |
+ { | |
+ ptable->nmsgs = DBT2uint( &val ); | |
+ } | |
+ | |
+ return (dbt_t*)ptable; | |
+ | |
+bail: | |
+ free( ptable ); | |
+ return NULL; | |
+} | |
+ | |
+static bool_t dbdb_table_lock( dbtdb_t* pthis ) | |
+{ | |
+#ifndef NOLOCK | |
+ struct flock lock; | |
+ int fd; | |
+ | |
+ dbx_fd( pthis->dbp, fd ); | |
+ memset( &lock, 0, sizeof(lock) ); | |
+ lock.l_type = F_WRLCK; | |
+ lock.l_start = 0; | |
+ lock.l_whence = SEEK_SET; | |
+ lock.l_len = 0; | |
+ if( fcntl( fd, F_SETLKW, &lock ) != 0 ) | |
+ { | |
+ return false; | |
+ } | |
+#endif /* ndef NOLOCK */ | |
+ return true; | |
+} | |
+ | |
+static bool_t dbdb_table_unlock( dbtdb_t* pthis ) | |
+{ | |
+#ifndef NOLOCK | |
+ struct flock lock; | |
+ int fd; | |
+ | |
+ dbx_fd( pthis->dbp, fd ); | |
+ memset( &lock, 0, sizeof(lock) ); | |
+ lock.l_type = F_UNLCK; | |
+ lock.l_start = 0; | |
+ lock.l_whence = SEEK_SET; | |
+ lock.l_len = 0; | |
+ if( fcntl( fd, F_SETLK, &lock ) != 0 ) | |
+ { | |
+ return false; | |
+ } | |
+#endif /* ndef NOLOCK */ | |
+ return true; | |
+} | |
+ | |
+bool_t dbdb_table_close( dbtdb_t* pthis ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ | |
+ if( dbp != NULL ) | |
+ { | |
+#if !defined(DB_VERSION_MAJOR) /* v1 */ | |
+ dbp->close( dbp ); | |
+#else /* v2+ */ | |
+ dbp->close( dbp, 0 ); | |
+#endif /* DB_VERSION_MAJOR */ | |
+ pthis->dbp = NULL; | |
+ } | |
+ | |
+ return true; | |
+} | |
+ | |
+bool_t dbdb_table_mergeclose( dbtdb_t* pthis, vec_t* pmsg ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ DBT key; | |
+ DBT val; | |
+ | |
+ char szword[MAXWORDLEN+1]; | |
+ uint count; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ | |
+ if( pthis->dbp == NULL ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ if( !dbdb_table_lock( pthis ) ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ pthis->nmsgs++; | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ | |
+ char2DBT( &key, MSGCOUNT_KEY ); | |
+ uint2DBT( &val, &pthis->nmsgs ); | |
+ dbx_put( dbp, &key, &val ); | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ while( pmsgstr != NULL ) | |
+ { | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pmsgstr->p, pmsgstr->len ); | |
+ szword[pmsgstr->len] = '\0'; | |
+ count = db_getnewcount( &msgiter ); | |
+ | |
+ char2DBT( &key, szword ); | |
+ if( dbx_get( dbp, &key, &val ) == 0 ) | |
+ { | |
+ count += DBT2uint( &val ); | |
+ } | |
+ char2DBT( &key, szword ); | |
+ uint2DBT( &val, &count ); | |
+ if( dbx_put( dbp, &key, &val ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ dbdb_table_unlock( pthis ); | |
+ return dbdb_table_close( pthis ); | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbdb_table_unmergeclose( dbtdb_t* pthis, vec_t* pmsg ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ DBT key; | |
+ DBT val; | |
+ | |
+ char szword[MAXWORDLEN+1]; | |
+ uint count; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ | |
+ if( pthis->dbp == NULL ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ if( pthis->nmsgs > 0 ) | |
+ { | |
+ pthis->nmsgs--; | |
+ } | |
+ | |
+ if( !dbdb_table_lock( pthis ) ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ | |
+ char2DBT( &key, MSGCOUNT_KEY ); | |
+ uint2DBT( &val, &pthis->nmsgs ); | |
+ dbx_put( dbp, &key, &val ); | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ while( pmsgstr != NULL ) | |
+ { | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pmsgstr->p, pmsgstr->len ); | |
+ szword[pmsgstr->len] = '\0'; | |
+ count = db_getnewcount( &msgiter ); | |
+ | |
+ char2DBT( &key, szword ); | |
+ if( dbx_get( dbp, &key, &val ) == 0 ) | |
+ { | |
+ uint n = DBT2uint( &val ); | |
+ n = (n > count) ? (n - count) : 0; | |
+ char2DBT( &key, szword ); | |
+ uint2DBT( &val, &n ); | |
+ if( dbx_put( dbp, &key, &val ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ dbdb_table_unlock( pthis ); | |
+ return dbdb_table_close( pthis ); | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbdb_table_import( dbtdb_t* pthis, cpchar filename ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ int fd; | |
+ struct stat st; | |
+ char* pbuf; | |
+ char* pbegin; | |
+ char* pend; | |
+ rec_t r; | |
+ DBT key; | |
+ DBT val; | |
+ char szword[MAXWORDLEN+1]; | |
+ | |
+ if( pthis->dbp == NULL ) | |
+ { | |
+ return false; | |
+ } | |
+ if( (fd = open( filename, O_RDONLY, 0644 )) < 0 ) | |
+ { | |
+ return false; | |
+ } | |
+ if( fstat( fd, &st ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( st.st_size == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pbuf = (char*)malloc( st.st_size ); | |
+ if( pbuf == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( read( fd, pbuf, st.st_size ) != st.st_size ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ | |
+ if( sscanf( pbuf, BOGOFILTER_HEADER, &pthis->nmsgs ) != 1 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pbegin = pbuf; | |
+ while( *pbegin != '\n' ) pbegin++; | |
+ pbegin++; | |
+ | |
+ char2DBT( &key, MSGCOUNT_KEY ); | |
+ uint2DBT( &val, &pthis->nmsgs ); | |
+ if( dbx_put( dbp, &key, &val ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ while( pbegin < pbuf + st.st_size ) | |
+ { | |
+ pend = pbegin; | |
+ r.w.p = pbegin; | |
+ r.w.len = 0; | |
+ r.n = 0; | |
+ | |
+ while( *pend != '\n' ) | |
+ { | |
+ if( pend >= pbuf + st.st_size ) | |
+ { | |
+ goto bail; | |
+ } | |
+ *pend = tolower(*pend); | |
+ if( *pend == ' ' ) | |
+ { | |
+ r.w.len = (pend-pbegin); | |
+ r.n = strtol( pend+1, NULL, 10 ); | |
+ } | |
+ pend++; | |
+ } | |
+ if( pend > pbegin && *pbegin != '#' && *pbegin != ';' ) | |
+ { | |
+ if( r.w.len == 0 || r.w.len > MAXWORDLEN ) | |
+ { | |
+ fprintf( stderr, "dbh_loadfile: bad file format\n" ); | |
+ goto bail; | |
+ } | |
+ strncpylwr( szword, r.w.p, r.w.len ); | |
+ szword[r.w.len] = '\0'; | |
+ char2DBT( &key, szword ); | |
+ uint2DBT( &val, &r.n ); | |
+ if( dbx_put( dbp, &key, &val ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ pbegin = pend+1; | |
+ } | |
+ | |
+ free( pbuf ); | |
+ close( fd ); | |
+ | |
+ return true; | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbdb_table_export( dbtdb_t* pthis, cpchar filename ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ int fd; | |
+ char iobuf[IOBUFSIZE]; | |
+ char* p; | |
+ | |
+ DBC* csrp; | |
+ int rc; | |
+ DBT key; | |
+ DBT val; | |
+ | |
+ if( (fd = open( filename, O_CREAT|O_WRONLY|O_TRUNC, 0644 )) < 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( dbx_createcursor( dbp, csrp ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ | |
+ p = iobuf; | |
+ p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
+ | |
+ rc = dbx_first( csrp, &key, &val ); | |
+ while( rc == 0 ) | |
+ { | |
+ assert( key.data != NULL && key.size <= MAXWORDLEN ); | |
+ assert( val.data != NULL && val.size == sizeof(uint) ); | |
+ if( key.size != MSGCOUNT_KEY_LEN || | |
+ memcmp( key.data, MSGCOUNT_KEY, MSGCOUNT_KEY_LEN ) != 0 ) | |
+ { | |
+ memcpy( p, key.data, key.size ); p += key.size; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", DBT2uint(&val) ); | |
+ if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
+ { | |
+ write( fd, iobuf, p-iobuf ); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ rc = dbx_next( csrp, &key, &val ); | |
+ } | |
+ dbx_destroycursor( csrp ); | |
+ if( p != iobuf ) | |
+ { | |
+ write( fd, iobuf, p-iobuf ); | |
+ } | |
+ close( fd ); | |
+ return true; | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+uint dbdb_table_getmsgcount( dbtdb_t* pthis ) | |
+{ | |
+ return pthis->nmsgs; | |
+} | |
+ | |
+uint dbdb_table_getcount( dbtdb_t* pthis, str_t* pword ) | |
+{ | |
+ DB* dbp = pthis->dbp; | |
+ DBT key; | |
+ DBT val; | |
+ | |
+ char szword[MAXWORDLEN+1]; | |
+ uint count = 0; | |
+ | |
+ assert( pword->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pword->p, pword->len ); | |
+ szword[pword->len] = '\0'; | |
+ count = 0; | |
+ | |
+ DBT_init( &key ); | |
+ DBT_init( &val ); | |
+ | |
+ char2DBT( &key, szword ); | |
+ if( dbx_get( dbp, &key, &val ) == 0 ) | |
+ { | |
+ count = DBT2uint( &val ); | |
+ } | |
+ | |
+ return count; | |
+} | |
+ | |
+#else /* def HAVE_LIBDB */ | |
+ | |
+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpas… | |
+{ | |
+ return NULL; | |
+} | |
+ | |
+#endif /* def HAVE_LIBDB */ | |
+ | |
#ifdef UNIT_TEST
/*
 * Stand-alone smoke test: repeatedly open the handle and print every
 * string of a list together with its count.
 *
 * NOTE(review): this looks stale.  It calls dbh_open() with two
 * arguments, uses a variable "db" that is not declared in this function,
 * and calls dbh_close(), none of which matches the declarations in
 * dbh.h -- confirm before enabling UNIT_TEST.  Only the local buffer
 * handling is corrected here.
 */
int main( int argc, char** argv )
{
    dbh_t* pdb;
    veciter_t iter;
    str_t* pstr;
    uint n;

    if( argc != 2 )
    {
        fprintf( stderr, "usage: %s <file>\n", argv[0] );
        return 1;
    }

    for( n = 0; n < 100; n++ )
    {
        pdb = dbh_open( "testlist", true );

        vec_first( &db, &iter );
        while( (pstr = veciter_get( &iter )) != NULL )
        {
            char buf[MAXWORDLEN+32];
            char* p;
            /* bound by the buffer's own size, not an unrelated constant */
            if( pstr->len > MAXWORDLEN )
            {
                fprintf( stderr, "str too long: %u chars\n", pstr->len );
                break;
            }
            p = buf;
            strcpy( buf, "str: " );
            /* "str: " is 5 characters; continue on top of its NUL */
            p += 5;
            memcpy( p, pstr->p, pstr->len );
            p += pstr->len;
            sprintf( p, " %u", pstr->count );
            puts( buf );

            veciter_next( &iter );
        }

        dbh_close( &db );
    }

    return 0;
}
#endif /* def UNIT_TEST */
diff --git a/dbdb.h b/dbdb.h | |
@@ -0,0 +1,61 @@ | |
+/* $Id: dbdb.h,v 1.7 2002/10/14 22:17:19 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _DBDB_H | |
+#define _DBDB_H | |
+ | |
+#ifdef HAVE_LIBDB | |
+ | |
+#include <db.h> | |
+ | |
+typedef struct _dbtdb dbtdb_t; /* table handle of the Berkeley DB backend */ | |
+struct _dbtdb /* dbdb_db_opentable() returns this struct cast to dbt_t*; the seven function pointers come first, in the same order as in struct _dbt (dbh.h) */ | |
+{ | |
+ bool_t (*close)(dbtdb_t*); /* set to dbdb_table_close */ | |
+ bool_t (*mergeclose)(dbtdb_t*,vec_t*); /* set to dbdb_table_mergeclose */ | |
+ bool_t (*unmergeclose)(dbtdb_t*,vec_t*); /* set to dbdb_table_unmergeclose */ | |
+ bool_t (*import)(dbtdb_t*,cpchar); /* set to dbdb_table_import */ | |
+ bool_t (*export)(dbtdb_t*,cpchar); /* set to dbdb_table_export */ | |
+ uint (*getmsgcount)(dbtdb_t*); /* set to dbdb_table_getmsgcount */ | |
+ uint (*getcount)(dbtdb_t*,str_t*); /* set to dbdb_table_getcount */ | |
+ | |
+ DB* dbp; /* db handle; NULL once the table has been closed */ | |
+#if defined(DB_VERSION_MAJOR) && DB_VERSION_MAJOR >= 3 | |
+ DB_ENV* envp; /* we don't own this; copied from the dbhdb_t that opened the table */ | |
+#endif /* DB_VERSION_MAJOR */ | |
+ uint nmsgs; /* number of messages in table (cached); read from the MSGCOUNT_KEY record when the table is opened */ | |
+}; | |
+ | |
+typedef struct _dbhdb dbhdb_t; /* database handle of the Berkeley DB backend */ | |
+struct _dbhdb /* dbdb_db_open() returns this struct cast to dbh_t*; the two function pointers come first, in the same order as in struct _dbh (dbh.h) */ | |
+{ | |
+ bool_t (*close)(dbhdb_t*); /* set to dbdb_db_close */ | |
+ dbt_t* (*opentable)(dbhdb_t*,cpchar,bool_t); /* set to dbdb_db_opentable */ | |
+ | |
+ char* dir; /* directory for db files; heap-allocated, freed by dbdb_db_close */ | |
+#if defined(DB_VERSION_MAJOR) && DB_VERSION_MAJOR >= 3 | |
+ DB_ENV* envp; /* db environment; created in dbdb_db_open, closed in dbdb_db_close */ | |
+#endif /* DB_VERSION_MAJOR */ | |
+}; | |
+ | |
+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpa… | |
+bool_t dbdb_db_close( dbhdb_t* pthis ); | |
+dbt_t* dbdb_db_opentable( dbhdb_t* pthis, cpchar table, bool_t rdonly ); | |
+ | |
+bool_t dbdb_table_close( dbtdb_t* pthis ); | |
+bool_t dbdb_table_mergeclose( dbtdb_t* pthis, vec_t* pmsg ); | |
+bool_t dbdb_table_unmergeclose( dbtdb_t* pthis, vec_t* pmsg ); | |
+bool_t dbdb_table_import( dbtdb_t* pthis, cpchar filename ); | |
+bool_t dbdb_table_export( dbtdb_t* pthis, cpchar filename ); | |
+uint dbdb_table_getmsgcount( dbtdb_t* pthis ); | |
+uint dbdb_table_getcount( dbtdb_t* pthis, str_t* pword ); | |
+ | |
+#endif /* def HAVE_LIBDB */ | |
+ | |
+#endif /* ndef _DBDB_H */ | |
diff --git a/dbg.c b/dbg.c | |
@@ -0,0 +1,302 @@ | |
+/* $Id: dbg.c,v 1.3 2002/10/19 08:30:57 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * dbg.c: debug functions for bmf. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include <stdarg.h> | |
+ | |
+ | |
+uint g_verbose = 0; | |
+ | |
+void verbose( int level, const char* fmt, ... ) | |
+{ | |
+ if( g_verbose >= level ) | |
+ { | |
+ char str[4096]; | |
+ va_list v; | |
+ va_start( v, fmt ); | |
+ vsnprintf( str, sizeof(str)-1, fmt, v ); | |
+ str[sizeof(str)-1] = '\0'; | |
+#ifdef _UNIX | |
+ fputs( str, stderr ); | |
+#endif | |
+#ifdef _WIN32 | |
+ ::OutputDebugString( str ); | |
+#endif | |
+ } | |
+} | |
+ | |
+#ifndef NDEBUG | |
+ | |
/*
 * Debug-build message output.  The printf-style message is formatted
 * into a 4096-byte buffer (longer messages are silently truncated),
 * then written to stderr when _UNIX is defined and passed to
 * OutputDebugString when _WIN32 is defined.
 */
void dbgout( const char* fmt, ... )
{
    char str[4096];
    va_list v;
    va_start( v, fmt );
    vsnprintf( str, sizeof(str)-1, fmt, v );
    va_end( v );        /* every va_start needs a matching va_end */
    str[sizeof(str)-1] = '\0';
#ifdef _UNIX
    fputs( str, stderr );
#endif
#ifdef _WIN32
    /* no "::" scope operator here: this file is C, not C++ */
    OutputDebugString( str );
#endif
}
+ | |
+/* | |
+ * Heap management routines. These routines use unbalanced binary trees to | |
+ * keep track of allocations in an attempt to make them fast yet simple. | |
+ * | |
+ * Each block of memory consists of an alloc_node header, the requested | |
+ * memory block, and guard bytes before and after the requested memory | |
+ * block. The requested memory block is filled with a semi-random byte | |
+ * value to ensure that the caller does not rely on any particular initial | |
+ * bit pattern (eg. a block of zeros or NULLs). It is refilled with a | |
+ * (possibly different) byte value after deallocation to ensure that the | |
+ * caller doesn't attempt to use the freed memory. | |
+ */ | |
+ | |
+/* we need to use the real malloc and free */ | |
+#undef malloc | |
+#undef free | |
+ | |
+typedef struct _alloc_node /* header placed at the start of every block obtained through debug_malloc(); also a node of the binary tree rooted at g_heap */ | |
+{ | |
+ struct _alloc_node* lptr; /* subtree of nodes at lower addresses than this one */ | |
+ struct _alloc_node* rptr; /* subtree of nodes at higher addresses than this one */ | |
+ size_t len; /* size of the caller-visible block in bytes (guards and header not included) */ | |
+ cpchar file; /* source file name passed in by the allocating call */ | |
+ uint line; /* source line passed in by the allocating call */ | |
+} alloc_node; | |
+ | |
+static alloc_node* g_heap = NULL; /* root of the tree of live allocations; NULL when nothing is allocated */ | |
+ | |
+/* Our magic guard bytes: debug_malloc() copies them directly before and after each caller-visible block, debug_free() verifies both copies */ | |
+static byte g_guard[] = | |
+{ | |
+ 0xDE, 0xAD, 0xBE, 0xEF, 0xDE, 0xAD, 0xBE, 0xEF, | |
+ 0xDE, 0xAD, 0xBE, 0xEF, 0xDE, 0xAD, 0xBE, 0xEF | |
+}; | |
+ | |
+void* debug_malloc( cpchar file, uint line, size_t n, int fill ) | |
+{ | |
+ byte* pmem = NULL; | |
+ alloc_node* pnode; | |
+ | |
+ pmem = NULL; | |
+ if( n == 0 ) | |
+ { | |
+ n = 1; | |
+ } | |
+ pnode = (alloc_node*)malloc( n + 2*sizeof(g_guard) + sizeof(alloc_node) ); | |
+ if( pnode != NULL ) | |
+ { | |
+ alloc_node** ppuplink; | |
+ alloc_node* pcur; | |
+ | |
+ pmem = (byte*)pnode + sizeof(alloc_node) + sizeof(g_guard); | |
+ memcpy( pmem - sizeof(g_guard), g_guard, sizeof(g_guard) ); | |
+ memset( pmem, fill, n ); | |
+ memcpy( pmem + n, g_guard, sizeof(g_guard) ); | |
+ | |
+ pnode->lptr = pnode->rptr = NULL; | |
+ pnode->len = n; | |
+ pnode->file = file; | |
+ pnode->line = line; | |
+ ppuplink = &g_heap; | |
+ pcur = g_heap; | |
+ while( pcur != NULL ) | |
+ { | |
+ if( pnode == pcur ) | |
+ { | |
+ dbgout( "%s(%u): *** FATAL: duplicate memory allocated ***\n",… | |
+ assert( false ); | |
+ exit( -1 ); | |
+ } | |
+ if( pnode < pcur ) | |
+ { | |
+ ppuplink = &pcur->lptr; | |
+ pcur = pcur->lptr; | |
+ } | |
+ else | |
+ { | |
+ ppuplink = &pcur->rptr; | |
+ pcur = pcur->rptr; | |
+ } | |
+ } | |
+ *ppuplink = pnode; | |
+ } | |
+ | |
+ return pmem; | |
+} | |
+ | |
+void debug_free( cpchar file, uint line, void* p ) | |
+{ | |
+ alloc_node** ppuplink; | |
+ alloc_node* pcur; | |
+ | |
+ if( p == NULL ) | |
+ { | |
+ return; | |
+ } | |
+ if( g_heap == NULL ) | |
+ { | |
+ dbgout( "%s(%u): *** FATAL: delete with empty heap ***\n", file, line … | |
+ assert( false ); | |
+ exit( -1 ); | |
+ } | |
+ | |
+ ppuplink = &g_heap; | |
+ pcur = g_heap; | |
+ while( pcur != NULL ) | |
+ { | |
+ void* pcurblk = (char*)pcur + sizeof(alloc_node) + sizeof(g_guard); | |
+ if( p == pcurblk ) | |
+ { | |
+ byte* pmem = (byte*)p; | |
+ if( memcmp( pmem - sizeof(g_guard), g_guard, sizeof(g_guard) ) != … | |
+ memcmp( pmem + pcur->len, g_guard, sizeof(g_guard) ) != 0 ) | |
+ { | |
+ dbgout( "%s(%u): *** FATAL: corrupted memory at %p\n", file, l… | |
+ assert( false ); | |
+ exit( -1 ); | |
+ } | |
+ memset( pmem, rand(), pcur->len ); | |
+ if( pcur->lptr && pcur->rptr ) | |
+ { | |
+ /* | |
+ * node has both ptrs so replace it with left child and move | |
+ * right child to bottom right of left child's tree | |
+ */ | |
+ alloc_node* pend = pcur->lptr; | |
+ while( pend->rptr ) pend = pend->rptr; | |
+ *ppuplink = pcur->lptr; | |
+ pend->rptr = pcur->rptr; | |
+ } | |
+ else | |
+ { | |
+ /* move child up */ | |
+ *ppuplink = (pcur->lptr) ? pcur->lptr : pcur->rptr; | |
+ } | |
+ free( pcur ); | |
+ return; | |
+ } | |
+ if( p < pcurblk ) | |
+ { | |
+ ppuplink = &pcur->lptr; | |
+ pcur = pcur->lptr; | |
+ } | |
+ else | |
+ { | |
+ ppuplink = &pcur->rptr; | |
+ pcur = pcur->rptr; | |
+ } | |
+ } | |
+ | |
+ dbgout( "%s(%u): *** FATAL: delete on unalloced memory ***\n", file, line … | |
+ assert( false ); | |
+ exit( -1 ); | |
+} | |
+ | |
+void* debug_realloc( cpchar file, uint line, void* p, size_t n ) | |
+{ | |
+ void* pnew; | |
+ | |
+ if( p == NULL ) | |
+ { | |
+ pnew = debug_malloc( file, line, n, rand() ); | |
+ } | |
+ else if( n == 0 ) | |
+ { | |
+ debug_free( file, line, p ); | |
+ pnew = NULL; | |
+ } | |
+ else | |
+ { | |
+ alloc_node* pnode = (alloc_node*)((char*)p-sizeof(g_guard)-sizeof(allo… | |
+ pnew = debug_malloc( file, line, n, rand() ); | |
+ if( pnew != NULL ) | |
+ { | |
+ memcpy( pnew, p, pnode->len ); | |
+ debug_free( file, line, p ); | |
+ } | |
+ } | |
+ | |
+ return pnew; | |
+} | |
+ | |
+char* debug_strdup( cpchar file, uint line, cpchar s ) | |
+{ | |
+ char* s2; | |
+ uint sl = strlen(s); | |
+ | |
+ s2 = (char*)debug_malloc( file, line, sl+1, 0 ); | |
+ memcpy( s2, s, sl ); | |
+ s2[sl] = '\0'; | |
+ | |
+ return s2; | |
+} | |
+ | |
+char* debug_strndup( cpchar file, uint line, cpchar s, size_t n ) | |
+{ | |
+ char* s2; | |
+ uint sl = strlen(s); | |
+ | |
+ sl = min( n-1, sl ); | |
+ s2 = (char*)debug_malloc( file, line, sl+1, 0 ); | |
+ memcpy( s2, s, sl ); | |
+ s2[sl] = '\0'; | |
+ | |
+ return s2; | |
+} | |
+ | |
+static void walk_alloc_tree( alloc_node* pcur, size_t* pttl ) | |
+{ | |
+ if( pcur != NULL ) | |
+ { | |
+ walk_alloc_tree( pcur->lptr, pttl ); | |
+ dbgout( "%s(%u): %u bytes at %p\n", pcur->file, pcur->line, | |
+ pcur->len, pcur+sizeof(alloc_node)+sizeof(g_guard) ); | |
+ *pttl += pcur->len; | |
+ walk_alloc_tree( pcur->rptr, pttl ); | |
+ } | |
+} | |
+ | |
+void dump_alloc_heap( void ) | |
+{ | |
+ if( g_heap != NULL ) | |
+ { | |
+ size_t ttl = 0; | |
+ dbgout( "\n" ); | |
+ dbgout( "Memory leaks detected\n" ); | |
+ dbgout( "=====================\n" ); | |
+ dbgout( "\n" ); | |
+ walk_alloc_tree( g_heap, &ttl ); | |
+ dbgout( "\n" ); | |
+ dbgout( "=====================\n" ); | |
+ dbgout( "Total bytes: %u\n", ttl ); | |
+ dbgout( "=====================\n" ); | |
+ } | |
+} | |
+ | |
+#else /* ndef NDEBUG */ | |
+ | |
+void dbgout( const char* fmt, ... ) /* NDEBUG build: debug output is discarded, the arguments are never read */ | |
+{ | |
+ /* empty */ | |
+} | |
+ | |
+void dump_alloc_heap( void ) /* NDEBUG build: allocations are not tracked in this configuration, so there is nothing to report */ | |
+{ | |
+ /* empty */ | |
+} | |
+ | |
+#endif /* ndef NDEBUG */ | |
diff --git a/dbg.h b/dbg.h | |
@@ -0,0 +1,35 @@ | |
+/* $Id: dbg.h,v 1.1 2002/10/14 07:09:51 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _DBG_H | |
+#define _DBG_H | |
+ | |
+extern uint g_verbose; | |
+ | |
+void verbose( int level, const char* fmt, ... ); | |
+ | |
+void dbgout( const char* fmt, ... ); | |
+void dump_alloc_heap( void ); | |
+ | |
+#ifndef NDEBUG | |
+void* debug_malloc ( cpchar file, uint line, size_t n, int fill ); | |
+void debug_free ( cpchar file, uint line, void* p ); | |
+void* debug_realloc ( cpchar file, uint line, void* p, size_t n ); | |
+char* debug_strdup ( cpchar file, uint line, cpchar s ); | |
+char* debug_strndup ( cpchar file, uint line, cpchar s, size_t n ); | |
+ | |
+#define malloc(n) debug_malloc (__FILE__,__LINE__,n,rand()) | |
+#define calloc(n) debug_calloc (__FILE__,__LINE__,n,0) | |
+#define free(p) debug_free (__FILE__,__LINE__,p) | |
+#define realloc(p,n) debug_realloc (__FILE__,__LINE__,p,n) | |
+#define strdup(s) debug_strdup (__FILE__,__LINE__,s) | |
+#define strndup(s,n) debug_strndup (__FILE__,__LINE__,s,n) | |
+#endif /* ndef NDEBUG */ | |
+ | |
+#endif /* ndef _DBG_H */ | |
diff --git a/dbh.c b/dbh.c | |
@@ -0,0 +1,74 @@ | |
+/* $Id: dbh.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * dbh.c: database handler interface | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+ | |
+#include "dbh.h" | |
+#include "dbtext.h" | |
+#include "dbdb.h" | |
+#include "dbmysql.h" | |
+ | |
+/* | |
+ * get count for new (incoming) word. there may be duplicate entries for the | |
+ * str, so sum the counts and leave the iterator at the last one. | |
+ * | |
+ * the list referenced in the iterator must be sorted. | |
+ */ | |
+uint db_getnewcount( veciter_t* piter ) | |
+{ | |
+ str_t* pstr; | |
+ uint count; | |
+ veciter_t curiter; | |
+ str_t* pcurstr; | |
+ | |
+ pstr = &piter->plist->pitems[piter->index]; | |
+ count = 0; | |
+ | |
+ curiter.plist = piter->plist; | |
+ curiter.index = piter->index; | |
+ pcurstr = &curiter.plist->pitems[curiter.index]; | |
+ | |
+ while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr… | |
+ { | |
+ piter->index = curiter.index; | |
+ count = min( MAXFREQ, count + 1 ); | |
+ veciter_next( &curiter ); | |
+ pcurstr = &curiter.plist->pitems[curiter.index]; | |
+ } | |
+ | |
+ return count; | |
+} | |
+ | |
+dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, c… | |
+{ | |
+ dbh_t* pdb = NULL; | |
+ | |
+ switch( dbfmt ) | |
+ { | |
+ case db_text: | |
+ pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass ); | |
+ break; | |
+ case db_db: | |
+ pdb = (dbh_t*)dbdb_db_open( dbhost, dbname, dbuser, dbpass ); | |
+ break; | |
+ case db_mysql: | |
+ pdb = (dbh_t*) dbmysql_db_open( dbhost, dbname, dbuser, dbpass ); | |
+ break; | |
+ default: | |
+ assert(false); | |
+ } | |
+ | |
+ return pdb; | |
+} | |
diff --git a/dbh.h b/dbh.h | |
@@ -0,0 +1,56 @@ | |
+/* $Id: dbh.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _DBH_H | |
+#define _DBH_H | |
+ | |
+/* database formats */ | |
+typedef enum /* selects which backend dbh_open() opens */ | |
+{ | |
+ db_text, /* flat text; opened through dbtext_db_open() */ | |
+ db_db, /* libdb; opened through dbdb_db_open() */ | |
+ db_mysql /* mysql; opened through dbmysql_db_open() */ | |
+} dbfmt_t; | |
+ | |
+/* record/field structure */ | |
+typedef struct _rec /* one word/count pair, e.g. one parsed line of a text word list */ | |
+{ | |
+ str_t w; /* the word (pointer and length) */ | |
+ uint n; /* the count that goes with the word */ | |
+} rec_t; | |
+ | |
+/* database table */ | |
+typedef struct _dbt dbt_t; /* backend-independent table interface: a table of function pointers */ | |
+struct _dbt /* the Berkeley DB backend's dbtdb_t starts with these same seven pointers and is returned cast to dbt_t* */ | |
+{ | |
+ bool_t (*close)(dbt_t*); /* close the table */ | |
+ bool_t (*mergeclose)(dbt_t*,vec_t*); /* add a message's words to the table, then close it */ | |
+ bool_t (*unmergeclose)(dbt_t*,vec_t*); /* remove a message's words from the table, then close it */ | |
+ bool_t (*import)(dbt_t*,cpchar); /* load the table from the named text file */ | |
+ bool_t (*export)(dbt_t*,cpchar); /* write the table to the named text file */ | |
+ uint (*getmsgcount)(dbt_t*); /* number of messages recorded in the table */ | |
+ uint (*getcount)(dbt_t*,str_t*); /* stored count for one word */ | |
+}; | |
+ | |
+/* database instance */ | |
+typedef struct _dbh dbh_t; /* backend-independent database interface, as returned by dbh_open() */ | |
+struct _dbh | |
+{ | |
+ bool_t (*close)(dbh_t*); /* close the database */ | |
+ dbt_t* (*opentable)(dbh_t*,cpchar,bool_t); /* open the named table; the Berkeley DB backend names its bool_t argument rdonly */ | |
+}; | |
+ | |
+dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, … | |
+ | |
+#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n" /* first line of a text word list; %u is the message count; used as both a scanf and a printf format */ | |
+#define TEXTDB_MAXLINELEN (MAXWORDLEN+32) /* upper bound for one "word count" text line */ | |
+ | |
+uint db_getnewcount( veciter_t* piter ); | |
+ | |
+#endif /* ndef _DBH_H */ | |
diff --git a/dbmysql.c b/dbmysql.c | |
@@ -0,0 +1,545 @@ | |
+/* $Id: dbmysql.c,v 1.9 2002/10/14 07:09:51 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * dbmysql.c: mysql database handler | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+ | |
+#include "dbh.h" | |
+#include "dbmysql.h" | |
+ | |
+#ifdef HAVE_MYSQL | |
+ | |
+#define MAXQUERY 256 | |
+ | |
+static MYSQL* g_mysql = NULL; | |
+ | |
/*
 * sql_escape: copy s into d as a lower-cased, NUL-terminated string that can
 * be placed between single quotes in a MySQL statement.
 *
 * Single quotes are doubled. Backslashes are doubled as well, because MySQL
 * by default treats a backslash inside a string literal as an escape
 * character, so a word ending in '\' would otherwise swallow the closing
 * quote of the literal.
 *
 * d  destination; must have room for 2*strlen(s)+1 bytes
 * s  NUL-terminated source word
 */
static void sql_escape( char* d, const char* s )
{
    while( *s != '\0' )
    {
        if( *s == '\'' || *s == '\\' )
        {
            /* emit the character twice so it is taken literally */
            *d++ = *s;
        }
        /* the cast keeps tolower() away from negative char values */
        *d++ = (char)tolower( (unsigned char)*s );
        s++;
    }
    /* callers format the result with %s, so it has to be terminated */
    *d = '\0';
}
+ | |
+ dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
+ { /* Allocates a dbhmysql_t, installs its close/opentable callbacks and | |
+ * connects through the file-wide g_mysql handle. Returns the new handle | |
+ * cast to dbh_t*, or NULL when initialisation, allocation or the connect | |
+ * fails. */ | |
+ dbhmysql_t* pthis; | |
+ | |
+ if( g_mysql == NULL ) /* g_mysql is created on first use and reused by later calls */ | |
+ { | |
+ g_mysql = mysql_init( NULL ); | |
+ if( g_mysql == NULL ) | |
+ { | |
+ return NULL; | |
+ } | |
+ } | |
+ | |
+ pthis = (dbhmysql_t*)malloc( sizeof(dbhmysql_t) ); | |
+ if( pthis == NULL ) | |
+ { | |
+ perror( "malloc()" ); | |
+ goto bail; | |
+ } | |
+ pthis->close = dbmysql_db_close; | |
+ pthis->opentable = dbmysql_db_opentable; | |
+ | |
+ pthis->dbh = mysql_real_connect( g_mysql, dbhost, dbuser, dbpass, dbname, … | |
+ if( pthis->dbh == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ | |
+ return (dbh_t*)pthis; | |
+ | |
+ bail: /* shared by the malloc and connect failures; pthis is NULL in the malloc case, which free() accepts */ | |
+ fprintf( stderr, "cannot open mysql database '%s': %s\n", dbname, mysql_er… | |
+ free( pthis ); | |
+ return NULL; | |
+ } | |
+ | |
+bool_t dbmysql_db_close( dbhmysql_t* pthis ) | |
+{ | |
+ if( pthis->dbh != NULL ) | |
+ { | |
+ mysql_close( pthis->dbh ); | |
+ pthis->dbh = NULL; | |
+ } | |
+ return true; | |
+} | |
+ | |
+dbt_t* dbmysql_db_opentable( dbhmysql_t* pthis, cpchar table, bool_t rdonly ) | |
+{ | |
+ dbtmysql_t* ptable; | |
+ | |
+ char query[MAXQUERY]; | |
+ MYSQL_RES* res; | |
+ MYSQL_ROW row; | |
+ | |
+ ptable = (dbtmysql_t*)malloc( sizeof(dbtmysql_t) ); | |
+ if( ptable == NULL ) | |
+ { | |
+ return NULL; | |
+ } | |
+ ptable->close = dbmysql_table_close; | |
+ ptable->mergeclose = dbmysql_table_mergeclose; | |
+ ptable->unmergeclose = dbmysql_table_unmergeclose; | |
+ ptable->import = dbmysql_table_import; | |
+ ptable->export = dbmysql_table_export; | |
+ ptable->getmsgcount = dbmysql_table_getmsgcount; | |
+ ptable->getcount = dbmysql_table_getcount; | |
+ ptable->pdb = pthis; | |
+ ptable->table = strdup( table ); | |
+ ptable->nmsgs = 0; | |
+ | |
+ sprintf( query, "SELECT count FROM %s WHERE name='%s'", | |
+ table, MSGCOUNT_KEY ); | |
+ if( mysql_query( pthis->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( (res = mysql_store_result( pthis->dbh )) == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ while( (row = mysql_fetch_row( res )) ) | |
+ { | |
+ ptable->nmsgs = atoi( row[0] ); | |
+ } | |
+ | |
+ return (dbt_t*)ptable; | |
+ | |
+bail: | |
+ free( ptable->table ); | |
+ free( ptable ); | |
+ return NULL; | |
+} | |
+ | |
+bool_t dbmysql_table_close( dbtmysql_t* pthis ) | |
+{ | |
+ if( pthis->pdb != NULL ) | |
+ { | |
+ free( pthis->table ); | |
+ pthis->table = NULL; | |
+ pthis->pdb = NULL; | |
+ } | |
+ return true; | |
+} | |
+ | |
+bool_t dbmysql_table_mergeclose( dbtmysql_t* pthis, vec_t* pmsg ) | |
+{ | |
+ char szword[MAXWORDLEN+1]; | |
+ char szsqlword[MAXWORDLEN*2+1]; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ | |
+ char query[MAXQUERY]; | |
+ uint count; | |
+ | |
+ if( pthis->pdb == NULL || pthis->pdb->dbh == NULL ) | |
+ { | |
+ assert( false ); | |
+ return false; | |
+ } | |
+ | |
+ pthis->nmsgs++; | |
+ | |
+ sprintf( query, "UPDATE %s SET count=%u WHERE name='%s'", | |
+ pthis->table, pthis->nmsgs, MSGCOUNT_KEY ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )", | |
+ pthis->table, MSGCOUNT_KEY, pthis->nmsgs ); | |
+ mysql_query( pthis->pdb->dbh, query ); | |
+ } | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ while( pmsgstr != NULL ) | |
+ { | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pmsgstr->p, pmsgstr->len ); | |
+ szword[pmsgstr->len] = '\0'; | |
+ sql_escape( szsqlword, szword ); | |
+ count = db_getnewcount( &msgiter ); | |
+ | |
+ sprintf( query, "UPDATE %s SET count=count+%u WHERE name='%s'", | |
+ pthis->table, count, szsqlword ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u … | |
+ pthis->table, szsqlword, count ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ return dbmysql_table_close( pthis ); | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbmysql_table_unmergeclose( dbtmysql_t* pthis, vec_t* pmsg ) | |
+{ | |
+ char szword[MAXWORDLEN+1]; | |
+ char szsqlword[MAXWORDLEN*2+1]; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ | |
+ char query[MAXQUERY]; | |
+ uint count; | |
+ | |
+ if( pthis->pdb == NULL || pthis->pdb->dbh == NULL ) | |
+ { | |
+ assert( false ); | |
+ return false; | |
+ } | |
+ | |
+ if( pthis->nmsgs > 0 ) | |
+ { | |
+ pthis->nmsgs--; | |
+ } | |
+ | |
+ sprintf( query, "UPDATE %s SET count=%u WHERE name='%s'", | |
+ pthis->table, pthis->nmsgs, MSGCOUNT_KEY ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )", | |
+ pthis->table, MSGCOUNT_KEY, pthis->nmsgs ); | |
+ mysql_query( pthis->pdb->dbh, query ); | |
+ } | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ while( pmsgstr != NULL ) | |
+ { | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pmsgstr->p, pmsgstr->len ); | |
+ szword[pmsgstr->len] = '\0'; | |
+ sql_escape( szsqlword, szword ); | |
+ count = db_getnewcount( &msgiter ); | |
+ | |
+ sprintf( query, "UPDATE %s SET count=GREATEST(0,count-%u) WHERE name='… | |
+ pthis->table, count, szsqlword ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ /* this should not happen, so write with count=0 */ | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', 0 )… | |
+ pthis->table, szsqlword ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ return dbmysql_table_close( pthis ); | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbmysql_table_import( dbtmysql_t* pthis, cpchar filename ) | |
+{ | |
+ int fd; | |
+ struct stat st; | |
+ char* pbuf; | |
+ char* pbegin; | |
+ char* pend; | |
+ rec_t r; | |
+ char szword[MAXWORDLEN+1]; | |
+ char szsqlword[MAXWORDLEN*2+1]; | |
+ char query[MAXQUERY]; | |
+ | |
+ if( pthis->pdb == NULL || pthis->pdb->dbh == NULL ) | |
+ { | |
+ return false; | |
+ } | |
+ if( (fd = open( filename, O_RDONLY, 0644 )) < 0 ) | |
+ { | |
+ return false; | |
+ } | |
+ if( fstat( fd, &st ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( st.st_size == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pbuf = (char*)malloc( st.st_size ); | |
+ if( pbuf == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( read( fd, pbuf, st.st_size ) != st.st_size ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ if( sscanf( pbuf, BOGOFILTER_HEADER, &pthis->nmsgs ) != 1 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pbegin = pbuf; | |
+ while( *pbegin != '\n' ) pbegin++; | |
+ pbegin++; | |
+ | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )", | |
+ pthis->table, MSGCOUNT_KEY, pthis->nmsgs ); | |
+ mysql_query( pthis->pdb->dbh, query ); | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ while( pbegin < pbuf + st.st_size ) | |
+ { | |
+ pend = pbegin; | |
+ r.w.p = pbegin; | |
+ r.w.len = 0; | |
+ r.n = 0; | |
+ | |
+ while( *pend != '\n' ) | |
+ { | |
+ if( pend >= pbuf + st.st_size ) | |
+ { | |
+ goto bail; | |
+ } | |
+ *pend = tolower(*pend); | |
+ if( *pend == ' ' ) | |
+ { | |
+ r.w.len = (pend-pbegin); | |
+ r.n = strtol( pend+1, NULL, 10 ); | |
+ } | |
+ pend++; | |
+ } | |
+ if( pend > pbegin && *pbegin != '#' && *pbegin != ';' ) | |
+ { | |
+ if( r.w.len == 0 || r.w.len > MAXWORDLEN ) | |
+ { | |
+ fprintf( stderr, "dbh_loadfile: bad file format\n" ); | |
+ goto bail; | |
+ } | |
+ strncpylwr( szword, r.w.p, r.w.len ); | |
+ szword[r.w.len] = '\0'; | |
+ sql_escape( szsqlword, szword ); | |
+ | |
+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u … | |
+ pthis->table, szsqlword, r.n ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ pbegin = pend+1; | |
+ } | |
+ | |
+ return true; | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+bool_t dbmysql_table_export( dbtmysql_t* pthis, cpchar filename ) | |
+{ | |
+ int fd; | |
+ char iobuf[IOBUFSIZE]; | |
+ char* p; | |
+ | |
+ char query[MAXQUERY]; | |
+ MYSQL_RES* res; | |
+ MYSQL_ROW row; | |
+ | |
+ if( (fd = open( filename, O_CREAT|O_WRONLY|O_TRUNC, 0644 )) < 0 ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
+ | |
+ sprintf( query, "SELECT name, count FROM %s", | |
+ pthis->table ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ while( (row = mysql_fetch_row( res )) ) | |
+ { | |
+ if( strcmp( row[0], MSGCOUNT_KEY ) == 0 ) | |
+ { | |
+ continue; | |
+ } | |
+ | |
+ p += sprintf( p, "%s %s\n", row[0], row[1] ); | |
+ if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
+ { | |
+ write( fd, iobuf, p-iobuf ); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if( p != iobuf ) | |
+ { | |
+ write( fd, iobuf, p-iobuf ); | |
+ } | |
+ close( fd ); | |
+ | |
+ return true; | |
+ | |
+bail: | |
+ return false; | |
+} | |
+ | |
+uint dbmysql_table_getmsgcount( dbtmysql_t* pthis ) | |
+{ | |
+ return pthis->nmsgs; | |
+} | |
+ | |
+uint dbmysql_table_getcount( dbtmysql_t* pthis, str_t* pword ) | |
+{ | |
+ uint count = 0; | |
+ char szword[MAXWORDLEN+1]; | |
+ char szsqlword[MAXWORDLEN*2+1]; | |
+ | |
+ char query[MAXQUERY]; | |
+ MYSQL_RES* res; | |
+ MYSQL_ROW row; | |
+ | |
+ assert( pword->len <= MAXWORDLEN ); | |
+ strncpylwr( szword, pword->p, pword->len ); | |
+ szword[pword->len] = '\0'; | |
+ sql_escape( szsqlword, szword ); | |
+ sprintf( query, "SELECT count FROM %s WHERE name='%s'", | |
+ pthis->table, szsqlword ); | |
+ if( mysql_query( pthis->pdb->dbh, query ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ if( (res = mysql_store_result( pthis->pdb->dbh )) == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ while( (row = mysql_fetch_row( res )) ) | |
+ { | |
+ count = atoi( row[0] ); | |
+ } | |
+ | |
+bail: | |
+ return count; | |
+} | |
+ | |
+#else /* def HAVE_MYSQL */ | |
+ | |
+ dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
+ { /* stub used when HAVE_MYSQL is not defined: opening always fails with NULL */ | |
+ return NULL; | |
+ } | |
+ | |
+#endif /* def HAVE_MYSQL */ | |
+ | |
+#ifdef UNIT_TEST | |
+ int main( int argc, char** argv ) /* UNIT_TEST driver: repeats an open / dump / close cycle 100 times. | |
+ * NOTE(review): this does not match the interface visible in dbh.h, where | |
+ * dbh_open() takes a dbfmt_t followed by at least four more parameters, and | |
+ * `db` is never declared in this function; it presumably predates the | |
+ * current interface and is not expected to compile as written. */ | |
+ { | |
+ dbh_t* pdb; | |
+ veciter_t iter; | |
+ str_t* pstr; | |
+ uint n; | |
+ | |
+ if( argc != 2 ) /* one argument is required, although argv[1] is not used below */ | |
+ { | |
+ fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
+ return 1; | |
+ } | |
+ | |
+ for( n = 0; n < 100; n++ ) | |
+ { | |
+ pdb = dbh_open( "testlist", true ); | |
+ | |
+ vec_first( &db, &iter ); | |
+ while( (pstr = veciter_get( &iter )) != NULL ) | |
+ { | |
+ char buf[MAXWORDLEN+32]; | |
+ char* p; | |
+ if( pstr->len > 200 ) /* NOTE(review): literal 200 while buf holds MAXWORDLEN+32 bytes; confirm the two limits agree */ | |
+ { | |
+ fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
+ break; | |
+ } | |
+ p = buf; | |
+ strcpy( buf, "str: " ); | |
+ p += 6; /* NOTE(review): "str: " is 5 characters, so this also steps over the NUL strcpy wrote and puts( buf ) stops after the prefix; 5 looks intended */ | |
+ memcpy( p, pstr->p, pstr->len ); | |
+ p += pstr->len; | |
+ sprintf( p, " %u", pstr->count ); | |
+ puts( buf ); | |
+ | |
+ veciter_next( &iter ); | |
+ } | |
+ | |
+ dbh_close( &db ); | |
+ } | |
+ | |
+ return 0; | |
+ } | |
+#endif /* def UNIT_TEST */ | |
diff --git a/dbmysql.h b/dbmysql.h | |
@@ -0,0 +1,60 @@ | |
+/* $Id: dbmysql.h,v 1.4 2002/10/06 06:46:53 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _DBMYSQL_H | |
+#define _DBMYSQL_H | |
+ | |
+#ifdef HAVE_MYSQL | |
+ | |
+#include "mysql.h" | |
+ | |
+ typedef struct _dbtmysql dbtmysql_t; /* mysql table object. Its first seven | |
+ * members are the same callbacks, in the same order, as dbt_t in dbh.h | |
+ * (only the self-pointer type differs); dbmysql.c returns it as a dbt_t*. */ | |
+ struct _dbtmysql | |
+ { | |
+ bool_t (*close)(dbtmysql_t*); | |
+ bool_t (*mergeclose)(dbtmysql_t*,vec_t*); | |
+ bool_t (*unmergeclose)(dbtmysql_t*,vec_t*); | |
+ bool_t (*import)(dbtmysql_t*,cpchar); | |
+ bool_t (*export)(dbtmysql_t*,cpchar); | |
+ uint (*getmsgcount)(dbtmysql_t*); | |
+ uint (*getcount)(dbtmysql_t*,str_t*); | |
+ | |
+ struct _dbhmysql* pdb; /* connection this table belongs to; NULL once the table is closed */ | |
+ char* table; /* table name (private copy, freed on close) */ | |
+ uint nmsgs; /* number of messages in table (cached) */ | |
+ }; | |
+ | |
+ typedef struct _dbhmysql dbhmysql_t; /* mysql database handle. Starts with | |
+ * the same two callbacks as dbh_t in dbh.h; dbmysql_db_open() returns it as | |
+ * a dbh_t*. */ | |
+ struct _dbhmysql | |
+ { | |
+ bool_t (*close)(dbhmysql_t*); | |
+ dbt_t* (*opentable)(dbhmysql_t*,cpchar,bool_t); | |
+ | |
+ MYSQL* dbh; /* database handle, if currently open */ | |
+ }; | |
+ | |
+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar d… | |
+bool_t dbmysql_db_close( dbhmysql_t* pthis ); | |
+dbt_t* dbmysql_db_opentable( dbhmysql_t* pthis, cpchar table, bool_t rdonly ); | |
+ | |
+bool_t dbmysql_table_close( dbtmysql_t* pthis ); | |
+bool_t dbmysql_table_mergeclose( dbtmysql_t* pthis, vec_t* pmsg ); | |
+bool_t dbmysql_table_unmergeclose( dbtmysql_t* pthis, vec_t* pmsg ); | |
+bool_t dbmysql_table_import( dbtmysql_t* pthis, cpchar filename ); | |
+bool_t dbmysql_table_export( dbtmysql_t* pthis, cpchar filename ); | |
+uint dbmysql_table_getmsgcount( dbtmysql_t* pthis ); | |
+uint dbmysql_table_getcount( dbtmysql_t* pthis, str_t* pword ); | |
+ | |
+#else /* def HAVE_MYSQL */ | |
+ | |
+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar d… | |
+ | |
+#endif /* def HAVE_MYSQL */ | |
+ | |
+#endif /* ndef _DBMYSQL_H */ | |
diff --git a/dbtext.c b/dbtext.c | |
@@ -0,0 +1,591 @@ | |
+/* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * dbtext.c: flatfile database handler | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+ | |
+#include "dbh.h" | |
+#include "dbtext.h" | |
+ | |
+static void dbtext_table_setsize( dbttext_t* pthis, uint nsize ) | |
+{ | |
+ if( nsize > pthis->nalloc ) | |
+ { | |
+ uint nnewalloc; | |
+ rec_t* pnewitems; | |
+ uint n; | |
+ | |
+ nnewalloc = pthis->nalloc * 2; | |
+ if( nnewalloc < nsize ) nnewalloc = nsize; | |
+ pnewitems = (rec_t*)realloc( pthis->pitems, nnewalloc*sizeof(rec_t) ); | |
+ if( pnewitems == NULL ) | |
+ { | |
+ exit( 2 ); | |
+ } | |
+ for( n = pthis->nitems; n < nsize; n++ ) | |
+ { | |
+ str_create( &pnewitems[n].w ); | |
+ pnewitems[n].n = 0; | |
+ } | |
+ pthis->pitems = pnewitems; | |
+ pthis->nalloc = nnewalloc; | |
+ } | |
+} | |
+ | |
+dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbp… | |
+{ | |
+ dbhtext_t* pthis; | |
+ | |
+ uint dirlen; | |
+ cpchar phome; | |
+ struct stat st; | |
+ | |
+ pthis = (dbhtext_t*)malloc( sizeof(dbhtext_t) ); | |
+ if( pthis == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ pthis->close = dbtext_db_close; | |
+ pthis->opentable = dbtext_db_opentable; | |
+ if( dbname != NULL && *dbname != '\0' ) | |
+ { | |
+ dirlen = strlen( dbname ); | |
+ pthis->dir = strdup( dbname ); | |
+ if( pthis->dir[dirlen-1] == '/' ) | |
+ { | |
+ pthis->dir[dirlen-1] = '\0'; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ phome = getenv( "HOME" ); | |
+ if( phome == NULL || *phome == '\0' ) | |
+ { | |
+ phome = "."; | |
+ } | |
+ pthis->dir = (char*)malloc( strlen(phome)+5+1 ); | |
+ if( pthis->dir == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ sprintf( pthis->dir, "%s/.bmf", phome ); | |
+ } | |
+ | |
+ /* ensure config directory exists */ | |
+ if( stat( pthis->dir, &st ) != 0 ) | |
+ { | |
+ if( errno == ENOENT ) | |
+ { | |
+ if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ else | |
+ { | |
+ if( !S_ISDIR( st.st_mode ) ) | |
+ { | |
+ goto bail; | |
+ } | |
+ } | |
+ | |
+ return (dbh_t*)pthis; | |
+ | |
+bail: | |
+ return NULL; | |
+} | |
+ | |
+bool_t dbtext_db_close( dbhtext_t* pthis ) | |
+{ | |
+ free( pthis->dir ); | |
+ pthis->dir = NULL; | |
+ return true; | |
+} | |
+ | |
+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ) | |
+{ | |
+ dbttext_t* ptable = NULL; | |
+ | |
+#ifndef NOLOCK | |
+ struct flock lock; | |
+#endif /* ndef NOLOCK */ | |
+ char szpath[PATH_MAX]; | |
+ int flags; | |
+ struct stat st; | |
+ | |
+ char* pbegin; | |
+ char* pend; | |
+ rec_t r; | |
+ uint pos; | |
+ | |
+ if( pthis->dir == NULL ) | |
+ { | |
+ goto bail; | |
+ } | |
+ | |
+ ptable = (dbttext_t*)malloc( sizeof(dbttext_t) ); | |
+ if( ptable == NULL ) | |
+ { | |
+ perror( "malloc()" ); | |
+ goto bail; | |
+ } | |
+ ptable->close = dbtext_table_close; | |
+ ptable->mergeclose = dbtext_table_mergeclose; | |
+ ptable->unmergeclose = dbtext_table_unmergeclose; | |
+ ptable->import = dbtext_table_import; | |
+ ptable->export = dbtext_table_export; | |
+ ptable->getmsgcount = dbtext_table_getmsgcount; | |
+ ptable->getcount = dbtext_table_getcount; | |
+ ptable->fd = -1; | |
+ ptable->pbuf = NULL; | |
+ ptable->nmsgs = 0; | |
+ ptable->nalloc = 0; | |
+ ptable->nitems = 0; | |
+ ptable->pitems = NULL; | |
+ | |
+ sprintf( szpath, "%s/%s.txt", pthis->dir, table ); | |
+ flags = (rdonly ? O_RDONLY|O_CREAT : O_RDWR|O_CREAT); | |
+ ptable->fd = open( szpath, flags, 0644 ); | |
+ if( ptable->fd == -1 ) | |
+ { | |
+ perror( "open()" ); | |
+ goto bail; | |
+ } | |
+ | |
+#ifndef NOLOCK | |
+ memset( &lock, 0, sizeof(lock) ); | |
+ lock.l_type = rdonly ? F_RDLCK : F_WRLCK; | |
+ lock.l_start = 0; | |
+ lock.l_whence = SEEK_SET; | |
+ lock.l_len = 0; | |
+ fcntl( ptable->fd, F_SETLKW, &lock ); | |
+#endif /* ndef NOLOCK */ | |
+ | |
+ if( fstat( ptable->fd, &st ) != 0 ) | |
+ { | |
+ perror( "fstat()" ); | |
+ goto bail_uc; | |
+ } | |
+ | |
+ if( st.st_size == 0 ) | |
+ { | |
+ return (dbt_t*)ptable; | |
+ } | |
+ | |
+ ptable->pbuf = (char*)malloc( st.st_size ); | |
+ if( ptable->pbuf == NULL ) | |
+ { | |
+ perror( "malloc()" ); | |
+ goto bail_uc; | |
+ } | |
+ | |
+ if( read( ptable->fd, ptable->pbuf, st.st_size ) != st.st_size ) | |
+ { | |
+ perror( "read()" ); | |
+ goto bail_fuc; | |
+ } | |
+ | |
+ /* XXX: bogofilter compatibility */ | |
+ if( sscanf( ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs ) != 1 ) | |
+ { | |
+ goto bail_fuc; | |
+ } | |
+ pbegin = ptable->pbuf; | |
+ while( *pbegin != '\n' ) pbegin++; | |
+ pbegin++; | |
+ | |
+ pos = 0; | |
+ while( pbegin < ptable->pbuf + st.st_size ) | |
+ { | |
+ pend = pbegin; | |
+ r.w.p = pbegin; | |
+ r.w.len = 0; | |
+ r.n = 0; | |
+ | |
+ while( *pend != '\n' ) | |
+ { | |
+ if( pend >= ptable->pbuf + st.st_size ) | |
+ { | |
+ goto bail_fuc; | |
+ } | |
+ *pend = tolower(*pend); | |
+ if( *pend == ' ' ) | |
+ { | |
+ r.w.len = (pend-pbegin); | |
+ r.n = strtol( pend+1, NULL, 10 ); | |
+ } | |
+ pend++; | |
+ } | |
+ if( pend > pbegin && *pbegin != '#' && *pbegin != ';' ) | |
+ { | |
+ if( r.w.len == 0 || r.w.len > MAXWORDLEN ) | |
+ { | |
+ fprintf( stderr, "dbh_loadfile: bad file format\n" ); | |
+ goto bail_fuc; | |
+ } | |
+ dbtext_table_setsize( ptable, pos+1 ); | |
+ ptable->pitems[pos++] = r; | |
+ ptable->nitems = pos; | |
+ } | |
+ pbegin = pend+1; | |
+ } | |
+ | |
+ if( rdonly ) | |
+ { | |
+#ifndef NOLOCK | |
+ lock.l_type = F_UNLCK; | |
+ fcntl( ptable->fd, F_SETLKW, &lock ); | |
+#endif /* ndef NOLOCK */ | |
+ close( ptable->fd ); | |
+ ptable->fd = -1; | |
+ } | |
+ | |
+ return (dbt_t*)ptable; | |
+ | |
+bail_fuc: | |
+ free( ptable->pbuf ); | |
+ | |
+bail_uc: | |
+#ifndef NOLOCK | |
+ lock.l_type = F_UNLCK; | |
+ fcntl( ptable->fd, F_SETLKW, &lock ); | |
+#endif /* ndef NOLOCK */ | |
+ | |
+ close( ptable->fd ); | |
+ ptable->fd = -1; | |
+ | |
+bail: | |
+ free( ptable ); | |
+ return NULL; | |
+} | |
+ | |
+bool_t dbtext_table_close( dbttext_t* pthis ) | |
+{ | |
+ struct flock lockall; | |
+ | |
+ free( pthis->pbuf ); | |
+ pthis->pbuf = NULL; | |
+ free( pthis->pitems ); | |
+ pthis->pitems = NULL; | |
+ | |
+ if( pthis->fd != -1 ) | |
+ { | |
+#ifndef NOLOCK | |
+ memset( &lockall, 0, sizeof(lockall) ); | |
+ lockall.l_type = F_UNLCK; | |
+ lockall.l_start = 0; | |
+ lockall.l_whence = SEEK_SET; | |
+ lockall.l_len = 0; | |
+ fcntl( pthis->fd, F_SETLKW, &lockall ); | |
+#endif /* ndef NOLOCK */ | |
+ close( pthis->fd ); | |
+ pthis->fd = -1; | |
+ } | |
+ | |
+ return true; | |
+} | |
+ | |
+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ) | |
+{ | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t* prec; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char* p; | |
+ | |
+ if( pthis->fd == -1 ) | |
+ { | |
+ return false; | |
+ } | |
+ ftruncate( pthis->fd, 0 ); | |
+ lseek( pthis->fd, 0, SEEK_SET ); | |
+ | |
+ pthis->nmsgs++; | |
+ | |
+ p = iobuf; | |
+ p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ pos = 0; | |
+ while( pos < pthis->nitems || pmsgstr != NULL ) | |
+ { | |
+ int cmp = 0; | |
+ prec = &pthis->pitems[pos]; | |
+ if( pmsgstr != NULL && pos < pthis->nitems ) | |
+ { | |
+ cmp = str_casecmp( &prec->w, pmsgstr ); | |
+ } | |
+ else | |
+ { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if( cmp < 0 ) | |
+ { | |
+ /* write existing str */ | |
+ assert( prec->w.p != NULL && prec->w.len > 0 ); | |
+ assert( prec->w.len <= MAXWORDLEN ); | |
+ count = prec->n; | |
+ strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ pos++; | |
+ } | |
+ else if( cmp == 0 ) | |
+ { | |
+ /* same str, merge and write sum */ | |
+ assert( prec->w.p != NULL && prec->w.len > 0 ); | |
+ assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
+ assert( prec->w.len <= MAXWORDLEN ); | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ count = db_getnewcount( &msgiter ); | |
+ count += prec->n; | |
+ strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ pos++; | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ else /* cmp > 0 */ | |
+ { | |
+ /* write new str */ | |
+ assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ count = db_getnewcount( &msgiter ); | |
+ strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
+ { | |
+ write( pthis->fd, iobuf, p-iobuf ); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if( p != iobuf ) | |
+ { | |
+ write( pthis->fd, iobuf, p-iobuf ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ return dbtext_table_close( pthis ); | |
+} | |
+ | |
+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ) | |
+{ | |
+ /* note that we require both vectors to be sorted */ | |
+ | |
+ uint pos; | |
+ rec_t* prec; | |
+ veciter_t msgiter; | |
+ str_t* pmsgstr; | |
+ uint count; | |
+ char iobuf[IOBUFSIZE]; | |
+ char* p; | |
+ | |
+ if( pthis->fd == -1 ) | |
+ { | |
+ return false; | |
+ } | |
+ ftruncate( pthis->fd, 0 ); | |
+ lseek( pthis->fd, 0, SEEK_SET ); | |
+ | |
+ pthis->nmsgs--; | |
+ | |
+ p = iobuf; | |
+ p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs ); | |
+ | |
+ vec_first( pmsg, &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ | |
+ pos = 0; | |
+ while( pos < pthis->nitems || pmsgstr != NULL ) | |
+ { | |
+ int cmp = 0; | |
+ prec = &pthis->pitems[pos]; | |
+ if( pmsgstr != NULL && pos < pthis->nitems ) | |
+ { | |
+ cmp = str_casecmp( &prec->w, pmsgstr ); | |
+ } | |
+ else | |
+ { | |
+ /* we exhausted one list or the other (but not both) */ | |
+ cmp = (pos < pthis->nitems) ? -1 : 1; | |
+ } | |
+ if( cmp < 0 ) | |
+ { | |
+ /* write existing str */ | |
+ assert( prec->w.p != NULL && prec->w.len > 0 ); | |
+ assert( prec->w.len <= MAXWORDLEN ); | |
+ count = prec->n; | |
+ strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ pos++; | |
+ } | |
+ else if( cmp == 0 ) | |
+ { | |
+ /* same str, merge and write difference */ | |
+ assert( prec->w.p != NULL && prec->w.len > 0 ); | |
+ assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
+ assert( prec->w.len <= MAXWORDLEN ); | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ count = db_getnewcount( &msgiter ); | |
+ count = (prec->n > count) ? (prec->n - count) : 0; | |
+ strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ pos++; | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ else /* cmp > 0 */ | |
+ { | |
+ /* this should not happen, so write with count=0 */ | |
+ assert( pmsgstr->p != NULL && pmsgstr->len > 0 ); | |
+ assert( pmsgstr->len <= MAXWORDLEN ); | |
+ db_getnewcount( &msgiter ); | |
+ count = 0; | |
+ strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len; | |
+ *p++ = ' '; | |
+ p += sprintf( p, "%u\n", count ); | |
+ | |
+ veciter_next( &msgiter ); | |
+ pmsgstr = veciter_get( &msgiter ); | |
+ } | |
+ | |
+ if( p+TEXTDB_MAXLINELEN > (iobuf+1) ) | |
+ { | |
+ write( pthis->fd, iobuf, p-iobuf ); | |
+ p = iobuf; | |
+ } | |
+ } | |
+ if( p != iobuf ) | |
+ { | |
+ write( pthis->fd, iobuf, p-iobuf ); | |
+ } | |
+ | |
+ veciter_destroy( &msgiter ); | |
+ return dbtext_table_close( pthis ); | |
+} | |
+ | |
+bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename ) | |
+{ | |
+ return false; | |
+} | |
+ | |
+bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename ) | |
+{ | |
+ return false; | |
+} | |
+ | |
+uint dbtext_table_getmsgcount( dbttext_t* pthis ) | |
+{ | |
+ return pthis->nmsgs; | |
+} | |
+ | |
+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ) | |
+{ | |
+ int lo, hi, mid; | |
+ | |
+ if( pthis->nitems == 0 ) | |
+ { | |
+ return 0; | |
+ } | |
+ | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while( hi-lo > 1 ) | |
+ { | |
+ mid = (hi+lo)/2; | |
+ if( str_casecmp( pword, &pthis->pitems[mid].w ) <= 0 ) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert( hi >= 0 && hi < pthis->nitems ); | |
+ | |
+ if( str_casecmp( pword, &pthis->pitems[hi].w ) != 0 ) | |
+ { | |
+ return 0; | |
+ } | |
+ | |
+ return pthis->pitems[hi].n; | |
+} | |
+ | |
+#ifdef UNIT_TEST | |
+ int main( int argc, char** argv ) /* UNIT_TEST driver: repeats an open / dump / close cycle 100 times. | |
+ * NOTE(review): this does not match the interface visible in dbh.h, where | |
+ * dbh_open() takes a dbfmt_t followed by at least four more parameters, and | |
+ * `db` is never declared in this function; it presumably predates the | |
+ * current interface and is not expected to compile as written. */ | |
+ { | |
+ dbh_t* pdb; | |
+ veciter_t iter; | |
+ str_t* pstr; | |
+ uint n; | |
+ | |
+ if( argc != 2 ) /* one argument is required, although argv[1] is not used below */ | |
+ { | |
+ fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
+ return 1; | |
+ } | |
+ | |
+ for( n = 0; n < 100; n++ ) | |
+ { | |
+ pdb = dbh_open( "testlist", true ); | |
+ | |
+ vec_first( &db, &iter ); | |
+ while( (pstr = veciter_get( &iter )) != NULL ) | |
+ { | |
+ char buf[MAXWORDLEN+32]; | |
+ char* p; | |
+ if( pstr->len > 200 ) /* NOTE(review): literal 200 while buf holds MAXWORDLEN+32 bytes; confirm the two limits agree */ | |
+ { | |
+ fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
+ break; | |
+ } | |
+ p = buf; | |
+ strcpy( buf, "str: " ); | |
+ p += 6; /* NOTE(review): "str: " is 5 characters, so this also steps over the NUL strcpy wrote and puts( buf ) stops after the prefix; 5 looks intended */ | |
+ memcpy( p, pstr->p, pstr->len ); | |
+ p += pstr->len; | |
+ sprintf( p, " %u", pstr->count ); | |
+ puts( buf ); | |
+ | |
+ veciter_next( &iter ); | |
+ } | |
+ | |
+ dbh_close( &db ); | |
+ } | |
+ | |
+ return 0; | |
+ } | |
+#endif /* def UNIT_TEST */ | |
diff --git a/dbtext.h b/dbtext.h | |
@@ -0,0 +1,53 @@ | |
+/* $Id: dbtext.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _DBTEXT_H | |
+#define _DBTEXT_H | |
+ | |
+ typedef struct _dbttext dbttext_t; /* flat-text table object. Its first seven | |
+ * members are the same callbacks, in the same order, as dbt_t in dbh.h | |
+ * (only the self-pointer type differs); dbtext.c returns it as a dbt_t*. */ | |
+ struct _dbttext | |
+ { | |
+ bool_t (*close)(dbttext_t*); | |
+ bool_t (*mergeclose)(dbttext_t*,vec_t*); | |
+ bool_t (*unmergeclose)(dbttext_t*,vec_t*); | |
+ bool_t (*import)(dbttext_t*,cpchar); | |
+ bool_t (*export)(dbttext_t*,cpchar); | |
+ uint (*getmsgcount)(dbttext_t*); | |
+ uint (*getcount)(dbttext_t*,str_t*); | |
+ | |
+ int fd; /* file descriptor, or -1 when the file is not open */ | |
+ char* pbuf; /* file contents as read at open time; the words in pitems point into it */ | |
+ uint nmsgs; /* number of messages represented in list */ | |
+ uint nalloc; /* items alloced in pitems */ | |
+ uint nitems; /* items available */ | |
+ rec_t* pitems; /* growing vector of items */ | |
+ }; | |
+ | |
+ typedef struct _dbhtext dbhtext_t; /* flat-text database handle. Starts with | |
+ * the same two callbacks as dbh_t in dbh.h; dbtext_db_open() returns it as | |
+ * a dbh_t*. */ | |
+ struct _dbhtext | |
+ { | |
+ bool_t (*close)(dbhtext_t*); | |
+ dbt_t* (*opentable)(dbhtext_t*,cpchar,bool_t); | |
+ | |
+ char* dir; /* directory containing the "<table>.txt" files; NULL after close */ | |
+ }; | |
+ | |
+dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db… | |
+bool_t dbtext_db_close( dbhtext_t* pthis ); | |
+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly ); | |
+ | |
+bool_t dbtext_table_close( dbttext_t* pthis ); | |
+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg ); | |
+bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename ); | |
+bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename ); | |
+uint dbtext_table_getmsgcount( dbttext_t* pthis ); | |
+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword ); | |
+ | |
+#endif /* ndef _DBTEXT_H */ | |
diff --git a/filt.c b/filt.c | |
@@ -0,0 +1,175 @@ | |
+/* $Id: filt.c,v 1.1 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * filt.c: The Bayes filter implementation. | |
+ * See http://www.paulgraham.com/spam.html for discussion. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+#include "dbh.h" | |
+#include "filt.h" | |
+ | |
+#define DEVIATION(n) fabs((n)-0.5f) /* absolute distance of probability n from the neutral value 0.5 */ | |
+ | |
+/* | |
+ * Dump the contents of a statistics structure to fd. | |
+ * | |
+ * Writes one "# Spamicity: <value>" line followed by one | |
+ * "# '<token in lower case>' -> <probability>" line for every extrema slot | |
+ * that holds a token.  Output is batched in a local buffer and flushed | |
+ * whenever the next record might no longer fit.  write() failures are not | |
+ * reported (the function returns void). | |
+ */ | |
+void statdump( stats_t* pstat, int fd ) | |
+{ | |
+    char        iobuf[IOBUFSIZE]; | |
+    char*       p; | |
+    discrim_t*  pp; | |
+ | |
+    p = iobuf; | |
+    p += snprintf( iobuf, sizeof(iobuf), "# Spamicity: %f\n", pstat->spamicity ); | |
+ | |
+    for (pp = pstat->extrema; pp < pstat->extrema + pstat->keepers; pp++) | |
+    { | |
+        if (pp->key.len) | |
+        { | |
+            strcpy( p, "# '" ); p += 3; | |
+            /* the key is not NUL-terminated: copy exactly key.len characters */ | |
+            strncpylwr( p, pp->key.p, pp->key.len ); p += pp->key.len; | |
+            p += snprintf( p, 28, "' -> %f\n", pp->prob ); | |
+            /* | |
+             * Flush only when another worst-case record (MAXWORDLEN+32 | |
+             * bytes) would run past the end of the buffer.  The previous | |
+             * test compared against iobuf+1 and therefore flushed after | |
+             * every single record. | |
+             */ | |
+            if( p+MAXWORDLEN+32 > iobuf+sizeof(iobuf) ) | |
+            { | |
+                write( fd, iobuf, p-iobuf ); | |
+                p = iobuf; | |
+            } | |
+        } | |
+    } | |
+    if( p != iobuf ) | |
+    { | |
+        write( fd, iobuf, p-iobuf ); | |
+    } | |
+} | |
+ | |
+/* | |
+ * bayesfilt: compute the spamicity of one message. | |
+ * | |
+ *   pglist  table queried for each word's "good" count and message total | |
+ *   pblist  table queried for each word's "spam" count and message total | |
+ *   pmlist  tokens of the message being scored | |
+ *   pstats  in:  keepers and extrema (an array of keepers slots) | |
+ *           out: extrema holds the tokens whose probability deviates most | |
+ *                from 0.5, spamicity holds their combined probability | |
+ * | |
+ * The keys stored in extrema are copies of the str_t values in pmlist, so | |
+ * they point at the same characters as the message tokens. | |
+ */ | |
+void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats ) | |
+{ | |
+    veciter_t   iter; | |
+    str_t*      pword; | |
+ | |
+    double prob, product, invproduct, dev; | |
+    double slotdev, hitdev; | |
+ | |
+    /* the message totals do not depend on the word: fetch them once */ | |
+    uint goodtotal = pglist->getmsgcount( pglist ); | |
+    uint spamtotal = pblist->getmsgcount( pblist ); | |
+ | |
+#ifdef NON_EQUIPROBABLE | |
+    /* There is an argument that we should (go?) by number of *words* here. */ | |
+    double msg_prob = ((double)pblist->nitems / (double)pglist->nitems); | |
+#endif | |
+ | |
+    discrim_t* pp; | |
+    discrim_t* hit; | |
+ | |
+    /* start with every slot empty and neutral (deviation 0) */ | |
+    for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
+    { | |
+        pp->key.p = NULL; | |
+        pp->key.len = 0; | |
+        pp->prob = 0.5f; | |
+    } | |
+ | |
+    vec_first( pmlist, &iter ); | |
+    while( (pword = veciter_get( &iter )) != NULL ) | |
+    { | |
+        double goodness = pglist->getcount( pglist, pword ); | |
+        double spamness = pblist->getcount( pblist, pword ); | |
+ | |
+        if( goodness + spamness < MINIMUM_FREQ ) | |
+        { | |
+#ifdef NON_EQUIPROBABLE | |
+            /* | |
+             * In the absence of evidence, the probability that a new word will | |
+             * be spam is the historical ratio of spam words to nonspam words. | |
+             */ | |
+            prob = msg_prob; | |
+#else | |
+            prob = UNKNOWN_WORD; | |
+#endif | |
+        } | |
+        else | |
+        { | |
+            double goodprob = goodtotal ? min( 1.0, (goodness / goodtotal) ) : 0.0; | |
+            double spamprob = spamtotal ? min( 1.0, (spamness / spamtotal) ) : 0.0; | |
+            assert( goodtotal > 0 || spamtotal > 0 ); | |
+ | |
+#ifdef NON_EQUIPROBABLE | |
+            prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spamprob * msg_prob)); | |
+#else | |
+            prob = spamprob / (goodprob + spamprob); | |
+#endif | |
+ | |
+            prob = minmax( prob, 0.01, 0.99 ); | |
+        } | |
+ | |
+        /* | |
+         * Update the list of tokens with maximum deviation: find the slot | |
+         * with the SMALLEST deviation that is still smaller than this | |
+         * word's, and replace it.  (The previous test replaced the last | |
+         * weaker slot found, which could evict a strong token while a | |
+         * weaker one stayed in the list.) | |
+         */ | |
+        dev = DEVIATION(prob); | |
+        hit = NULL; | |
+        hitdev = dev; | |
+        for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
+        { | |
+            /* don't allow duplicate tokens in the stats.extrema */ | |
+            if( pp->key.len > 0 && str_casecmp( pword, &pp->key ) == 0 ) | |
+            { | |
+                hit = NULL; | |
+                break; | |
+            } | |
+ | |
+            slotdev = DEVIATION(pp->prob); | |
+            if (slotdev < hitdev) | |
+            { | |
+                hit = pp; | |
+                hitdev = slotdev; | |
+            } | |
+        } | |
+        if (hit) | |
+        { | |
+            hit->prob = prob; | |
+            hit->key = *pword; | |
+        } | |
+ | |
+        veciter_next( &iter ); | |
+    } | |
+    veciter_destroy( &iter ); | |
+ | |
+    /* | |
+     * Bayes' theorem. | |
+     * For discussion, see <http://www.mathpages.com/home/kmath267.htm>. | |
+     * Unused slots still hold 0.5 and so contribute equally to both products. | |
+     */ | |
+    product = invproduct = 1.0f; | |
+    for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++) | |
+    { | |
+        if( pp->prob == 0 ) | |
+        { | |
+            break; | |
+        } | |
+        else | |
+        { | |
+            product *= pp->prob; | |
+            invproduct *= (1 - pp->prob); | |
+        } | |
+    } | |
+    pstats->spamicity = product / (product + invproduct); | |
+} | |
+ | |
+/* | |
+ * Append every token of the current message to pthis.  Reading stops at the | |
+ * first eof or from token, which is left in *ptok for the caller.  The | |
+ * stored str_t values point at the token text handed out by the lexer; no | |
+ * characters are copied.  Always returns true. | |
+ */ | |
+bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok ) | |
+{ | |
+    for( lex_nexttoken( plex, ptok ); | |
+         !(ptok->tt == eof || ptok->tt == from); | |
+         lex_nexttoken( plex, ptok ) ) | |
+    { | |
+        str_t item; | |
+ | |
+        item.len = ptok->len; | |
+        item.p = ptok->p; | |
+        vec_addtail( pthis, &item ); | |
+    } | |
+ | |
+    return true; | |
+} | |
diff --git a/filt.h b/filt.h | |
@@ -0,0 +1,31 @@ | |
+/* $Id: filt.h,v 1.1 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _FILT_H | |
+#define _FILT_H | |
+ | |
+typedef struct | |
+{ | |
+ str_t key; /* token; bayesfilt() stores a copy of the message's str_t here */ | |
+ double prob; /* spam probability bayesfilt() computed for that token */ | |
+} discrim_t; | |
+ | |
+typedef struct | |
+{ | |
+ double spamicity; /* combined probability, written by bayesfilt() */ | |
+ uint keepers; /* number of slots in extrema */ | |
+ discrim_t* extrema; /* array of keepers slots; bayesfilt() fills it but does not allocate it */ | |
+} stats_t; | |
+ | |
+void statdump( stats_t* pstat, int fd ); | |
+void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats ); | |
+ | |
+bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok ); | |
+ | |
+#endif /* ndef _FILT_H */ | |
diff --git a/lex.c b/lex.c | |
@@ -0,0 +1,787 @@ | |
+/* $Id: lex.c,v 1.18 2002/10/20 20:29:15 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * lex.c: generate token stream for bmf. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+ | |
+static cpchar g_htmltags[] = /* HTML keywords that is_htmltag() skips. | |
+ * is_htmltag() binary-searches this table | |
+ * with strncmp(), so it must stay sorted. */ | |
+{ | |
+ "abbr", | |
+ "above", | |
+ "accesskey", | |
+ "acronym", | |
+ "align", | |
+ "alink", | |
+ "all", | |
+ "alt", | |
+ "applet", | |
+ "archive", | |
+ "axis", | |
+ "basefont", | |
+ "baseline", | |
+ "below", | |
+ "bgcolor", | |
+ "big", | |
+ "body", | |
+ "border", | |
+ "bottom", | |
+ "box", | |
+ "button", | |
+ "cellpadding", | |
+ "cellspacing", | |
+ "center", | |
+ "char", | |
+ "charoff", | |
+ "charset", | |
+ "circle", | |
+ "cite", | |
+ "class", | |
+ "classid", | |
+ "clear", | |
+ "codebase", | |
+ "codetype", | |
+ "color", | |
+ "cols", | |
+ "colspan", | |
+ "compact", | |
+ "content", | |
+ "coords", | |
+ "data", | |
+ "datetime", | |
+ "declare", | |
+ "default", | |
+ "defer", | |
+ "dfn", | |
+ "dir", | |
+ "disabled", | |
+ "face", | |
+ "font", | |
+ "frameborder", | |
+ "groups", | |
+ "head", | |
+ "headers", | |
+ "height", | |
+ "href", | |
+ "hreflang", | |
+ "hsides", | |
+ "hspace", | |
+ "http-equiv", | |
+ "iframe", | |
+ "img", | |
+ "input", | |
+ "ismap", | |
+ "justify", | |
+ "kbd", | |
+ "label", | |
+ "lang", | |
+ "language", | |
+ "left", | |
+ "lhs", | |
+ "link", | |
+ "longdesc", | |
+ "map", | |
+ "marginheight", | |
+ "marginwidth", | |
+ "media", | |
+ "meta", | |
+ "middle", | |
+ "multiple", | |
+ "name", | |
+ "nohref", | |
+ "none", | |
+ "noresize", | |
+ "noshade", | |
+ "nowrap", | |
+ "object", | |
+ "onblur", | |
+ "onchange", | |
+ "onclick", | |
+ "ondblclick", | |
+ "onfocus", | |
+ "onkeydown", | |
+ "onkeypress", | |
+ "onkeyup", | |
+ "onload", | |
+ "onmousedown", | |
+ "onmousemove", | |
+ "onmouseout", | |
+ "onmouseover", | |
+ "onmouseup", | |
+ "onselect", | |
+ "onunload", | |
+ "param", | |
+ "poly", | |
+ "profile", | |
+ "prompt", | |
+ "readonly", | |
+ "rect", | |
+ "rel", | |
+ "rev", | |
+ "rhs", | |
+ "right", | |
+ "rows", | |
+ "rowspan", | |
+ "rules", | |
+ "samp", | |
+ "scheme", | |
+ "scope", | |
+ "script", | |
+ "scrolling", | |
+ "select", | |
+ "selected", | |
+ "shape", | |
+ "size", | |
+ "small", | |
+ "span", | |
+ "src", | |
+ "standby", | |
+ "strike", | |
+ "strong", | |
+ "style", | |
+ "sub", | |
+ "summary", | |
+ "sup", | |
+ "tabindex", | |
+ "table", | |
+ "target", | |
+ "textarea", | |
+ "title", | |
+ "top", | |
+ "type", | |
+ "usemap", | |
+ "valign", | |
+ "value", | |
+ "valuetype", | |
+ "var", | |
+ "vlink", | |
+ "void", | |
+ "vsides", | |
+ "vspace", | |
+ "width" | |
+}; | |
+static const uint g_nhtmltags = sizeof(g_htmltags)/sizeof(cpchar); /* number of entries in g_htmltags */ | |
+ | |
+static cpchar g_ignoredheaders[] = /* line prefixes matched by is_ignoredheader(), | |
+ * which binary-searches this table with | |
+ * strncasecmp(): keep it sorted ignoring case */ | |
+{ | |
+ "Date:", | |
+ "Delivery-date:", | |
+ "Message-ID:", | |
+ "X-Sorted:", | |
+ "X-Spam-" | |
+}; | |
+static const uint g_nignoredheaders = sizeof(g_ignoredheaders)/sizeof(cpchar); /* number of entries */ | |
+ | |
+/* True for the in-line blanks skipped between tokens: space, tab and CR (not LF). */ | |
+static inline bool_t is_whitespace( int c ) | |
+{ | |
+    switch( c ) | |
+    { | |
+    case ' ': | |
+    case '\t': | |
+    case '\r': | |
+        return true; | |
+    default: | |
+        return false; | |
+    } | |
+} | |
+ | |
+/* | |
+ * True if c belongs to the base64 alphabet (alphanumeric, '/' or '+'). | |
+ * The parameter is now declared explicitly (the untyped K&R form relied on | |
+ * implicit int), and the value is converted to unsigned char before it | |
+ * reaches isalnum() because callers pass plain char, which may be negative. | |
+ */ | |
+static inline bool_t is_base64char( int c ) | |
+{ | |
+    return ( isalnum((unsigned char)c) || (c == '/' || c == '+') ); | |
+} | |
+ | |
+/* | |
+ * True if c may appear inside a word: alphanumeric, '$', '\'', '.' or '-'. | |
+ * The parameter is declared explicitly (no implicit int) and converted to | |
+ * unsigned char for isalnum(), since callers pass plain char. | |
+ */ | |
+static inline bool_t is_wordmidchar( int c ) | |
+{ | |
+    return ( isalnum((unsigned char)c) || c == '$' || c == '\'' || c == '.' || c == '-' ); | |
+} | |
+ | |
+/* | |
+ * True if c may be the last character of a word: alphanumeric or '$'. | |
+ * The parameter is declared explicitly (no implicit int) and converted to | |
+ * unsigned char for isalnum(), since callers pass plain char. | |
+ */ | |
+static inline bool_t is_wordendchar( int c ) | |
+{ | |
+    return ( isalnum((unsigned char)c) || c == '$' ); | |
+} | |
+ | |
+/* | |
+ * Test whether the text at p (len characters left on the line) starts with | |
+ * one of the keywords in g_htmltags. | |
+ * | |
+ * Returns true and sets *ptoklen to the keyword length on a match; returns | |
+ * false with *ptoklen == 0 otherwise.  A keyword is rejected when is_word() | |
+ * would produce a longer token from the same position, and when it would | |
+ * reach the very end of the line (len == minlen). | |
+ */ | |
+static inline bool_t is_htmltag( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    int lo, hi, mid, minlen, cmp; | |
+ | |
+    *ptoklen = 0; | |
+ | |
+    /* binary search; relies on g_htmltags being sorted */ | |
+    hi = g_nhtmltags-1; | |
+    lo = -1; | |
+    while( hi-lo > 1 ) | |
+    { | |
+        mid = (hi+lo)/2; | |
+        minlen = min( strlen(g_htmltags[mid]), len ); | |
+        cmp = strncmp( g_htmltags[mid], p, minlen ); | |
+        if( cmp > 0 || (cmp == 0 && minlen < len && !islower((unsigned char)p[minlen])) ) | |
+            hi = mid; | |
+        else | |
+            lo = mid; | |
+    } | |
+    minlen = min( strlen(g_htmltags[hi]), len ); | |
+    if( len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0 ) | |
+    { | |
+        return false; | |
+    } | |
+ | |
+    /* check if is_word() will have a longer match */ | |
+    if( is_wordendchar(p[minlen]) ) | |
+    { | |
+        return false; | |
+    } | |
+    /* only look at p[minlen+1] when it is still inside the line */ | |
+    if( (uint)minlen+1 < len && | |
+        is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen+1]) ) | |
+    { | |
+        return false; | |
+    } | |
+ | |
+    *ptoklen = strlen(g_htmltags[hi]); | |
+ | |
+    return true; | |
+} | |
+ | |
+/* | |
+ * Recognise an HTML comment delimiter at p.  *ptoklen becomes 4 for the | |
+ * opener, 3 for the closer and 0 when neither is present; the return value | |
+ * says whether a delimiter was found. | |
+ */ | |
+static inline bool_t is_htmlcomment( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    uint matched = 0; | |
+ | |
+    if( len >=4 && memcmp( p, "<!--", 4 ) == 0 ) | |
+    { | |
+        matched = 4; | |
+    } | |
+    else if( len >= 3 && memcmp( p, "-->", 3 ) == 0 ) | |
+    { | |
+        matched = 3; | |
+    } | |
+ | |
+    *ptoklen = matched; | |
+    return ( matched != 0 ) ? true : false; | |
+} | |
+ | |
+/* | |
+ * True when all len characters at p are base64 characters, CR or LF. | |
+ * *ptoklen counts the characters accepted before the scan stopped, so it | |
+ * equals len on success.  A zero-length input yields true. | |
+ */ | |
+static inline bool_t is_base64( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    uint n; | |
+ | |
+    *ptoklen = 0; | |
+    for( n = 0; n < len; n++ ) | |
+    { | |
+        if( p[n] != '\n' && p[n] != '\r' && !is_base64char(p[n]) ) | |
+        { | |
+            return false; | |
+        } | |
+        *ptoklen = n + 1; | |
+    } | |
+    return true; | |
+} | |
+ | |
+/* | |
+ * True if the line at p looks like a MIME boundary: it starts with "--", is | |
+ * at least three characters long, and contains no space or tab before its | |
+ * line terminator.  *ptoklen receives the number of characters before the | |
+ * terminator (0 when the line is too short or does not start with "--"). | |
+ * | |
+ * The terminator test now runs before the whitespace test: is_whitespace() | |
+ * also accepts '\r', so with the old order a CRLF-terminated boundary line | |
+ * was rejected before the '\r' terminator check could be reached. | |
+ */ | |
+static inline bool_t is_mimeboundary( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    *ptoklen = 0; | |
+ | |
+    if( len < 3 || p[0] != '-' || p[1] != '-' ) | |
+    { | |
+        return false; | |
+    } | |
+    p += 2; | |
+    len -= 2; | |
+    *ptoklen += 2; | |
+    while( len > 0 ) | |
+    { | |
+        if( *p == '\n' || *p == '\r' ) | |
+        { | |
+            break; | |
+        } | |
+        if( is_whitespace(*p) ) | |
+        { | |
+            return false; | |
+        } | |
+        p++; | |
+        len--; | |
+        (*ptoklen)++; | |
+    } | |
+    return true; | |
+} | |
+ | |
+/* | |
+ * True if p starts with a dotted quad: four groups of one to three decimal | |
+ * digits separated by '.'.  The numeric range of the groups is not checked. | |
+ * *ptoklen receives the number of characters consumed, which is the length | |
+ * of the address on success. | |
+ * | |
+ * Fixes: the separator is only examined while characters remain (the old | |
+ * code read *p even when len had reached 0), and isdigit() is given an | |
+ * unsigned char value. | |
+ */ | |
+static inline bool_t is_ipaddr( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    uint noctets, ndigits; | |
+ | |
+    *ptoklen = 0; | |
+ | |
+    noctets = 0; | |
+    while( len > 0 && noctets < 4 ) | |
+    { | |
+        ndigits = 0; | |
+        while( len > 0 && isdigit((unsigned char)*p) ) | |
+        { | |
+            ndigits++; | |
+            p++; | |
+            len--; | |
+            (*ptoklen)++; | |
+        } | |
+        if( ndigits == 0 || ndigits > 3 ) | |
+        { | |
+            return false; | |
+        } | |
+        noctets++; | |
+        if( noctets < 4 ) | |
+        { | |
+            if( len == 0 || *p != '.' ) | |
+            { | |
+                return false; | |
+            } | |
+            p++; | |
+            len--; | |
+            (*ptoklen)++; | |
+        } | |
+    } | |
+    if( noctets < 4 ) | |
+    { | |
+        return false; | |
+    } | |
+    return true; | |
+} | |
+ | |
+/* | |
+ * Recognise a word at p: a letter or '$', followed by word-middle | |
+ * characters, with trailing characters that cannot end a word trimmed off. | |
+ * A word must be at least three characters long.  On the two early | |
+ * rejections *ptoklen is left untouched; otherwise it receives the trimmed | |
+ * length, whether or not that length is accepted. | |
+ */ | |
+static inline bool_t is_word( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    uint n; | |
+ | |
+    if( len < 3 || !(isalpha(p[0]) || p[0] == '$') ) | |
+    { | |
+        return false; | |
+    } | |
+ | |
+    /* extend over everything that may appear inside a word */ | |
+    n = 1; | |
+    while( n < len && is_wordmidchar(p[n]) ) | |
+    { | |
+        n++; | |
+    } | |
+ | |
+    /* back off over characters that may not end a word */ | |
+    while( n >= 3 && !is_wordendchar(p[n-1]) ) | |
+    { | |
+        n--; | |
+    } | |
+ | |
+    *ptoklen = n; | |
+    return ( n >= 3 ) ? true : false; | |
+} | |
+ | |
+static inline bool_t is_ignoredheader( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+ int lo, hi, mid, minlen, cmp; | |
+ /* | |
+ * True if the line at p starts (ignoring case) with one of the | |
+ * prefixes in g_ignoredheaders and is longer than that prefix. | |
+ * On a match *ptoklen is set to len, i.e. the whole line; on a | |
+ * miss *ptoklen is not written. | |
+ */ | |
+ hi = g_nignoredheaders-1; | |
+ lo = -1; | |
+ while( hi-lo > 1 ) /* binary search: narrows to the first entry that does not sort before the line */ | |
+ { | |
+ mid = (hi+lo)/2; | |
+ minlen = min( strlen(g_ignoredheaders[mid]), len ); | |
+ cmp = strncasecmp( g_ignoredheaders[mid], p, minlen ); | |
+ if( cmp >= 0 ) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ minlen = min( strlen(g_ignoredheaders[hi]), len ); | |
+ if( len == minlen || strncasecmp(g_ignoredheaders[hi], p, minlen) != 0 ) /* line not longer than the prefix, or prefix differs */ | |
+ { | |
+ return false; | |
+ } | |
+ *ptoklen = len; | |
+ return true; | |
+} | |
+ | |
+/* Line starts with a tab followed by "id ": report the whole line (len) as the token. */ | |
+static inline bool_t is_mailerid( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 4 && strncmp( p, "\tid ", 4 ) == 0 ) | |
+    { | |
+        *ptoklen = len; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Line starts with "SPAM:": report the whole line (len) as the token. */ | |
+static inline bool_t is_spamtext( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 5 && strncmp( p, "SPAM:", 5 ) == 0 ) | |
+    { | |
+        *ptoklen = len; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Text starts with "SMTP id ": report everything up to the end of the line (len). */ | |
+static inline bool_t is_smtpid( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 8 && strncmp( p, "SMTP id ", 8 ) == 0 ) | |
+    { | |
+        *ptoklen = len; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Text starts with "boundary=": report everything up to the end of the line (len). */ | |
+static inline bool_t is_boundaryequal( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 9 && strncmp( p, "boundary=", 9 ) == 0 ) | |
+    { | |
+        *ptoklen = len; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Text starts with name=" : only those six characters form the token. */ | |
+static inline bool_t is_nameequal( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 6 && strncmp( p, "name=\"", 6 ) == 0 ) | |
+    { | |
+        *ptoklen = 6; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Text starts with filename=" : only those ten characters form the token. */ | |
+static inline bool_t is_filenameequal( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 10 && strncmp( p, "filename=\"", 10 ) == 0 ) | |
+    { | |
+        *ptoklen = 10; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Text starts with "From " (the mbox separator): the token is those five characters. */ | |
+static inline bool_t is_from( cpchar p, uint len, uint* ptoklen ) | |
+{ | |
+    if( len >= 5 && strncmp( p, "From ", 5 ) == 0 ) | |
+    { | |
+        *ptoklen = 5; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/*****************************************************************************/ | |
+ | |
+/* | |
+ * Initialise a lexer: no buffer yet, every offset at zero, section set to | |
+ * envelope, and the mailbox type as requested by the caller. | |
+ */ | |
+void lex_create( lex_t* pthis, mbox_t mboxtype ) | |
+{ | |
+    /* no input loaded yet */ | |
+    pthis->pbuf = NULL; | |
+    pthis->buflen = 0; | |
+ | |
+    /* all cursors start at the beginning */ | |
+    pthis->pos = 0; | |
+    pthis->lineend = 0; | |
+    pthis->bom = 0; | |
+    pthis->eom = 0; | |
+ | |
+    pthis->section = envelope; | |
+    pthis->mboxtype = mboxtype; | |
+} | |
+ | |
+/* | |
+ * Release the input buffer.  The pointer is cleared afterwards so that a | |
+ * second lex_destroy() is harmless (free(NULL) is a no-op) and so that the | |
+ * assert in lex_nexttoken() catches use after destruction. | |
+ */ | |
+void lex_destroy( lex_t* pthis ) | |
+{ | |
+    free( pthis->pbuf ); | |
+    pthis->pbuf = NULL; | |
+} | |
+ | |
+bool_t lex_load( lex_t* pthis, int fd ) | |
+{ | |
+ uint nalloc; | |
+ ssize_t nread; | |
+ /* | |
+ * Read everything available on fd into pthis->pbuf, growing the | |
+ * buffer by IOBUFSIZE each time it fills up.  Returns false, with | |
+ * pbuf freed and set to NULL, if an allocation or a read fails. | |
+ * Data is appended at pthis->buflen, which lex_create() sets to 0. | |
+ */ | |
+ nalloc = IOBUFSIZE; | |
+ pthis->pbuf = (char*)malloc( IOBUFSIZE ); | |
+ if( pthis->pbuf == NULL ) | |
+ { | |
+ return false; | |
+ } | |
+ | |
+ while( (nread = read( fd, pthis->pbuf + pthis->buflen, nalloc - pthis->buf… | |
+ { | |
+ pthis->buflen += nread; | |
+ if( pthis->buflen == nalloc ) /* buffer full: grow before the next read */ | |
+ { | |
+ char* pnewbuf; | |
+ nalloc += IOBUFSIZE; | |
+ pnewbuf = (char*)realloc( pthis->pbuf, nalloc ); | |
+ if( pnewbuf == NULL ) | |
+ { | |
+ free( pthis->pbuf ); | |
+ pthis->pbuf = NULL; | |
+ return false; | |
+ } | |
+ pthis->pbuf = pnewbuf; | |
+ } | |
+ } | |
+ if( nread < 0 ) /* read() reported an error */ | |
+ { | |
+ free( pthis->pbuf ); | |
+ pthis->pbuf = NULL; | |
+ return false; | |
+ } | |
+ if( pthis->mboxtype == detect ) /* choose the format from the first bytes of the input */ | |
+ { | |
+ if( pthis->buflen > 5 && memcmp( pthis->pbuf, "From ", 5 ) == 0 ) | |
+ { | |
+ verbose( 1, "Input looks like an mbox\n" ); | |
+ pthis->mboxtype = mbox; | |
+ } | |
+ else | |
+ { | |
+ verbose( 1, "Input looks like a maildir\n" ); | |
+ pthis->mboxtype = maildir; | |
+ } | |
+ } | |
+ | |
+ return true; | |
+} | |
+ | |
+/* | |
+ * Advance to the next line that is worth tokenising. | |
+ * | |
+ * Moves pos to the start of the following line and lineend just past its | |
+ * LF (or to the end of the buffer).  Lines matched by one of the | |
+ * beginning-of-line patterns (base64 data, ignored header, mailer id, MIME | |
+ * boundary, SPAM: text) are skipped.  Returns false once the buffer is | |
+ * exhausted, true when pos/lineend describe a usable line. | |
+ */ | |
+static bool_t lex_nextline( lex_t* pthis ) | |
+{ | |
+    /* XXX: use and update pthis->section */ | |
+    for( ;; ) | |
+    { | |
+        cpchar  line; | |
+        uint    linelen = 0; | |
+        uint    skiplen; | |
+ | |
+        pthis->pos = pthis->lineend; | |
+        if( pthis->lineend == pthis->buflen ) | |
+        { | |
+            return false; | |
+        } | |
+ | |
+        /* measure the line, including its LF when there is one */ | |
+        line = pthis->pbuf + pthis->pos; | |
+        while( pthis->pos + linelen < pthis->buflen && line[linelen] != '\n' ) | |
+        { | |
+            linelen++; | |
+        } | |
+        if( pthis->pos + linelen < pthis->buflen ) | |
+        { | |
+            linelen++; | |
+        } | |
+        pthis->lineend = pthis->pos + linelen; | |
+ | |
+        /* a line that matches none of the patterns is handed to the caller */ | |
+        if( !( is_base64( line, linelen, &skiplen ) || | |
+               is_ignoredheader( line, linelen, &skiplen ) || | |
+               is_mailerid( line, linelen, &skiplen ) || | |
+               is_mimeboundary( line, linelen, &skiplen ) || | |
+               is_spamtext( line, linelen, &skiplen ) ) ) | |
+        { | |
+            return true; | |
+        } | |
+ | |
+        /* ignored line: step over the match and look at the next line */ | |
+        pthis->pos += skiplen; | |
+    } | |
+} | |
+ | |
+void lex_nexttoken( lex_t* pthis, tok_t* ptok ) | |
+{ | |
+ cpchar pbuf; | |
+ uint len; | |
+ uint toklen; | |
+ /* | |
+ * Produce the next token in *ptok.  ptok->tt is eof at the end of the | |
+ * buffer, from at an mbox "From " line, and word for words and IP | |
+ * addresses.  For from and word tokens, ptok->p points into | |
+ * pthis->pbuf and ptok->len gives the length; no terminator is written. | |
+ */ | |
+ assert( pthis->pbuf != NULL ); | |
+ | |
+ if( pthis->pos == pthis->eom ) /* continuing from the end of the previous message: a new one starts here */ | |
+ { | |
+ pthis->bom = pthis->pos; | |
+ } | |
+ | |
+again: | |
+ /* skip whitespace between tokens */ | |
+ while( pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis->po… | |
+ { | |
+ pthis->pos++; | |
+ } | |
+ | |
+ pbuf = pthis->pbuf + pthis->pos; | |
+ len = pthis->lineend - pthis->pos; | |
+ | |
+ /* possibilities: end-of-line, html-comment, ipaddr, word, junk */ | |
+ | |
+ if( pthis->pos == pthis->lineend ) | |
+ { | |
+ if( !lex_nextline( pthis ) ) | |
+ { | |
+ pthis->eom = pthis->pos; | |
+ ptok->tt = eof; | |
+ return; | |
+ } | |
+ | |
+ pbuf = pthis->pbuf + pthis->pos; | |
+ len = pthis->lineend - pthis->pos; | |
+ | |
+ if( pthis->mboxtype == mbox ) /* "From " is only tested at the start of a fresh line */ | |
+ { | |
+ if( is_from( pbuf, len, &toklen ) ) | |
+ { | |
+ pthis->eom = pthis->pos; | |
+ ptok->tt = from; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ return; | |
+ } | |
+ } | |
+ | |
+ goto again; /* skip lws */ | |
+ } | |
+ | |
+ if( is_htmltag( pbuf, len, &toklen ) || | |
+ is_htmlcomment( pbuf, len, &toklen ) || | |
+ is_smtpid( pbuf, len, &toklen ) || | |
+ is_boundaryequal( pbuf, len, &toklen ) || | |
+ is_nameequal( pbuf, len, &toklen ) || | |
+ is_filenameequal( pbuf, len, &toklen ) ) | |
+ { | |
+ /* ignore it */ | |
+ pthis->pos += toklen; | |
+ goto again; | |
+ } | |
+ | |
+ if( is_ipaddr( pbuf, len, &toklen ) ) /* IP addresses are reported with type word */ | |
+ { | |
+ ptok->tt = word; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ return; | |
+ } | |
+ if( is_word( pbuf, len, &toklen ) ) | |
+ { | |
+ ptok->tt = word; | |
+ ptok->p = pthis->pbuf + pthis->pos; | |
+ ptok->len = toklen; | |
+ pthis->pos += toklen; | |
+ if( toklen > MAXWORDLEN ) /* over-long word: already consumed, but not returned */ | |
+ { | |
+ goto again; | |
+ } | |
+ return; | |
+ } | |
+ | |
+ /* junk: step over one character and try again */ | |
+ pthis->pos++; | |
+ goto again; | |
+} | |
+ | |
+/* SpamAssassin style passthru */ | |
+/* | |
+ * Copy the current message (bom..eom) to standard output, SpamAssassin | |
+ * style: existing header lines are passed through except those starting | |
+ * with "X-Spam-" (any case), a new X-Spam-Status header (plus X-Spam-Flag | |
+ * for spam) is inserted at the end of the headers, and the rest of the | |
+ * message follows unchanged.  Afterwards bom is moved to eom. | |
+ * | |
+ *   is_spam  selects the "Yes" or "No" form of the status header | |
+ *   hits     value reported as hits= | |
+ * | |
+ * write() failures are not reported (the function returns void). | |
+ */ | |
+void lex_passthru( lex_t* pthis, bool_t is_spam, double hits ) | |
+{ | |
+    char   szbuf[256]; | |
+ | |
+    assert( pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen ); | |
+    assert( pthis->bom <= pthis->eom ); | |
+ | |
+    pthis->pos = pthis->bom; | |
+    /* bounded formatting: %f of a very large value must not overrun szbuf */ | |
+    if( is_spam ) | |
+    { | |
+        snprintf( szbuf, sizeof(szbuf), | |
+                  "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n" | |
+                  "X-Spam-Flag: YES\n", | |
+                  hits, SPAM_CUTOFF ); | |
+    } | |
+    else | |
+    { | |
+        snprintf( szbuf, sizeof(szbuf), | |
+                  "X-Spam-Status: No, hits=%f required=%f\n", | |
+                  hits, SPAM_CUTOFF ); | |
+    } | |
+ | |
+    /* existing headers */ | |
+    while( pthis->pos < pthis->eom ) | |
+    { | |
+        cpchar pbuf = pthis->pbuf + pthis->pos; | |
+        uint len = 0; | |
+        while( pthis->pos + len < pthis->buflen && pbuf[len] != '\n' ) | |
+        { | |
+            len++; | |
+        } | |
+        if( pthis->pos + len < pthis->buflen ) | |
+        { | |
+            len++; /* bump past the LF */ | |
+        } | |
+ | |
+        /* | |
+         * check for end of headers: an empty line, LF or CRLF. | |
+         * pbuf[1] is only examined when the line has a second character. | |
+         */ | |
+        if( pbuf[0] == '\n' || | |
+            (len >= 2 && pbuf[0] == '\r' && pbuf[1] == '\n') ) | |
+        { | |
+            /* end of headers */ | |
+            break; | |
+        } | |
+ | |
+        /* write header, ignoring existing spam headers */ | |
+        if( strncasecmp( pbuf, "X-Spam-", 7 ) != 0 ) | |
+        { | |
+            write( STDOUT_FILENO, pbuf, len ); | |
+        } | |
+ | |
+        pthis->pos += len; | |
+    } | |
+ | |
+    /* new headers */ | |
+    write( STDOUT_FILENO, szbuf, strlen(szbuf) ); | |
+ | |
+    /* remainder */ | |
+    if( pthis->pos < pthis->eom ) | |
+    { | |
+        write( STDOUT_FILENO, pthis->pbuf+pthis->pos, pthis->eom-pthis->pos ); | |
+    } | |
+    pthis->bom = pthis->eom; | |
+} | |
+ | |
+#ifdef UNIT_TEST | |
+ | |
+/* | |
+ * Unit-test driver: tokenise the file named on the command line (or | |
+ * standard input) and print one line per token. | |
+ */ | |
+int main( int argc, char** argv ) | |
+{ | |
+    int     fd; | |
+    lex_t   lex; | |
+    tok_t   tok; | |
+ | |
+    fd = STDIN_FILENO; | |
+    if( argc == 2 ) | |
+    { | |
+        fd = open( argv[1], O_RDONLY ); | |
+        if( fd < 0 ) | |
+        { | |
+            fprintf( stderr, "cannot open %s\n", argv[1] ); | |
+            exit( 1 ); | |
+        } | |
+    } | |
+ | |
+    /* lex_create() takes the mailbox type; let lex_load() detect it */ | |
+    lex_create( &lex, detect ); | |
+    if( ! lex_load( &lex, fd ) ) | |
+    { | |
+        fprintf( stderr, "cannot load file\n" ); | |
+        exit( 1 ); | |
+    } | |
+ | |
+    lex_nexttoken( &lex, &tok ); | |
+    while( tok.tt != eof ) | |
+    { | |
+        char    sztok[64]; | |
+        if( tok.len > MAXWORDLEN || tok.len >= sizeof(sztok) ) | |
+        { | |
+            printf( "*** token too long! ***\n" ); | |
+            exit( 1 ); | |
+        } | |
+ | |
+        /* terminate before lower-casing: strlwr() scans up to the NUL */ | |
+        memcpy( sztok, tok.p, tok.len ); | |
+        sztok[tok.len] = '\0'; | |
+        strlwr( sztok ); | |
+        printf( "get_token: %d '%s'\n", tok.tt, sztok ); | |
+ | |
+        lex_nexttoken( &lex, &tok ); | |
+    } | |
+ | |
+    lex_destroy( &lex ); | |
+    return 0; | |
+} | |
+ | |
+#endif /* def UNIT_TEST */ | |
diff --git a/lex.h b/lex.h | |
@@ -0,0 +1,44 @@ | |
+/* $Id: lex.h,v 1.4 2002/10/12 17:36:41 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _TOK_H | |
+#define _TOK_H | |
+ | |
+typedef enum { from, eof, word } toktype_t; /* from: mbox "From " line; eof: end of input; word: word or IP address */ | |
+ | |
+typedef struct _tok | |
+{ | |
+ toktype_t tt; /* token type */ | |
+ char* p; /* start of the token inside the lexer's buffer; not NUL-terminated */ | |
+ uint len; /* number of characters in the token */ | |
+} tok_t; | |
+ | |
+typedef enum { envelope, hdrs, body } msgsec_t; /* message section; lex_create() starts in envelope */ | |
+ | |
+typedef struct _lex | |
+{ | |
+ mbox_t mboxtype; /* mailbox format; lex_load() replaces detect with mbox or maildir */ | |
+ msgsec_t section; /* current section (envelope, headers, body) */ | |
+ uint pos; /* current position */ | |
+ uint bom; /* beginning of message */ | |
+ uint eom; /* end of current message (start of next) */ | |
+ uint lineend; /* line end (actually, start of next line) */ | |
+ uint buflen; /* length of buffer */ | |
+ char* pbuf; /* input data; allocated by lex_load(), freed by lex_destroy() */ | |
+} lex_t; | |
+ | |
+void lex_create ( lex_t* plex, mbox_t mboxtype ); | |
+void lex_destroy ( lex_t* plex ); | |
+ | |
+bool_t lex_load ( lex_t* plex, int fd ); | |
+void lex_nexttoken( lex_t* plex, tok_t* ptok ); | |
+ | |
+void lex_passthru ( lex_t* plex, bool_t is_spam, double hits ); | |
+ | |
+#endif /* ndef _TOK_H */ | |
diff --git a/str.c b/str.c | |
@@ -0,0 +1,78 @@ | |
+/* $Id: str.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+ | |
+/* | |
+ * Convert the NUL-terminated string s to lower case in place. | |
+ * Characters are converted to unsigned char before tolower(), which is | |
+ * only defined for unsigned char values and EOF. | |
+ */ | |
+void strlwr( char* s ) | |
+{ | |
+    while( *s != '\0' ) | |
+    { | |
+        *s = tolower( (unsigned char)*s ); | |
+        s++; | |
+    } | |
+} | |
+ | |
+/* | |
+ * Copy the NUL-terminated string s to d, converting to lower case. | |
+ * NOTE: unlike strcpy(), the terminating NUL is NOT copied; the caller | |
+ * must terminate d if it needs a C string. | |
+ * Characters are converted to unsigned char before tolower(), which is | |
+ * only defined for unsigned char values and EOF. | |
+ */ | |
+void strcpylwr( char* d, const char* s ) | |
+{ | |
+    while( *s != '\0' ) | |
+    { | |
+        *d++ = tolower( (unsigned char)*s++ ); | |
+    } | |
+} | |
+ | |
+/* | |
+ * Copy exactly n characters from s to d, converting to lower case. | |
+ * No terminating NUL is written and NUL characters in s do not stop the | |
+ * copy.  A zero or negative n copies nothing (a negative count used to | |
+ * keep the loop running far past both buffers). | |
+ * Characters are converted to unsigned char before tolower(). | |
+ */ | |
+void strncpylwr( char* d, const char* s, int n ) | |
+{ | |
+    while( n-- > 0 ) | |
+    { | |
+        *d++ = tolower( (unsigned char)*s++ ); | |
+    } | |
+} | |
+ | |
+/* Initialise pstr as the empty string: zero length, no data pointer. */ | |
+void str_create( str_t* pstr ) | |
+{ | |
+    pstr->len = 0; | |
+    pstr->p = NULL; | |
+} | |
+ | |
+void str_destroy( str_t* pstr ) | |
+{ | |
+ /* empty: nothing is released here. | |
+ * NOTE(review): str_t appears to borrow its characters (bvec_loadmsg | |
+ * points p at token text owned by the lexer) - confirm. */ | |
+} | |
+ | |
+/* | |
+ * Case-sensitive ordering of two counted strings.  The common prefix is | |
+ * compared with strncmp(); when it is equal the shorter string sorts | |
+ * first.  Returns <0, 0 or >0.  Both strings must be non-empty with a | |
+ * non-NULL pointer (asserted). | |
+ */ | |
+int str_cmp( const str_t* pthis, const str_t* pother ) | |
+{ | |
+    uint minlen = min( pthis->len, pother->len ); | |
+    int order; | |
+    assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); | |
+ | |
+    order = strncmp( pthis->p, pother->p, minlen ); | |
+    if( order != 0 || pthis->len == pother->len ) | |
+    { | |
+        return order; | |
+    } | |
+ | |
+    /* equal prefix, different lengths */ | |
+    return (pthis->len < pother->len) ? -1 : 1; | |
+} | |
+ | |
+/* | |
+ * Case-insensitive ordering of two counted strings.  The common prefix is | |
+ * compared with strncasecmp(); when it is equal the shorter string sorts | |
+ * first.  Returns <0, 0 or >0.  Both strings must be non-empty with a | |
+ * non-NULL pointer (asserted). | |
+ */ | |
+int str_casecmp( const str_t* pthis, const str_t* pother ) | |
+{ | |
+    uint minlen = min( pthis->len, pother->len ); | |
+    int order; | |
+    assert( pthis->p != NULL && pother->p != NULL && minlen != 0 ); | |
+ | |
+    order = strncasecmp( pthis->p, pother->p, minlen ); | |
+    if( order != 0 || pthis->len == pother->len ) | |
+    { | |
+        return order; | |
+    } | |
+ | |
+    /* equal prefix, different lengths */ | |
+    return (pthis->len < pother->len) ? -1 : 1; | |
+} | |
diff --git a/str.h b/str.h | |
@@ -0,0 +1,30 @@ | |
+/* $Id: str.h,v 1.1.1.1 2002/09/30 21:08:29 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _STR_H | |
+#define _STR_H | |
+ | |
+/* a couple of generic string functions... */ | |
+void strlwr( char* s ); | |
+void strcpylwr( char* d, const char* s ); | |
+void strncpylwr( char* d, const char* s, int n ); | |
+ | |
+typedef struct _str | |
+{ | |
+ char* p; /* first character; str_cmp()/str_casecmp() compare by len, not by a NUL terminator */ | |
+ uint len; /* number of characters */ | |
+} str_t; | |
+ | |
+void str_create ( str_t* pthis ); | |
+void str_destroy( str_t* pthis ); | |
+ | |
+int str_cmp ( const str_t* pthis, const str_t* pother ); | |
+int str_casecmp( const str_t* pthis, const str_t* pother ); | |
+ | |
+#endif /* ndef _STR_H */ | |
diff --git a/vec.c b/vec.c | |
@@ -0,0 +1,345 @@ | |
+/* $Id: vec.c,v 1.4 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ * | |
+ * vec.c: vector functions for bmf. | |
+ * Vectors are used to hold token lists for input data and flatfile database | |
+ * entries in standalone mode. They dramatically reduce the number of small | |
+ * mallocs and, if used properly, have no performance penalty over fancier | |
+ * data structures. | |
+ */ | |
+ | |
+#include "config.h" | |
+#include "dbg.h" | |
+#include "str.h" | |
+#include "lex.h" | |
+#include "vec.h" | |
+ | |
+/***************************************************************************** | |
+ * vector | |
+ */ | |
+ | |
+/* | |
+ * Initialise an empty vector with room for VEC_INITIAL_SIZE items. | |
+ * Exits with status 2 if the allocation fails, the same policy that | |
+ * vec_setsize() applies when growing (the result used to go unchecked). | |
+ */ | |
+void vec_create( vec_t* pthis ) | |
+{ | |
+    pthis->nalloc = VEC_INITIAL_SIZE; | |
+    pthis->nitems = 0; | |
+    pthis->pitems = (str_t*)malloc( VEC_INITIAL_SIZE*sizeof(str_t) ); | |
+    if( pthis->pitems == NULL ) | |
+    { | |
+        exit( 2 ); | |
+    } | |
+} | |
+ | |
+/* | |
+ * Release the item array.  The vector is left empty (no items, no | |
+ * capacity, NULL array) so that a stale iterator gets NULL from | |
+ * veciter_get() and a second vec_destroy() is harmless. | |
+ */ | |
+void vec_destroy( vec_t* pthis ) | |
+{ | |
+    free( pthis->pitems ); | |
+    pthis->pitems = NULL; | |
+    pthis->nitems = 0; | |
+    pthis->nalloc = 0; | |
+} | |
+ | |
+/* | |
+ * Make sure the vector can hold nsize items.  When it cannot, the capacity | |
+ * is doubled (or raised to nsize if that is larger) and the slots from | |
+ * nitems up to nsize are initialised as empty strings.  nitems itself is | |
+ * not changed.  Exits with status 2 when memory runs out. | |
+ */ | |
+static void vec_setsize( vec_t* pthis, uint nsize ) | |
+{ | |
+    uint    grown; | |
+    uint    slot; | |
+    str_t*  moved; | |
+ | |
+    if( nsize <= pthis->nalloc ) | |
+    { | |
+        return; | |
+    } | |
+ | |
+    grown = pthis->nalloc * 2; | |
+    if( grown < nsize ) | |
+    { | |
+        grown = nsize; | |
+    } | |
+ | |
+    moved = (str_t*)realloc( pthis->pitems, grown*sizeof(str_t) ); | |
+    if( moved == NULL ) | |
+    { | |
+        exit( 2 ); | |
+    } | |
+    pthis->pitems = moved; | |
+    pthis->nalloc = grown; | |
+ | |
+    for( slot = pthis->nitems; slot < nsize; slot++ ) | |
+    { | |
+        str_create( &moved[slot] ); | |
+    } | |
+} | |
+ | |
+void vec_addhead( vec_t* pthis, str_t* pstr ) | |
+{ | |
+ assert( pstr->p != NULL && pstr->len > 0 ); | |
+ /* insert a copy of *pstr at index 0: grow if needed, shift the items up, store */ | |
+ vec_setsize( pthis, pthis->nitems+1 ); | |
+ memmove( &pthis->pitems[1], &pthis->pitems[0], pthis->nitems*sizeof(str_t)… | |
+ pthis->pitems[0] = *pstr; | |
+ pthis->nitems++; | |
+} | |
+ | |
+/* | |
+ * Append a copy of *pstr (the pointer/length pair, not the characters) to | |
+ * the end of the vector, growing it when necessary.  Empty strings are | |
+ * rejected by assertion. | |
+ */ | |
+void vec_addtail( vec_t* pthis, str_t* pstr ) | |
+{ | |
+    uint tail; | |
+ | |
+    assert( pstr->p != NULL && pstr->len > 0 ); | |
+ | |
+    vec_setsize( pthis, pthis->nitems+1 ); | |
+    tail = pthis->nitems; | |
+    pthis->pitems[tail] = *pstr; | |
+    pthis->nitems = tail + 1; | |
+} | |
+ | |
+void vec_delhead( vec_t* pthis ) | |
+{ | |
+ assert( pthis->nitems > 0 ); /* the vector must not be empty */ | |
+ pthis->nitems--; /* drop item 0: count first, then shift the remaining items down */ | |
+ memmove( &pthis->pitems[0], &pthis->pitems[1], pthis->nitems*sizeof(str_t)… | |
+} | |
+ | |
+/* Drop the last item; the slot itself is left as it is.  The vector must not be empty. */ | |
+void vec_deltail( vec_t* pthis ) | |
+{ | |
+    assert( pthis->nitems > 0 ); | |
+    pthis->nitems -= 1; | |
+} | |
+ | |
+/* Point piter at the first item (index 0) of pthis. */ | |
+void vec_first( vec_t* pthis, veciter_t* piter ) | |
+{ | |
+    piter->index = 0; | |
+    piter->plist = pthis; | |
+} | |
+ | |
+/* Point piter one past the last item of pthis (index == nitems). */ | |
+void vec_last( vec_t* pthis, veciter_t* piter ) | |
+{ | |
+    piter->index = pthis->nitems; | |
+    piter->plist = pthis; | |
+} | |
+ | |
+/***************************************************************************** | |
+ * sorted vector | |
+ */ | |
+ | |
+/* qsort() comparison callback: case-insensitive order of two str_t items. */ | |
+static int svec_compare( const void* p1, const void* p2 ) | |
+{ | |
+    const str_t* lhs = (const str_t*)p1; | |
+    const str_t* rhs = (const str_t*)p2; | |
+ | |
+    return str_casecmp( lhs, rhs ); | |
+} | |
+ | |
+void svec_add( vec_t* pthis, str_t* pstr ) | |
+{ | |
+ int lo, hi, mid; | |
+ veciter_t iter; | |
+ /* | |
+ * Insert *pstr into a vector kept in str_casecmp() order.  An item | |
+ * that compares equal to an existing one is still inserted, after it. | |
+ */ | |
+ if( pthis->nitems == 0 ) | |
+ { | |
+ vec_addtail( pthis, pstr ); | |
+ return; | |
+ } | |
+ | |
+ if( str_casecmp( pstr, &pthis->pitems[0] ) < 0 ) /* sorts before everything: new head */ | |
+ { | |
+ vec_addhead( pthis, pstr ); | |
+ return; | |
+ } | |
+ | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while( hi-lo > 1 ) /* binary search: hi ends on the first item >= *pstr, or on the last item */ | |
+ { | |
+ mid = (hi+lo)/2; | |
+ if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert( hi < pthis->nitems ); | |
+ | |
+ iter.plist = pthis; | |
+ iter.index = hi; | |
+ | |
+ if( str_casecmp( pstr, &pthis->pitems[hi] ) < 0 ) | |
+ { | |
+ veciter_addbefore( &iter, pstr ); | |
+ } | |
+ else | |
+ { | |
+ veciter_addafter( &iter, pstr ); | |
+ } | |
+} | |
+ | |
+str_t* svec_find( vec_t* pthis, str_t* pstr ) | |
+{ | |
+ int lo, hi, mid; | |
+ /* | |
+ * Binary search for an item equal to *pstr under str_casecmp(). | |
+ * Returns a pointer into pthis->pitems, or NULL if there is none. | |
+ * The items must already be in str_casecmp() order. | |
+ */ | |
+ if( pthis->nitems == 0 ) | |
+ { | |
+ return NULL; | |
+ } | |
+ | |
+ hi = pthis->nitems - 1; | |
+ lo = -1; | |
+ while( hi-lo > 1 ) /* hi ends on the first item >= *pstr, or on the last item */ | |
+ { | |
+ mid = (hi+lo)/2; | |
+ if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 ) | |
+ hi = mid; | |
+ else | |
+ lo = mid; | |
+ } | |
+ assert( hi >= 0 && hi < pthis->nitems ); | |
+ | |
+ if( str_casecmp( pstr, &pthis->pitems[hi] ) != 0 ) | |
+ { | |
+ return NULL; | |
+ } | |
+ | |
+ return &pthis->pitems[hi]; | |
+} | |
+ | |
+/* Sort the items into case-insensitive order; vectors with fewer than two items are left alone. */ | |
+void svec_sort( vec_t* pthis ) | |
+{ | |
+    if( pthis->nitems <= 1 ) | |
+    { | |
+        return; | |
+    } | |
+ | |
+    qsort( pthis->pitems, pthis->nitems, sizeof(str_t), svec_compare ); | |
+} | |
+ | |
+/***************************************************************************** | |
+ * vector iterator | |
+ */ | |
+ | |
+void veciter_destroy( veciter_t* pthis ) | |
+{ | |
+ /* empty: this function releases nothing */ | |
+} | |
+ | |
+/* | |
+ * Return the item the iterator points at, or NULL when the iterator has no | |
+ * list or stands at or beyond the end of it. | |
+ */ | |
+str_t* veciter_get( veciter_t* pthis ) | |
+{ | |
+    if( pthis->plist != NULL && pthis->index < pthis->plist->nitems ) | |
+    { | |
+        return &pthis->plist->pitems[pthis->index]; | |
+    } | |
+ | |
+    return NULL; | |
+} | |
+ | |
+/* Two iterators are equal when they refer to the same list and the same index. */ | |
+bool_t veciter_equal( veciter_t* pthis, veciter_t* pthat ) | |
+{ | |
+    if( pthis->plist == pthat->plist && | |
+        pthis->index == pthat->index ) | |
+    { | |
+        return true; | |
+    } | |
+ | |
+    return false; | |
+} | |
+ | |
+/* True when the iterator has a list and its index is below the item count. */ | |
+bool_t veciter_hasitem( veciter_t* pthis ) | |
+{ | |
+    if( pthis->plist != NULL && pthis->index < pthis->plist->nitems ) | |
+    { | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* Step back one item.  Returns false, without moving, when already at index 0. */ | |
+bool_t veciter_prev( veciter_t* pthis ) | |
+{ | |
+    if( pthis->index > 0 ) | |
+    { | |
+        pthis->index--; | |
+        return true; | |
+    } | |
+    return false; | |
+} | |
+ | |
+/* | |
+ * Advance to the next item.  Returns true while the iterator still points | |
+ * at an item, false once it has moved to or beyond the end. | |
+ * | |
+ * The end test is ">=" rather than "==": on an empty vector, or on an | |
+ * iterator that was already at the end, the index steps past nitems and | |
+ * the old equality test then reported true. | |
+ */ | |
+bool_t veciter_next( veciter_t* pthis ) | |
+{ | |
+    pthis->index++; | |
+    if( pthis->index >= pthis->plist->nitems ) | |
+    { | |
+        return false; | |
+    } | |
+    return true; | |
+} | |
+ | |
+void veciter_addafter( veciter_t* pthis, str_t* pstr ) | |
+{ | |
+ str_t* pitems; | |
+ /* insert a copy of *pstr right after the item at pthis->index; the index itself is unchanged */ | |
+ vec_setsize( pthis->plist, pthis->plist->nitems+1 ); | |
+ assert( pthis->index < pthis->plist->nitems ); | |
+ pitems = pthis->plist->pitems; /* read after vec_setsize(), which may have moved the array */ | |
+ | |
+ if( pthis->index != pthis->plist->nitems-1 ) /* nothing to shift when inserting after the last item */ | |
+ { | |
+ memmove( &pitems[pthis->index+2], &pitems[pthis->index+1], | |
+ (pthis->plist->nitems-pthis->index-1) * sizeof(str_t) ); | |
+ } | |
+ | |
+ pitems[pthis->index+1] = *pstr; | |
+ pthis->plist->nitems++; | |
+} | |
+ | |
+void veciter_addbefore( veciter_t* pthis, str_t* pstr ) | |
+{ | |
+ str_t* pitems; | |
+ /* insert a copy of *pstr at pthis->index; the item that was there moves up by one */ | |
+ vec_setsize( pthis->plist, pthis->plist->nitems+1 ); | |
+ assert( pthis->index < pthis->plist->nitems ); | |
+ pitems = pthis->plist->pitems; /* read after vec_setsize(), which may have moved the array */ | |
+ | |
+ memmove( &pitems[pthis->index+1], &pitems[pthis->index], | |
+ (pthis->plist->nitems-pthis->index) * sizeof(str_t) ); | |
+ | |
+ pitems[pthis->index] = *pstr; | |
+ pthis->plist->nitems++; | |
+} | |
+ | |
+/* | |
+ * Remove the item at the iterator's index and close the gap.  If the | |
+ * iterator stands at or beyond the last item, the item count is simply | |
+ * reduced by one.  The list must not be empty. | |
+ */ | |
+void veciter_del( veciter_t* pthis ) | |
+{ | |
+    vec_t*  list = pthis->plist; | |
+    uint    following; | |
+ | |
+    assert( pthis->plist->nitems > 0 ); | |
+    list->nitems--; | |
+    if( pthis->index >= list->nitems ) | |
+    { | |
+        return; | |
+    } | |
+ | |
+    /* items after the removed one slide down by one slot */ | |
+    following = list->nitems - pthis->index; | |
+    memmove( &list->pitems[pthis->index], &list->pitems[pthis->index+1], | |
+             following * sizeof(str_t) ); | |
+} | |
+ | |
+#ifdef UNIT_TEST | |
+/* | |
+ * Unit-test driver: load a vector from the file named on the command line | |
+ * and print every item, 100 times over. | |
+ */ | |
+int main( int argc, char** argv ) | |
+{ | |
+    vec_t       vl; | |
+    veciter_t   iter; | |
+    str_t*      pstr; | |
+    uint        n; | |
+ | |
+    if( argc != 2 ) | |
+    { | |
+        fprintf( stderr, "usage: %s <file>\n", argv[0] ); | |
+        return 1; | |
+    } | |
+ | |
+    for( n = 0; n < 100; n++ ) | |
+    { | |
+        vec_create( &vl ); | |
+        /* NOTE(review): vec_load() is not declared in vec.h; this driver | |
+         * cannot link until a loader is provided. */ | |
+        vec_load( &vl, argv[1] ); | |
+ | |
+        vec_first( &vl, &iter ); | |
+        while( (pstr = veciter_get( &iter )) != NULL ) | |
+        { | |
+            char  buf[256]; | |
+            char* p; | |
+            if( pstr->len > 200 ) | |
+            { | |
+                fprintf( stderr, "str too long: %u chars\n", pstr->len ); | |
+                break; | |
+            } | |
+            p = buf; | |
+            strcpy( buf, "str: " ); | |
+            /* 5 == strlen("str: "): continue on top of the NUL.  Advancing | |
+             * by 6 left the NUL in place, so only "str: " was printed. */ | |
+            p += 5; | |
+            memcpy( p, pstr->p, pstr->len ); | |
+            p += pstr->len; | |
+            /* str_t has no count member; print the length instead */ | |
+            sprintf( p, " %u", pstr->len ); | |
+            puts( buf ); | |
+ | |
+            veciter_next( &iter ); | |
+        } | |
+ | |
+        vec_destroy( &vl ); | |
+    } | |
+ | |
+    return 0; | |
+} | |
+#endif /* def UNIT_TEST */ | |
diff --git a/vec.h b/vec.h | |
@@ -0,0 +1,58 @@ | |
+/* $Id: vec.h,v 1.3 2002/10/20 18:19:17 tommy Exp $ */ | |
+ | |
+/* | |
+ * Copyright (c) 2002 Tom Marshall <[email protected]> | |
+ * | |
+ * This program is free software. It may be distributed under the terms | |
+ * in the file LICENSE, found in the top level of the distribution. | |
+ */ | |
+ | |
+#ifndef _VEC_H | |
+#define _VEC_H | |
+ | |
+/* item count for initial alloc */ | |
+#define VEC_INITIAL_SIZE 256 | |
+ | |
+typedef struct _vec | |
+{ | |
+ uint nalloc; /* items alloced in pitems */ | |
+ uint nitems; /* items available */ | |
+ str_t* pitems; /* growing vector of items */ | |
+} vec_t; | |
+ | |
+typedef struct _veciter | |
+{ | |
+ struct _vec* plist; /* vector being iterated; set by vec_first() / vec_last() */ | |
+ uint index; /* current position; veciter_get() returns NULL once index >= nitems */ | |
+} veciter_t; | |
+ | |
+/* class vector */ | |
+void vec_create ( vec_t* pthis ); | |
+void vec_destroy ( vec_t* pthis ); | |
+ | |
+void vec_addhead ( vec_t* pthis, str_t* pstr ); | |
+void vec_addtail ( vec_t* pthis, str_t* pstr ); | |
+void vec_delhead ( vec_t* pthis ); | |
+void vec_deltail ( vec_t* pthis ); | |
+ | |
+void vec_first ( vec_t* pthis, veciter_t* piter ); | |
+void vec_last ( vec_t* pthis, veciter_t* piter ); | |
+ | |
+/* class sorted_vector */ | |
+void svec_add ( vec_t* pthis, str_t* pstr ); | |
+str_t* svec_find ( vec_t* pthis, str_t* pstr ); | |
+void    svec_sort  ( vec_t* pthis ); | |
+ | |
+/* veciter_create not needed */ | |
+void veciter_destroy ( veciter_t* pthis ); | |
+ | |
+str_t* veciter_get ( veciter_t* pthis ); | |
+bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat ); | |
+bool_t veciter_hasitem ( veciter_t* pthis ); | |
+bool_t veciter_prev ( veciter_t* pthis ); | |
+bool_t veciter_next ( veciter_t* pthis ); | |
+void veciter_addafter ( veciter_t* pthis, str_t* pstr ); | |
+void veciter_addbefore( veciter_t* pthis, str_t* pstr ); | |
+void veciter_del ( veciter_t* pthis ); | |
+ | |
+#endif /* ndef _VEC_H */ |