GopherProxy

	import bmf 0.9.4 - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
	git clone git://git.codemadness.org/bmf
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 0983b0f64c3e1bf7fa03f2a4060e6f25e9e79cef
	Author: Hiltjo Posthuma <[email protected]>
	Date: Sat, 22 Sep 2018 17:46:14 +0200

	import bmf 0.9.4

	Diffstat:
	A AUTHORS \| 4 ++++
	A ChangeLog \| 95 ++++++++++++++++++++++++++++++
	A LICENSE \| 340 +++++++++++++++++++++++++++++…
	A Makefile.in \| 81 ++++++++++++++++++++++++++++++
	A README \| 130 +++++++++++++++++++++++++++++…
	A TODO \| 4 ++++
	A bmf.1 \| 148 +++++++++++++++++++++++++++++…
	A bmf.c \| 339 +++++++++++++++++++++++++++++…
	A bmf.spec.in \| 64 +++++++++++++++++++++++++++++…
	A bmfconv.1 \| 81 ++++++++++++++++++++++++++++++
	A bmfconv.c \| 169 +++++++++++++++++++++++++++++…
	A config.h \| 80 +++++++++++++++++++++++++++++…
	A configure \| 354 +++++++++++++++++++++++++++++…
	A dbdb.c \| 684 +++++++++++++++++++++++++++++…
	A dbdb.h \| 61 +++++++++++++++++++++++++++++…
	A dbg.c \| 302 +++++++++++++++++++++++++++++…
	A dbg.h \| 35 +++++++++++++++++++++++++++++…
	A dbh.c \| 74 +++++++++++++++++++++++++++++…
	A dbh.h \| 56 +++++++++++++++++++++++++++++…
	A dbmysql.c \| 545 +++++++++++++++++++++++++++++…
	A dbmysql.h \| 60 +++++++++++++++++++++++++++++…
	A dbtext.c \| 591 +++++++++++++++++++++++++++++…
	A dbtext.h \| 53 ++++++++++++++++++++++++++++++
	A filt.c \| 175 +++++++++++++++++++++++++++++…
	A filt.h \| 31 +++++++++++++++++++++++++++++…
	A lex.c \| 787 +++++++++++++++++++++++++++++…
	A lex.h \| 44 +++++++++++++++++++++++++++++…
	A str.c \| 78 +++++++++++++++++++++++++++++…
	A str.h \| 30 ++++++++++++++++++++++++++++++
	A vec.c \| 345 +++++++++++++++++++++++++++++…
	A vec.h \| 58 ++++++++++++++++++++++++++++++

	31 files changed, 5898 insertions(+), 0 deletions(-)
	---
	diff --git a/AUTHORS b/AUTHORS
	@@ -0,0 +1,4 @@
	+# $Id: AUTHORS,v 1.1.1.1 2002/09/30 21:08:29 tommy Exp $
	+
	+Tom Marshall <[email protected]>
	+ Initial version
	diff --git a/ChangeLog b/ChangeLog
	@@ -0,0 +1,95 @@
	+Revision history for bmf:
	+
	+0.9.4: 20 Oct 2002
	+ * Remove X-RBL-Warning from ignored headers.
	+
	+0.9.4pre7: 20 Oct 2002
	+ * Update documentation.
	+
	+0.9.4pre6: 20 Oct 2002
	+ * Move Bayes stuff into its own file.
	+
	+0.9.4pre5: 20 Oct 2002
	+ * Fix NaN exception: if list is empty, use zero for probability.
	+ * Make extrema array (keepers) variable size. Needs more work.
	+
	+0.9.4pre4: 19 Oct 2002
	+ * Add configure section for Darwin.
	+ * Don't use file locking on Darwin, it is not supported.
	+
	+0.9.4pre3: 19 Oct 2002
	+ * Fixup configure script for OSF1.
	+ * which(1) always returns 0 on OSF1, use type(1) instead.
	+ * Add SYSLIBS to the makefile.
	+ * Fix gcc-ism in dbg.c (ptr arithmetic on void*).
	+ * Fix off-by-one in html tag check.
	+ * Fix unaligned access in libdb.
	+
	+0.9.4pre2: 18 Oct 2002
	+ * Fix bug in -d handling for text and libdb.
	+ (Found by Björn Kalkbrenn)
	+
	+0.9.4pre1: 17 Oct 2002
	+ * Autodetect mailbox type and deprecate the -m option.
	+
	+0.9.3: 14 Oct 2002
	+ * Ditch the builtin libdb locks, use fcntl instead.
	+ * Fix memory leak in dbtext.
	+ * Fix some trivial issues with the lexer:
	+ - Be more strict about recognizing IP addresses.
	+ - Do case-insensitive header name comparisons.
	+ * Fix multiple database closure with mbox format.
	+ * Fix a bogus assert in passthrough.
	+ * Add verbose flag (no functionality yet).
	+ * Add heap checking in debug mode.
	+ * Fix bug in -N mode which made it act the same as -S.
	+ * Add X-RBL-Warning to ignored headers.
	+ * Support maildir style folders.
	+
	+0.9.2: 12 Oct 2002
	+ * Fix bug in multiple message registration.
	+
	+0.9.1: 12 Oct 2002
	+ * Improve error reporting and clarify some messages.
	+ * Package preformatted manpage instead of XML.
	+ * Remove single message per invocation restriction.
	+
	+0.84: 09 Oct 2002
	+ * Fix linker flags for autodetected libdb 4.1 in /usr/local on BSD.
	+
	+0.84pre3: 07 Oct 2002
	+ * Yet another libdb api fix. DB->open() in 4.0 is the same as 3.x.
	+
	+0.84pre2: 07 Oct 2002
	+ * Fix bug in dbdb (v1 only) that prevented file locking on FreeBSD.
	+
	+0.84pre1: 07 Oct 2002
	+ * Fix bug in dbtext that caused segfault searching an empty list.
	+
	+0.83: 07 Oct 2002
	+ * Fix bug preventing creation of libdb files when using -n or -s.
	+ * Fix bug in libdb unmergeclose function logic. Users are strongly
	+ encouraged to delete and rebuild word lists if possible. If not
	+ possible, at least export to text and remove entries
	+ that represent unsigned underflow (eg. 4.2 billion).
	+
	+0.82: 06 Oct 2002
	+ * Support GNU style --with-package=path options.
	+ (copied from autoconf output)
	+ * Use mysql_config in configure script.
	+ * Support libdb v1 and v4.
	+ * Find and use BerkeleyDB 4.1 in *BSD.
	+ (thanks to [email protected])
	+ * Clarify and robustify argument handling (fixes pr618875).
	+
	+0.81: 03 Oct 2002
	+ * Add configure script and conditionally compile libdb and mysql.
	+ * Add manpage for bmfconv.
	+ * Cleanup manpage for bmf.
	+ * Rearrange makefile a bit.
	+ * Remove -f text options in bmfconv, it is not supported.
	+ * Fix mysql typo that prevented linking.
	+ * Tweak X-Spam headers a bit.
	+
	+0.80: 02 Oct 2002
	+ * Initial release.
	diff --git a/LICENSE b/LICENSE
	@@ -0,0 +1,340 @@
	+ GNU GENERAL PUBLIC LICENSE
	+ Version 2, June 1991
	+
	+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
	+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	+ Everyone is permitted to copy and distribute verbatim copies
	+ of this license document, but changing it is not allowed.
	+
	+ Preamble
	+
	+ The licenses for most software are designed to take away your
	+freedom to share and change it. By contrast, the GNU General Public
	+License is intended to guarantee your freedom to share and change free
	+software--to make sure the software is free for all its users. This
	+General Public License applies to most of the Free Software
	+Foundation's software and to any other program whose authors commit to
	+using it. (Some other Free Software Foundation software is covered by
	+the GNU Library General Public License instead.) You can apply it to
	+your programs, too.
	+
	+ When we speak of free software, we are referring to freedom, not
	+price. Our General Public Licenses are designed to make sure that you
	+have the freedom to distribute copies of free software (and charge for
	+this service if you wish), that you receive source code or can get it
	+if you want it, that you can change the software or use pieces of it
	+in new free programs; and that you know you can do these things.
	+
	+ To protect your rights, we need to make restrictions that forbid
	+anyone to deny you these rights or to ask you to surrender the rights.
	+These restrictions translate to certain responsibilities for you if you
	+distribute copies of the software, or if you modify it.
	+
	+ For example, if you distribute copies of such a program, whether
	+gratis or for a fee, you must give the recipients all the rights that
	+you have. You must make sure that they, too, receive or can get the
	+source code. And you must show them these terms so they know their
	+rights.
	+
	+ We protect your rights with two steps: (1) copyright the software, and
	+(2) offer you this license which gives you legal permission to copy,
	+distribute and/or modify the software.
	+
	+ Also, for each author's protection and ours, we want to make certain
	+that everyone understands that there is no warranty for this free
	+software. If the software is modified by someone else and passed on, we
	+want its recipients to know that what they have is not the original, so
	+that any problems introduced by others will not reflect on the original
	+authors' reputations.
	+
	+ Finally, any free program is threatened constantly by software
	+patents. We wish to avoid the danger that redistributors of a free
	+program will individually obtain patent licenses, in effect making the
	+program proprietary. To prevent this, we have made it clear that any
	+patent must be licensed for everyone's free use or not licensed at all.
	+
	+ The precise terms and conditions for copying, distribution and
	+modification follow.
	+
	+ GNU GENERAL PUBLIC LICENSE
	+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
	+
	+ 0. This License applies to any program or other work which contains
	+a notice placed by the copyright holder saying it may be distributed
	+under the terms of this General Public License. The "Program", below,
	+refers to any such program or work, and a "work based on the Program"
	+means either the Program or any derivative work under copyright law:
	+that is to say, a work containing the Program or a portion of it,
	+either verbatim or with modifications and/or translated into another
	+language. (Hereinafter, translation is included without limitation in
	+the term "modification".) Each licensee is addressed as "you".
	+
	+Activities other than copying, distribution and modification are not
	+covered by this License; they are outside its scope. The act of
	+running the Program is not restricted, and the output from the Program
	+is covered only if its contents constitute a work based on the
	+Program (independent of having been made by running the Program).
	+Whether that is true depends on what the Program does.
	+
	+ 1. You may copy and distribute verbatim copies of the Program's
	+source code as you receive it, in any medium, provided that you
	+conspicuously and appropriately publish on each copy an appropriate
	+copyright notice and disclaimer of warranty; keep intact all the
	+notices that refer to this License and to the absence of any warranty;
	+and give any other recipients of the Program a copy of this License
	+along with the Program.
	+
	+You may charge a fee for the physical act of transferring a copy, and
	+you may at your option offer warranty protection in exchange for a fee.
	+
	+ 2. You may modify your copy or copies of the Program or any portion
	+of it, thus forming a work based on the Program, and copy and
	+distribute such modifications or work under the terms of Section 1
	+above, provided that you also meet all of these conditions:
	+
	+ a) You must cause the modified files to carry prominent notices
	+ stating that you changed the files and the date of any change.
	+
	+ b) You must cause any work that you distribute or publish, that in
	+ whole or in part contains or is derived from the Program or any
	+ part thereof, to be licensed as a whole at no charge to all third
	+ parties under the terms of this License.
	+
	+ c) If the modified program normally reads commands interactively
	+ when run, you must cause it, when started running for such
	+ interactive use in the most ordinary way, to print or display an
	+ announcement including an appropriate copyright notice and a
	+ notice that there is no warranty (or else, saying that you provide
	+ a warranty) and that users may redistribute the program under
	+ these conditions, and telling the user how to view a copy of this
	+ License. (Exception: if the Program itself is interactive but
	+ does not normally print such an announcement, your work based on
	+ the Program is not required to print an announcement.)
	+
	+These requirements apply to the modified work as a whole. If
	+identifiable sections of that work are not derived from the Program,
	+and can be reasonably considered independent and separate works in
	+themselves, then this License, and its terms, do not apply to those
	+sections when you distribute them as separate works. But when you
	+distribute the same sections as part of a whole which is a work based
	+on the Program, the distribution of the whole must be on the terms of
	+this License, whose permissions for other licensees extend to the
	+entire whole, and thus to each and every part regardless of who wrote it.
	+
	+Thus, it is not the intent of this section to claim rights or contest
	+your rights to work written entirely by you; rather, the intent is to
	+exercise the right to control the distribution of derivative or
	+collective works based on the Program.
	+
	+In addition, mere aggregation of another work not based on the Program
	+with the Program (or with a work based on the Program) on a volume of
	+a storage or distribution medium does not bring the other work under
	+the scope of this License.
	+
	+ 3. You may copy and distribute the Program (or a work based on it,
	+under Section 2) in object code or executable form under the terms of
	+Sections 1 and 2 above provided that you also do one of the following:
	+
	+ a) Accompany it with the complete corresponding machine-readable
	+ source code, which must be distributed under the terms of Sections
	+ 1 and 2 above on a medium customarily used for software interchange; or,
	+
	+ b) Accompany it with a written offer, valid for at least three
	+ years, to give any third party, for a charge no more than your
	+ cost of physically performing source distribution, a complete
	+ machine-readable copy of the corresponding source code, to be
	+ distributed under the terms of Sections 1 and 2 above on a medium
	+ customarily used for software interchange; or,
	+
	+ c) Accompany it with the information you received as to the offer
	+ to distribute corresponding source code. (This alternative is
	+ allowed only for noncommercial distribution and only if you
	+ received the program in object code or executable form with such
	+ an offer, in accord with Subsection b above.)
	+
	+The source code for a work means the preferred form of the work for
	+making modifications to it. For an executable work, complete source
	+code means all the source code for all modules it contains, plus any
	+associated interface definition files, plus the scripts used to
	+control compilation and installation of the executable. However, as a
	+special exception, the source code distributed need not include
	+anything that is normally distributed (in either source or binary
	+form) with the major components (compiler, kernel, and so on) of the
	+operating system on which the executable runs, unless that component
	+itself accompanies the executable.
	+
	+If distribution of executable or object code is made by offering
	+access to copy from a designated place, then offering equivalent
	+access to copy the source code from the same place counts as
	+distribution of the source code, even though third parties are not
	+compelled to copy the source along with the object code.
	+
	+ 4. You may not copy, modify, sublicense, or distribute the Program
	+except as expressly provided under this License. Any attempt
	+otherwise to copy, modify, sublicense or distribute the Program is
	+void, and will automatically terminate your rights under this License.
	+However, parties who have received copies, or rights, from you under
	+this License will not have their licenses terminated so long as such
	+parties remain in full compliance.
	+
	+ 5. You are not required to accept this License, since you have not
	+signed it. However, nothing else grants you permission to modify or
	+distribute the Program or its derivative works. These actions are
	+prohibited by law if you do not accept this License. Therefore, by
	+modifying or distributing the Program (or any work based on the
	+Program), you indicate your acceptance of this License to do so, and
	+all its terms and conditions for copying, distributing or modifying
	+the Program or works based on it.
	+
	+ 6. Each time you redistribute the Program (or any work based on the
	+Program), the recipient automatically receives a license from the
	+original licensor to copy, distribute or modify the Program subject to
	+these terms and conditions. You may not impose any further
	+restrictions on the recipients' exercise of the rights granted herein.
	+You are not responsible for enforcing compliance by third parties to
	+this License.
	+
	+ 7. If, as a consequence of a court judgment or allegation of patent
	+infringement or for any other reason (not limited to patent issues),
	+conditions are imposed on you (whether by court order, agreement or
	+otherwise) that contradict the conditions of this License, they do not
	+excuse you from the conditions of this License. If you cannot
	+distribute so as to satisfy simultaneously your obligations under this
	+License and any other pertinent obligations, then as a consequence you
	+may not distribute the Program at all. For example, if a patent
	+license would not permit royalty-free redistribution of the Program by
	+all those who receive copies directly or indirectly through you, then
	+the only way you could satisfy both it and this License would be to
	+refrain entirely from distribution of the Program.
	+
	+If any portion of this section is held invalid or unenforceable under
	+any particular circumstance, the balance of the section is intended to
	+apply and the section as a whole is intended to apply in other
	+circumstances.
	+
	+It is not the purpose of this section to induce you to infringe any
	+patents or other property right claims or to contest validity of any
	+such claims; this section has the sole purpose of protecting the
	+integrity of the free software distribution system, which is
	+implemented by public license practices. Many people have made
	+generous contributions to the wide range of software distributed
	+through that system in reliance on consistent application of that
	+system; it is up to the author/donor to decide if he or she is willing
	+to distribute software through any other system and a licensee cannot
	+impose that choice.
	+
	+This section is intended to make thoroughly clear what is believed to
	+be a consequence of the rest of this License.
	+
	+ 8. If the distribution and/or use of the Program is restricted in
	+certain countries either by patents or by copyrighted interfaces, the
	+original copyright holder who places the Program under this License
	+may add an explicit geographical distribution limitation excluding
	+those countries, so that distribution is permitted only in or among
	+countries not thus excluded. In such case, this License incorporates
	+the limitation as if written in the body of this License.
	+
	+ 9. The Free Software Foundation may publish revised and/or new versions
	+of the General Public License from time to time. Such new versions will
	+be similar in spirit to the present version, but may differ in detail to
	+address new problems or concerns.
	+
	+Each version is given a distinguishing version number. If the Program
	+specifies a version number of this License which applies to it and "any
	+later version", you have the option of following the terms and conditions
	+either of that version or of any later version published by the Free
	+Software Foundation. If the Program does not specify a version number of
	+this License, you may choose any version ever published by the Free Software
	+Foundation.
	+
	+ 10. If you wish to incorporate parts of the Program into other free
	+programs whose distribution conditions are different, write to the author
	+to ask for permission. For software which is copyrighted by the Free
	+Software Foundation, write to the Free Software Foundation; we sometimes
	+make exceptions for this. Our decision will be guided by the two goals
	+of preserving the free status of all derivatives of our free software and
	+of promoting the sharing and reuse of software generally.
	+
	+ NO WARRANTY
	+
	+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
	+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
	+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
	+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
	+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
	+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
	+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
	+REPAIR OR CORRECTION.
	+
	+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
	+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
	+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
	+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
	+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
	+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
	+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
	+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
	+POSSIBILITY OF SUCH DAMAGES.
	+
	+ END OF TERMS AND CONDITIONS
	+
	+ How to Apply These Terms to Your New Programs
	+
	+ If you develop a new program, and you want it to be of the greatest
	+possible use to the public, the best way to achieve this is to make it
	+free software which everyone can redistribute and change under these terms.
	+
	+ To do so, attach the following notices to the program. It is safest
	+to attach them to the start of each source file to most effectively
	+convey the exclusion of warranty; and each file should have at least
	+the "copyright" line and a pointer to where the full notice is found.
	+
	+ <one line to give the program's name and a brief idea of what it does.>
	+ Copyright (C) <year> <name of author>
	+
	+ This program is free software; you can redistribute it and/or modify
	+ it under the terms of the GNU General Public License as published by
	+ the Free Software Foundation; either version 2 of the License, or
	+ (at your option) any later version.
	+
	+ This program is distributed in the hope that it will be useful,
	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	+ GNU General Public License for more details.
	+
	+ You should have received a copy of the GNU General Public License
	+ along with this program; if not, write to the Free Software
	+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	+
	+
	+Also add information on how to contact you by electronic and paper mail.
	+
	+If the program is interactive, make it output a short notice like this
	+when it starts in an interactive mode:
	+
	+ Gnomovision version 69, Copyright (C) year name of author
	+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
	+ This is free software, and you are welcome to redistribute it
	+ under certain conditions; type `show c' for details.
	+
	+The hypothetical commands `show w' and `show c' should show the appropriate
	+parts of the General Public License. Of course, the commands you use may
	+be called something other than `show w' and `show c'; they could even be
	+mouse-clicks or menu items--whatever suits your program.
	+
	+You should also get your employer (if you work as a programmer) or your
	+school, if any, to sign a "copyright disclaimer" for the program, if
	+necessary. Here is a sample; alter the names:
	+
	+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
	+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
	+
	+ <signature of Ty Coon>, 1 April 1989
	+ Ty Coon, President of Vice
	+
	+This General Public License does not permit incorporating your program into
	+proprietary programs. If your program is a subroutine library, you may
	+consider it more useful to permit linking proprietary applications with the
	+library. If this is what you want to do, use the GNU Library General
	+Public License instead of this License.
	diff --git a/Makefile.in b/Makefile.in
	@@ -0,0 +1,81 @@
	+# Makefile for bmf
	+
	+BINDIR=/usr/bin
	+MANDIR=/usr/share/man
	+
	+VERSION=0.9.4
	+
	+CC=@CC@
	+CFLAGS=@CFLAGS@
	+LDFLAGS=@LDFLAGS@
	+SYSLIBS=@SYSLIBS@
	+
	+# For creating rpm packages
	+RPMROOT=/usr/src/rpm
	+RPM = rpm
	+RPMFLAGS = -ba
	+ARCH=`arch|sed 's/i[4-9]86/i386/'`
	+
	+all: bmf bmfconv
	+
	+bmf: bmf.o filt.o dbmysql.o dbdb.o dbtext.o dbh.o lex.o vec.o str.o dbg.o
	+ $(CC) -o $@ bmf.o filt.o dbmysql.o dbdb.o dbtext.o dbh.o lex.o vec.o s…
	+
	+bmf.o: bmf.c
	+ $(CC) $(CFLAGS) -DPACKAGE=\"bmf\" -DVERSION=\"$(VERSION)\" -c $<
	+
	+bmfconv: bmfconv.o dbmysql.o dbdb.o dbtext.o dbh.o vec.o str.o dbg.o
	+ $(CC) -o $@ bmfconv.o dbmysql.o dbdb.o dbtext.o dbh.o vec.o str.o dbg.…
	+
	+bmfconv.o: bmfconv.c
	+ $(CC) $(CFLAGS) -DPACKAGE=\"bmfconv\" -DVERSION=\"$(VERSION)\" -c $<
	+
	+install: checkroot bmf bmf.1 bmfconv bmfconv.1
	+ [ -d $(DESTDIR)$(BINDIR) ] || mkdir -p $(DESTDIR)$(BINDIR)
	+ [ -d $(DESTDIR)$(MANDIR)/man1 ] || mkdir -p $(DESTDIR)$(MANDIR)/man1
	+ cp bmf $(DESTDIR)$(BINDIR)
	+ cp bmf.1 $(DESTDIR)$(MANDIR)/man1
	+ cp bmfconv $(DESTDIR)$(BINDIR)
	+ cp bmfconv.1 $(DESTDIR)$(MANDIR)/man1
	+
	+uninstall: checkroot
	+ rm -f $(DESTDIR)$(BINDIR)/bmf
	+ rm -f $(DESTDIR)$(MANDIR)/man1/bmf.1
	+ rm -f $(DESTDIR)$(BINDIR)/bmfconv
	+ rm -f $(DESTDIR)$(MANDIR)/man1/bmfconv.1
	+
	+clean:
	+ rm -f core *.o bmf bmfconv
	+
	+distclean: clean
	+ rm -f Makefile
	+
	+dist: tarball rpmpkg debpkg
	+
	+tarball: distclean
	+ (cd ..; \
	+ cp -ar bmf bmf-$(VERSION); \
	+ tar czvf bmf-$(VERSION).tar.gz `find bmf-$(VERSION) -type f \| egrep -…
	+ rm -rf bmf-$(VERSION); \
	+ cd $(PWD))
	+
	+debpkg: checkroot
	+ debian/rules binary
	+ rm -rf debian/tmp
	+
	+rpmpkg: checkroot
	+ (mkdir -p $(RPMROOT); \
	+ mkdir -p $(RPMROOT)/SOURCES; \
	+ mkdir -p $(RPMROOT)/SPECS; \
	+ mkdir -p $(RPMROOT)/RPMS; \
	+ mkdir -p $(RPMROOT)/SRPMS; \
	+ cp ../bmf-$(VERSION).tar.gz $(RPMROOT)/SOURCES; \
	+ cat bmf.spec.in \| sed 's/VERSION/$(VERSION)/' > $(RPMROOT)/SPECS/bmf.…
	+ cd $(RPMROOT)/SPECS; \
	+ $(RPM) $(RPMFLAGS) bmf.spec; \
	+ cp $(RPMROOT)/RPMS/$(ARCH)/bmf-$(VERSION)-*.rpm $(PWD)/..; \
	+ cp $(RPMROOT)/SRPMS/bmf-$(VERSION)-*.src.rpm $(PWD)/..; \
	+ cd $(PWD))
	+
	+checkroot:
	+ [ "`whoami`" = root ] || (echo Need root; exit 1)
	diff --git a/README b/README
	@@ -0,0 +1,130 @@
	+ bmf -- Bayesian Mail Filter
	+
	+About bmf
	+=========
	+
	+This is a mail filter which uses the Bayes algorithm as explained in Paul
	+Graham's article "A Plan for Spam". It aims to be faster, smaller, and more
	+versatile than similar applications. Implementation is ANSI C and uses POSIX
	+functions. Supported platforms are (in theory) all POSIX systems. Support
	+for win32 is undecided.
	+
	+This project provides features which are not available in other filters:
	+
	+(1) Independence from external programs and libraries. Tokens are stored in
	+memory using simple vectors which require no heavyweight external data
	+structure libraries. Multiple token database formats are supported,
	+including flat files, libdb, and mysql. Conversion between formats will
	+always be possible with the included import/export utility and flat files
	+will always remain an option.
	+
	+(2) Efficient processing. Input data is parsed by a handcrafted parser
	+which weighs in under 3% of the equivalent code generated by flex. No
	+portion of the input is ever copied and all i/o and memory allocation are
	+done in large chunks. Updated token lists are merged and written in one
	+step. Hashing is being considered for the next version to improve lookup
	+speed.
	+
	+(3) Simple and elegant implementation. No heavyweight, copy-intensive mime
	+decoding routines are used. Decoding of quoted-printable text for selected
	+mime types is being considered for the next version.
	+
	+Note: the core filter function is from esr's bogofilter v0.6 (available at
	+http://sourceforge.net/projects/bogofilter/) with bugfix updates.
	+
	+For the most recent version of this software, see:
	+
	+ http://sourceforge.net/projects/bmf/
	+
	+How to integrate bmf
	+====================
	+
	+The following procmail recipes will invoke bmf for each incoming email and
	+place spam into $MAILDIR/spam. The first sample invokes bmf in its normal
	+mode of operation and the second invokes bmf as a filter.
	+
	+ ### begin sample one ###
	+ # Invoke bmf and use return code to filter spam in one step
	+ :0HB
	+ * ? bmf
	+ | formail -A"X-Spam-Status: Yes, tests=bmf" >>$MAILDIR/spam
	+
	+ ### begin sample two ###
	+ # Invoke bmf as a filter
	+ :0 fw
	+ | bmf -p
	+
	+ # Filter spam
	+ :0:
	+ ^X-Spam-Status: Yes
	+ $MAILDIR/spam
	+
	+The following maildrop equivalents are suggested by Christian Kurz.
	+
	+ ### begin sample one ###
	+ # Invoke bmf and use return code to filter spam in one step
	+ exception {
	+ `bmf`
	+ if ( $RETURNCODE == 0 )
	+ to $MAILDIR/spam
	+ }
	+
	+ ### begin sample two ###
	+ # Invoke bmf as a filter
	+ exception {
	+ xfilter "bmf -p"
	+ if (/^X-Spam-Status: Yes/)
	+ to $MAILDIR/spam
	+ }
	+
	+
	+If you put bmf in your procmail or maildrop scripts as suggested above, it
	+will always register an email as either spam or non-spam. To reverse this
	+registration and train bmf, the following mutt macros may be useful:
	+
	+ macro index \ed "<enter-command>unset wait_key\n<pipe-entry>bmf -S\n<enter-c…
	+ macro index \et "<enter-command>unset wait_key\n<pipe-entry>bmf -t\n<enter-c…
	+ macro index \eu "<enter-command>unset wait_key\n<pipe-entry>bmf -N\n<enter-c…
	+
	+These macros will override the existing bindings for the following keys:
	+
	+ <Esc>d = de-register as non-spam, register as spam, and move to spam folder.
	+ <Esc>t = test for spamicity.
	+ <Esc>u = de-register as spam, register as non-spam, and move to inbox folder.
	+
	+How to train bmf
	+================
	+
	+First, please keep in mind that bmf "learns" how to recognize spam from the
	+input that you give it. It works best if you give it exactly the email that
	+you receive, or have received in the recent past.
	+
	+Here are some good techniques for training bmf:
	+
	+ - If you keep a history of email that you have received, use your current
	+ and/or saved emails. It is fairly easy to create a small shell script
	+ that will pass all of your normal email to "bmf -n" and all of your spam
	+ to "bmf -s". Note that if you do not use the mbox storage format, you
	+ MUST invoke bmf exactly once per email. Using "cat * | bmf -n" will NOT
	+ work properly because bmf sees the entire input as one big email.
	+
	+ - If you already use spamassassin, you can use it to train bmf for a
	+ couple of days or weeks. If spamassassin tags it as spam, run it
	+ through "bmf -s". If not, run it through "bmf -n". This can be
	+ automated with procmail or maildrop recipes.
	+
	+Here are some things that you should NOT do:
	+
	+ - Get impatient with the training process and repeatedly pass one email
	+ through "bmf -s".
	+
	+ - Manually move words around between lists and/or adjust the word counts.
	+
	+Final words
	+===========
	+
	+Thanks for trying bmf. If you have any problems, comments, or suggestions,
	+please direct them to the bmf mailing list, [email protected].
	+
	+ Tom Marshall
	+ 20 Oct 2002
	diff --git a/TODO b/TODO
	@@ -0,0 +1,4 @@
	+* Make extrema size configurable, and default to ~5% of tokens.
	+* Teach lexer about multiline MIME headers and case (in)sensitivity.
	+* Teach lexer about MIME quoted-printable and base64 encodings.
	+* Make a pop3 proxy (?)
	diff --git a/bmf.1 b/bmf.1
	@@ -0,0 +1,148 @@
	+.\"Generated by db2man.xsl. Don't modify this, modify the source.
	+.de Sh \" Subsection
	+.br
	+.if t .Sp
	+.ne 5
	+.PP
	+\fB\\$1\fR
	+.PP
	+..
	+.de Sp \" Vertical space (when we can't use .PP)
	+.if t .sp .5v
	+.if n .sp
	+..
	+.de Ip \" List item
	+.br
	+.ie \\n(.$>=3 .ne \\$3
	+.el .ne 3
	+.IP "\\$1" \\$2
	+..
	+.TH "BMF" 1 "" "" ""
	+.SH NAME
	+bmf \- efficient Bayesian mail filter
	+.SH "SYNOPSIS"
	+
	+.nf
	+\fBbmf\fR [-t] [-n] [-s] [-N] [-S] [-f fmt] [-d db] [-i file] [-k n] [-m type]…
	+ [-v] [-V] [-h]
	+.fi
	+
	+.SH "DESCRIPTION"
	+
	+.PP
	+bmf is a Bayesian mail filter. In its normal mode of operation, it takes an em…
	+
	+.PP
	+bmf supports both mbox and maildir mail storage formats. It will automatically…
	+
	+.SH "OPTIONS"
	+
	+.PP
	+Without command-line options, bmf processes the input, registers it as either …
	+
	+.PP
	+\fB-t\fR Test to see if the input is spam. The word lists are not updated. A r…
	+
	+.PP
	+\fB-n\fR Register the input as non-spam.
	+
	+.PP
	+\fB-s\fR Register the input as spam.
	+
	+.PP
	+\fB-N\fR Register the input as non-spam and undo a prior registration as spam.
	+
	+.PP
	+\fB-S\fR Register the input as spam and undo a prior registration as non-spam.
	+
	+.PP
	+\fB-f fmt\fR Specify database format. Valid formats are text, db, and mysql. T…
	+
	+.PP
	+\fB-d db\fR Specify database or directory for loading and saving word lists. T…
	+
	+.PP
	+\fB-i file\fR Use file for input instead of stdin.
	+
	+.PP
	+\fB-k n\fR Specify the number of extrema (keepers) to use in the Bayes calcula…
	+
	+.PP
	+\fB-m type\fR Specify mail storage format. Valid formats are mbox and maildir. …
	+
	+.PP
	+\fB-p\fR Copy the input to the output (passthrough) and insert spam headers in…
	+
	+.PP
	+\fB-v\fR Be more verbose. This option is not well supported yet.
	+
	+.PP
	+\fB-V\fR Display version information.
	+
	+.PP
	+\fB-h\fR Display usage information.
	+
	+.SH "THEORY OF OPERATION"
	+
	+.PP
	+bmf treats its input as a bag of tokens. Each token is checked against "good" …
	+
	+.PP
	+While this method sounds crude compared to the more usual pattern-matching app…
	+
	+.PP
	+bmf improves on Paul's proposal by doing smarter lexical analysis. In particul…
	+
	+.PP
	+MIME and other attachments are not decoded. Experience from watching the token…
	+
	+.SH "INTEGRATION WITH OTHER TOOLS"
	+
	+.PP
	+Please see the README for samples and suggestions.
	+
	+.SH "RETURN VALUES"
	+
	+.PP
	+In passthrough mode: zero for success, nonzero for failure.
	+
	+.PP
	+In non-passthrough mode: 0 for spam; 1 for non-spam; 2 for I/O or other errors.
	+
	+.SH "FILES"
	+
	+.TP
	+\fI~/.bmf/goodlist.txt\fR
	+List of good tokens for text mode.
	+
	+.TP
	+\fI~/.bmf/spamlist.txt\fR
	+List of bad tokens for text mode.
	+
	+.TP
	+\fI~/.bmf/goodlist.db\fR
	+List of good tokens for libdb mode.
	+
	+.TP
	+\fI~/.bmf/spamlist.db\fR
	+List of bad tokens for libdb mode.
	+
	+.SH "BUGS"
	+
	+.PP
	+The lexer should recognize multiline headers.
	+
	+.PP
	+The lexer should recognize MIME attachments.
	+
	+.PP
	+Content-Transfer-Encoding is not decoded.
	+
	+.SH "AUTHOR"
	+
	+.PP
	+Tom Marshall <[email protected]>.
	+
	+.PP
	+The Bayes algorithm is from bogofilter by Eric S. Raymond <[email protected]>. b…
	+
	diff --git a/bmf.c b/bmf.c
	@@ -0,0 +1,339 @@
	+/* $Id: bmf.c,v 1.20 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * bmf.c: top level Bayesian mail filter app.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+#include "dbh.h"
	+#include "filt.h"
	+
	+/*
	+ * Modes of operation (mutually exclusive).  main() starts in mode_normal
	+ * and every mode option simply overwrites the previous choice, so the
	+ * last one on the command line wins.
	+ */
	+typedef enum
	+{
	+ mode_test, /* -t: classify and dump the statistics, never merge into a list */
	+ mode_normal, /* no mode option: classify, then merge into the matching list */
	+ mode_reg_s, /* -s: merge the message into the spam list */
	+ mode_reg_n, /* -n: merge the message into the good list */
	+ mode_n_to_s, /* -S: unmerge from the good list and merge into the spam list */
	+ mode_s_to_n /* -N: unmerge from the spam list and merge into the good list */
	+} runmode_t;
	+
	+/*
	+ * Print the command-line help text to stdout and terminate the process.
	+ * Does not return: the process always exits with status 2.
	+ */
	+static void usage( void )
	+{
	+ printf( "\n"
	+ "Usage: " PACKAGE " [mode] [options]\n"
	+ "\n"
	+ "Modes of operation (mutually exclusive; the last one specified is…
	+ "\t\tRegister message using historical data if no mode is specifie…
	+ "\t-n\tRegister message as non-spam.\n"
	+ "\t-s\tRegister message as spam.\n"
	+ "\t-N\tRegister message as non-spam and undo prior registration as…
	+ "\t-S\tRegister message as spam and undo prior registration as non…
	+ "\t-t\tTest mode, print report and do not save results.\n"
	+ "\n"
	+ "Other options:\n"
	+ "\t-f fmt\tSpecify database format (text\|db\|mysql).\n"
	+ "\t-d db\tSpecify database or directory name.\n"
	+ "\t-i file\tSpecify file to read instead of stdin.\n"
	+ "\t-k n\tSpecify count of extrema to use (keepers), default is 15.…
	+ "\t-m type\t[DEPRECATED] Specify mail storage format (mbox\|maildir…
	+ "\t-p\tPassthrough mode, like SpamAssassin.\n"
	+ "\t-v\tIncrease verbosity level.\n"
	+ "\t-V\tShow version information and exit.\n"
	+ "\t-h\tShow this message and exit.\n"
	+ "\n" );
	+ exit( 2 );
	+}
	+
	+/*
	+ * Print the package name, version, copyright and licence notice to stdout
	+ * and terminate the process.  Does not return: exits with status 2.
	+ */
	+static void version( void )
	+{
	+ printf( "\n"
	+ PACKAGE " version " VERSION " - a Bayesian mail filter\n"
	+ "Copyright (c) 2002 Tom Marshall\n"
	+ "\n"
	+ PACKAGE " comes with ABSOLUTELY NO WARRANTY.\n"
	+ "This is free software. You are welcome to redistribute it under …
	+ "of the GNU General Public License. See the file LICENSE in the s…
	+ "distribution, or visit http://www.gnu.org/licenses/gpl.html\n"
	+ "\n" );
	+ exit( 2 );
	+}
	+
	+/*
	+ * bmf entry point.
	+ *
	+ * Parses the command line, opens the token database, loads the input
	+ * (stdin, or the file named with -i) into the lexer and then, for every
	+ * message found in the input, classifies and/or registers it according
	+ * to the selected run mode.
	+ *
	+ * Exit status: 2 on usage, allocation, I/O or database errors.  Otherwise
	+ * 0 when passthrough (-p) is active or the last message processed was
	+ * spam, and 1 when the last message processed was not spam.
	+ */
	+int main( int argc, char** argv )
	+{
	+    int ch;
	+    dbfmt_t dbfmt = db_db;
	+    char* dbname = NULL;
	+    bool_t rdonly;
	+
	+    runmode_t mode = mode_normal;
	+    mbox_t mboxtype = detect;
	+    bool_t do_passthru = false;
	+
	+    dbh_t* pdb;
	+    dbt_t* pblist;
	+    dbt_t* pglist;
	+    dbt_t* ptable;
	+    vec_t mlist;
	+    stats_t stats;
	+    lex_t lex;
	+    tok_t tok;
	+    bool_t is_spam = false; /* always assigned in the loop; keeps the compiler quiet */
	+
	+    int fd = STDIN_FILENO;
	+    char* infile = NULL;
	+
	+    srand(time(NULL));
	+    atexit( dump_alloc_heap );
	+
	+    /* default storage backend depends on what configure found */
	+#ifdef HAVE_LIBDB
	+    dbfmt = db_db;
	+#else
	+    dbfmt = db_text;
	+#endif
	+
	+    stats.keepers = DEF_KEEPERS;
	+    while( (ch = getopt( argc, argv, "NSVd:f:i:hk:m:npstv" )) != EOF )
	+    {
	+        switch( ch )
	+        {
	+        case 'N':
	+            mode = mode_s_to_n;
	+            break;
	+        case 'S':
	+            mode = mode_n_to_s;
	+            break;
	+        case 'V':
	+            version();
	+            break; /* notreached */
	+        case 'd':
	+            free( dbname );
	+            dbname = strdup( optarg );
	+            if( dbname == NULL )
	+            {
	+                fprintf( stderr, "%s: out of memory\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case 'f':
	+            if( strcasecmp( optarg, "text" ) == 0 )
	+            {
	+                dbfmt = db_text;
	+            }
	+            else if( strcasecmp( optarg, "db" ) == 0 )
	+            {
	+                dbfmt = db_db;
	+            }
	+            else if( strcasecmp( optarg, "mysql" ) == 0 )
	+            {
	+                dbfmt = db_mysql;
	+            }
	+            else
	+            {
	+                usage();
	+            }
	+            break;
	+        case 'h':
	+            usage();
	+            break; /* notreached */
	+        case 'i':
	+            free( infile );
	+            infile = strdup( optarg );
	+            if( infile == NULL )
	+            {
	+                /* a NULL here would silently fall back to stdin */
	+                fprintf( stderr, "%s: out of memory\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case 'k':
	+            {
	+                /*
	+                 * The keeper count sizes the extrema array below, so
	+                 * reject garbage, zero and negative values instead of
	+                 * letting atoi() turn them into a bogus allocation.
	+                 */
	+                char* endp = NULL;
	+                long nkeep;
	+
	+                errno = 0;
	+                nkeep = strtol( optarg, &endp, 10 );
	+                if( errno != 0 \|\| endp == optarg \|\| *endp != '\0' \|\|
	+                    nkeep < 1 \|\| nkeep > INT_MAX )
	+                {
	+                    usage();
	+                }
	+                stats.keepers = (int)nkeep;
	+            }
	+            break;
	+        case 'm':
	+            if( strcasecmp( optarg, "mbox" ) == 0 )
	+            {
	+                mboxtype = mbox;
	+            }
	+            else if( strcasecmp( optarg, "maildir" ) == 0 )
	+            {
	+                mboxtype = maildir;
	+            }
	+            else
	+            {
	+                usage();
	+            }
	+            break;
	+        case 'n':
	+            mode = mode_reg_n;
	+            break;
	+        case 'p':
	+            do_passthru = true;
	+            break;
	+        case 's':
	+            mode = mode_reg_s;
	+            break;
	+        case 't':
	+            mode = mode_test;
	+            break;
	+        case 'v':
	+            g_verbose++;
	+            verbose( 1, "Verbose level now %u\n", g_verbose );
	+            break;
	+        default:
	+            usage();
	+        }
	+    }
	+
	+    /* one discrim_t slot per keeper */
	+    stats.extrema = (discrim_t*)malloc( stats.keepers*sizeof(discrim_t) );
	+    if( stats.extrema == NULL )
	+    {
	+        fprintf( stderr, "%s: out of memory\n", argv[0] );
	+        exit( 2 );
	+    }
	+
	+    if( infile != NULL )
	+    {
	+        fd = open( infile, O_RDONLY );
	+        if( fd == -1 )
	+        {
	+            fprintf( stderr, "%s: cannot open input file '%s': %s\n",
	+                     argv[0], infile, strerror(errno) );
	+            exit( 2 );
	+        }
	+    }
	+
	+    pdb = dbh_open( dbfmt, "localhost", dbname, DB_USER, DB_PASS );
	+    if( pdb == NULL )
	+    {
	+        fprintf( stderr, "%s: cannot open database\n", argv[0] );
	+        exit( 2 );
	+    }
	+
	+    lex_create( &lex, mboxtype );
	+    if( !lex_load( &lex, fd ) )
	+    {
	+        fprintf( stderr, "%s: cannot read input\n", argv[0] );
	+        exit( 2 );
	+    }
	+    lex_nexttoken( &lex, &tok );
	+    if( tok.tt == eof )
	+    {
	+        fprintf( stderr, "%s: no input available\n", argv[0] );
	+        exit( 2 );
	+    }
	+
	+    /* one iteration per message in the input */
	+    while( tok.tt != eof )
	+    {
	+        if( mboxtype == mbox && tok.tt != from )
	+        {
	+            fprintf( stderr, "%s: input does not look like an mbox message\n",…
	+            exit( 2 );
	+        }
	+
	+        /* a list is opened read-only when this mode never merges into it */
	+        rdonly = (mode == mode_test \|\| mode == mode_reg_n);
	+        pblist = pdb->opentable( pdb, "spamlist", rdonly );
	+        if( pblist == NULL )
	+        {
	+            fprintf( stderr, "%s: cannot open spamlist\n", argv[0] );
	+            exit( 2 );
	+        }
	+
	+        rdonly = (mode == mode_test \|\| mode == mode_reg_s);
	+        pglist = pdb->opentable( pdb, "goodlist", rdonly );
	+        if( pglist == NULL )
	+        {
	+            fprintf( stderr, "%s: cannot open goodlist\n", argv[0] );
	+            exit( 2 );
	+        }
	+
	+        vec_create( &mlist );
	+        bvec_loadmsg( &mlist, &lex, &tok );
	+
	+        switch( mode )
	+        {
	+        case mode_test:
	+            bayesfilt( pglist, pblist, &mlist, &stats );
	+            is_spam = (stats.spamicity > SPAM_CUTOFF);
	+            break;
	+        case mode_normal:
	+            bayesfilt( pglist, pblist, &mlist, &stats );
	+            is_spam = (stats.spamicity > SPAM_CUTOFF);
	+            ptable = (is_spam ? pblist : pglist);
	+            svec_sort( &mlist );
	+            if( !ptable->mergeclose( ptable, &mlist ) )
	+            {
	+                fprintf( stderr, "%s: cannot merge/save list\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case mode_reg_s:
	+            stats.spamicity = 1.0;
	+            is_spam = true;
	+            svec_sort( &mlist );
	+            if( !pblist->mergeclose( pblist, &mlist ) )
	+            {
	+                fprintf( stderr, "%s: cannot merge/save list\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case mode_reg_n:
	+            stats.spamicity = 0.0;
	+            is_spam = false;
	+            svec_sort( &mlist );
	+            if( !pglist->mergeclose( pglist, &mlist ) )
	+            {
	+                fprintf( stderr, "%s: cannot merge/save list\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case mode_n_to_s:
	+            stats.spamicity = 1.0;
	+            is_spam = true;
	+            svec_sort( &mlist );
	+            if( !pblist->mergeclose( pblist, &mlist ) \|\|
	+                !pglist->unmergeclose( pglist, &mlist ) )
	+            {
	+                fprintf( stderr, "%s: cannot merge/save list\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        case mode_s_to_n:
	+            stats.spamicity = 0.0;
	+            is_spam = false;
	+            svec_sort( &mlist );
	+            if( !pblist->unmergeclose( pblist, &mlist ) \|\|
	+                !pglist->mergeclose( pglist, &mlist ) )
	+            {
	+                fprintf( stderr, "%s: cannot merge/save list\n", argv[0] );
	+                exit( 2 );
	+            }
	+            break;
	+        default:
	+            usage();
	+        }
	+
	+        if( mode == mode_test )
	+        {
	+            statdump( &stats, STDOUT_FILENO );
	+        }
	+
	+        if( do_passthru )
	+        {
	+            lex_passthru( &lex, is_spam, stats.spamicity );
	+        }
	+
	+        vec_destroy( &mlist );
	+
	+        pglist->close( pglist );
	+        free( pglist );
	+        pblist->close( pblist );
	+        free( pblist );
	+    }
	+
	+    lex_destroy( &lex );
	+
	+    pdb->close( pdb );
	+    free( pdb );
	+    free( dbname );
	+
	+    if( infile != NULL )
	+    {
	+        free( infile );
	+        close( fd );
	+    }
	+    free( stats.extrema );
	+
	+    return ( (do_passthru \|\| is_spam) ? 0 : 1 );
	+}
	diff --git a/bmf.spec.in b/bmf.spec.in
	@@ -0,0 +1,64 @@
	+Name: bmf
	+Version: VERSION
	+Release: 1
	+URL: http://www.sourceforge.net/projects/bmf
	+Source0: %{name}-%{version}.tar.gz
	+License: GPL
	+Group: Applications/Internet
	+Summary: fast anti-spam filtering by Bayesian statistical analysis
	+Buildroot: %{_tmppath}/%{name}-%{version}-root
	+
	+%description
	+bmf is a Bayesian mail filter. It takes an email message or other text on
	+stdin, does a statistical check against lists of "good" and "spam" words,
	+and returns a status code indicating whether or not the message is spam.
	+bmf is efficient, small, and self-contained.
	+
	+%prep
	+
	+%setup
	+
	+%build
	+./configure --with-libdb --without-mysql
	+make
	+
	+%install
	+[ -n "$RPM_BUILD_ROOT" -a "$RPM_BUILD_ROOT" != / ] && rm -rf $RPM_BUILD_ROOT
	+make DESTDIR=${RPM_BUILD_ROOT} install
	+gzip $RPM_BUILD_ROOT/%{_mandir}/*/*.?
	+
	+
	+%files
	+%{_bindir}/bmf
	+%{_mandir}/man1/bmf.1.gz
	+%{_bindir}/bmfconv
	+%{_mandir}/man1/bmfconv.1.gz
	+%doc README LICENSE
	+
	+%changelog
	+* Mon Oct 14 2002 Tom Marshall <[email protected]>
	+- Update to version 0.9.3.
	+
	+* Sat Oct 12 2002 Tom Marshall <[email protected]>
	+- Update to version 0.9.2.
	+
	+* Sat Oct 12 2002 Tom Marshall <[email protected]>
	+- Update to version 0.9.1.
	+
	+* Wed Oct 09 2002 Tom Marshall <[email protected]>
	+- Update to version 0.84.
	+
	+* Mon Oct 07 2002 Tom Marshall <[email protected]>
	+- Update to version 0.83.
	+
	+* Sat Oct 05 2002 Tom Marshall <[email protected]>
	+- Update to version 0.82.
	+
	+* Thu Oct 03 2002 Tom Marshall <[email protected]>
	+- Update to version 0.81.
	+- Add bmfconv.
	+- Use new configure script.
	+
	+* Fri Sep 27 2002 Tom Marshall <[email protected]>
	+- Initial build.
	+
	diff --git a/bmfconv.1 b/bmfconv.1
	@@ -0,0 +1,81 @@
	+.\"Generated by db2man.xsl. Don't modify this, modify the source.
	+.de Sh \" Subsection
	+.br
	+.if t .Sp
	+.ne 5
	+.PP
	+\fB\\$1\fR
	+.PP
	+..
	+.de Sp \" Vertical space (when we can't use .PP)
	+.if t .sp .5v
	+.if n .sp
	+..
	+.de Ip \" List item
	+.br
	+.ie \\n(.$>=3 .ne \\$3
	+.el .ne 3
	+.IP "\\$1" \\$2
	+..
	+.TH "BMFCONV" 1 "" "" ""
	+.SH NAME
	+bmfconv \- Database converter for bmf
	+.SH "SYNOPSIS"
	+
	+.nf
	+\fBbmfconv\fR [-f fmt] [-d db] [-e] [-i] [-v] [-h]
	+.fi
	+
	+.SH "DESCRIPTION"
	+
	+.PP
	+bmfconv converts bmf token databases between the supported formats. It can imp…
	+
	+.PP
	+PLEASE NOTE that the text files used in import and export operations are read …
	+
	+.SH "OPTIONS"
	+
	+.PP
	+\fB-f fmt\fR Specify database format. Supported formats are "db" for libdb and…
	+
	+.PP
	+\fB-d db\fR Specify database name.
	+
	+.PP
	+\fB-e\fR Export the database to text files.
	+
	+.PP
	+\fB-i\fR Import the database from text files.
	+
	+.PP
	+\fB-v\fR Display version information.
	+
	+.PP
	+\fB-h\fR Display usage information.
	+
	+.SH "RETURN VALUES"
	+
	+.PP
	+0 if conversion succeeds, nonzero if conversion fails.
	+
	+.SH "FILES"
	+
	+.TP
	+\fIgoodlist.txt\fR
	+Text file for import or export of good tokens.
	+
	+.TP
	+\fIspamlist.txt\fR
	+Text file for import or export of spam tokens.
	+
	+.SH "BUGS"
	+
	+.PP
	+Should be more robust.
	+
	+.SH "AUTHOR"
	+
	+.PP
	+Tom Marshall <[email protected]>. bmfconv is a part of the bmf package.
	+
	diff --git a/bmfconv.c b/bmfconv.c
	@@ -0,0 +1,169 @@
	+/* $Id: bmfconv.c,v 1.9 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * bmfconv.c: bmf database converter
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "vec.h"
	+#include "dbh.h"
	+
	+/* Conversion direction chosen on the command line. */
	+typedef enum
	+{
	+ none, /* neither -e nor -i given; main() shows the usage text */
	+ db2text, /* -e: export the database tables to text files */
	+ text2db /* -i: import the text files into the database tables */
	+} dir_t;
	+
	+/*
	+ * Write the bmfconv option summary to stdout and terminate the process
	+ * with exit status 2.  Does not return.
	+ */
	+static void usage( void )
	+{
	+    static const char help_text[] =
	+        "\n"
	+        "Usage: " PACKAGE " [options]\n"
	+        "\t-f fmt\tSpecify database format (db\|mysql).\n"
	+        "\t-d db\tSpecify database or directory name.\n"
	+        "\t-e\tExport to text files goodlist.txt and spamlist.txt.\n"
	+        "\t-i\tImport from text files goodlist.txt and spamlist.txt.\n"
	+        "\t-v\tShow version information and exit\n"
	+        "\t-h\tShow this message and exit\n"
	+        "\n";
	+
	+    fputs( help_text, stdout );
	+    exit( 2 );
	+}
	+
	+/*
	+ * Print the package name, version, copyright and licence notice to stdout
	+ * and terminate the process.  Does not return: exits with status 2.
	+ */
	+static void version( void )
	+{
	+ printf( "\n"
	+ PACKAGE " version " VERSION " - a Bayesian mail filter\n"
	+ "Copyright (c) 2002 Tom Marshall\n"
	+ "\n"
	+ PACKAGE " comes with ABSOLUTELY NO WARRANTY.\n"
	+ "This is free software. You are welcome to redistribute it under …
	+ "of the GNU General Public License. See the file LICENSE in the s…
	+ "distribution, or visit http://www.gnu.org/licenses/gpl.html\n"
	+ "\n" );
	+ exit( 2 );
	+}
	+
	+int main( int argc, char** argv )
	+{
	+ int ch;
	+ dbfmt_t dbfmt = db_db;
	+ char* dbname = NULL;
	+ bool_t rdonly;
	+
	+ dbh_t* pdb;
	+ dbt_t* ptable;
	+ dir_t dir = none;
	+
	+ while( (ch = getopt( argc, argv, "d:ef:ihv" )) != EOF )
	+ {
	+ switch( ch )
	+ {
	+ case 'd':
	+ free( dbname );
	+ dbname = strdup( optarg );
	+ break;
	+ case 'e':
	+ dir = db2text;
	+ break;
	+ case 'f':
	+ if( strcasecmp( optarg, "db" ) == 0 )
	+ {
	+ dbfmt = db_db;
	+ }
	+ else if( strcasecmp( optarg, "mysql" ) == 0 )
	+ {
	+ dbfmt = db_mysql;
	+ }
	+ else
	+ {
	+ usage();
	+ }
	+ break;
	+ case 'h':
	+ usage();
	+ break; /* notreached */
	+ case 'i':
	+ dir = text2db;
	+ break;
	+ case 'v':
	+ version();
	+ break; /* notreached */
	+ default:
	+ usage();
	+ }
	+ }
	+ if( dir == none )
	+ {
	+ usage();
	+ }
	+
	+ pdb = dbh_open( dbfmt, "localhost", dbname, DB_USER, DB_PASS );
	+ if( pdb == NULL )
	+ {
	+ fprintf( stderr, "cannot open database\n" );
	+ exit( 1 );
	+ }
	+ rdonly = (dir == db2text ? true : false);
	+
	+ ptable = pdb->opentable( pdb, "spamlist", rdonly );
	+ if( ptable == NULL )
	+ {
	+ fprintf( stderr, "cannot open spamlist\n" );
	+ exit( 1 );
	+ }
	+ if( dir == db2text )
	+ {
	+ if( !ptable->export( ptable, "spamlist.txt" ) )
	+ {
	+ fprintf( stderr, "cannot export spamlist\n" );
	+ exit( 1 );
	+ }
	+ }
	+ else
	+ {
	+ if( !ptable->import( ptable, "spamlist.txt" ) )
	+ {
	+ fprintf( stderr, "cannot import spamlist\n" );
	+ exit( 1 );
	+ }
	+ }
	+ ptable->close( ptable );
	+ free( ptable );
	+
	+ ptable = pdb->opentable( pdb, "goodlist", rdonly );
	+ if( ptable == NULL )
	+ {
	+ fprintf( stderr, "cannot open goodlist\n" );
	+ exit( 1 );
	+ }
	+ if( dir == db2text )
	+ {
	+ if( !ptable->export( ptable, "goodlist.txt" ) )
	+ {
	+ fprintf( stderr, "cannot export goodlist\n" );
	+ exit( 1 );
	+ }
	+ }
	+ else
	+ {
	+ if( !ptable->import( ptable, "goodlist.txt" ) )
	+ {
	+ fprintf( stderr, "cannot import goodlist\n" );
	+ exit( 1 );
	+ }
	+ }
	+ ptable->close( ptable );
	+ free( ptable );
	+
	+ pdb->close( pdb );
	+ free( pdb );
	+
	+ return 0;
	+}
	diff --git a/config.h b/config.h
	@@ -0,0 +1,80 @@
	+/* $Id: config.h,v 1.8 2002/10/20 07:16:57 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _CONFIG_H
	+#define _CONFIG_H
	+
	+/**************************************
	+ * Standard headers
	+ */
	+#include <stdlib.h>
	+#include <stdio.h>
	+#include <string.h>
	+#include <errno.h>
	+#include <math.h>
	+#include <ctype.h>
	+#include <assert.h>
	+
	+/**************************************
	+ * System headers
	+ */
	+#include <sys/types.h>
	+#include <limits.h>
	+#include <unistd.h>
	+#include <sys/stat.h>
	+#include <fcntl.h>
	+#include <sys/file.h>
	+#include <time.h>
	+
	+/**************************************
	+ * For convenience
	+ *
	+ * Short aliases for byte and read-only pointer types, plus a boolean.
	+ */
	+typedef unsigned char byte;
	+typedef const char* cpchar;
	+typedef const byte* cpbyte;
	+typedef const void* cpvoid;
	+/* enumerator order makes false == 0 and true == 1 */
	+typedef enum { false, true } bool_t;
	+
	+/*
	+ * Function-like macros: each argument may be evaluated more than once,
	+ * so do not pass expressions with side effects.
	+ * minmax(v,a,b) yields a when v < a, b when v > b, otherwise v.
	+ */
	+#define min(a,b) ( (a)<(b) ? (a) : (b) )
	+#define max(a,b) ( (a)<(b) ? (b) : (a) )
	+#define minmax(v,a,b) ( (v)<(a)?(a) : (v)>(b)?(b) : (v) )
	+
	+/* Mail storage format of the input; detect is the zero value. */
	+/* XXX: need to figure out MH and any others (MMDF?) */
	+typedef enum { detect, mbox, maildir } mbox_t;
	+
	+/**************************************
	+ * Tweakables
	+ */
	+
	+/* If you have the mysql client libs installed and wish to use them... */
	+/* #define HAVE_MYSQL */
	+
	+/*
	+ * Key under which a table stores its message count, and the length of
	+ * that key without the terminating NUL.
	+ */
	+#define MSGCOUNT_KEY ".MSGCOUNT"
	+#define MSGCOUNT_KEY_LEN (sizeof(MSGCOUNT_KEY)-1)
	+
	+/*
	+ * Credentials handed to dbh_open() by bmf and bmfconv.  These are
	+ * compile-time placeholders; there is no command-line option for them.
	+ */
	+#define DB_USER "username"
	+#define DB_PASS "password"
	+
	+#define IOBUFSIZE 4096 /* chunk size for file buffers */
	+#define MAXWORDLEN 20 /* max word length, inclusive */
	+#define MAXFREQ 4 /* max times to count word per email */
	+#define GOOD_BIAS 2.0 /* give good words more weight */
	+#define DEF_KEEPERS 15 /* how many extrema to keep by default */
	+#define MINIMUM_FREQ 5 /* min word count for consideration in filter …
	+#define UNKNOWN_WORD 0.4 /* odds that unknown word is spammish */
	+#define SPAM_CUTOFF 0.9 /* if it's spammier than this... */
	+
	+/*
	+ * If NON_EQUIPROBABLE is defined, use ratio of spamcount/goodcount instead
	+ * of UNKNOWN_WORD, and as a factor in the known word calculation. This is
	+ * merely copied from bogofilter. I didn't write it and I cannot explain the
	+ * relative merits of using it or not. Please don't ask. :-)
	+ */
	+
	+#endif /* ndef _CONFIG_H */
	diff --git a/configure b/configure
	@@ -0,0 +1,354 @@
	+#!/bin/sh
	+
	+echo ""
	+
	+# defaults
	+# with_debug is the variable the platform section tests for debug builds
	+with_debug=no
	+with_libdb=test
	+with_mysql=test
	+
	+# parse options
	+# --with/--without parsing copied from autoconf's output
	+#   --with-PKG        sets with_PKG=yes
	+#   --with-PKG=VALUE  sets with_PKG=VALUE
	+#   --without-PKG     sets with_PKG=no
	+while [ $# -gt 0 ]; do
	+    case $1 in
	+        --with-*)
	+            ac_option=$1
	+            # text after the first '=', empty when there is none
	+            ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
	+            # package name: everything between "with-" and the '='
	+            ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
	+            # Reject names that are not valid shell variable names.
	+            expr "x$ac_package" : ".*[^A-Za-z0-9_-]" >/dev/null &&
	+                { echo "error: invalid package name: $ac_package"; exit 1; }
	+            ac_package=`echo $ac_package \| sed 's/-/_/g'`
	+            case $ac_option in
	+                *=*)
	+                    # escape single quotes so the eval below stays safe
	+                    ac_optarg=`echo "$ac_optarg" \| sed "s/'/'\\\\\\\\''/g"`
	+                    ;;
	+                *)
	+                    ac_optarg=yes
	+                    ;;
	+            esac
	+            eval "with_$ac_package='$ac_optarg'"
	+            ;;
	+        --without-*)
	+            ac_option=$1
	+            ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
	+            ac_package=`expr "x$ac_option" : 'x-*without-\([^=]*\)'`
	+            # Reject names that are not valid shell variable names.
	+            expr "x$ac_package" : ".*[^A-Za-z0-9_-]" >/dev/null &&
	+                { echo "error: invalid package name: $ac_package"; exit 1; }
	+            ac_package=`echo $ac_package \| sed 's/-/_/g'`
	+            eval "with_$ac_package=no"
	+            ;;
	+        --debug=yes\|--debug\|-d)
	+            echo "Debug mode enabled."
	+            with_debug=yes
	+            ;;
	+        --debug=no)
	+            echo "Debug mode disabled."
	+            with_debug=no
	+            ;;
	+        --help\|-help\|-h)
	+            echo "usage: $0 [ options ]"
	+            echo "available options:"
	+            echo " --debug=yes (or -d) Enable debugging support."
	+            echo " --debug=no Disable debugging support."
	+            echo " --with-package Enable support for package in default lo…
	+            echo " --with-package=path Enable support for package installed in …
	+            echo " --without-package Disable support for package."
	+            echo " --help (or -h) Show this message."
	+            echo ""
	+            echo "relevant packages:"
	+            echo " libdb = BerkeleyDB"
	+            echo " mysql = MySQL database"
	+            exit 1
	+            ;;
	+        *)
	+            echo "Unknown option '$1', try -h for help"
	+            exit 1
	+            ;;
	+    esac
	+    shift
	+done
	+
	+echo "Examining system setup..."
	+
	+# Some known configs:
	+#
	+# uname -s uname -r uname -m uname -p
	+# ======== =========== ======== ========
	+# SunOS 5.6 sun4u sparc
	+# Linux 2.2.17 i686 unknown
	+# FreeBSD 4.1-RELEASE i386 i386
	+
	+UNAME_S=`uname -s`
	+UNAME_R=`uname -r`
	+
	+case "$UNAME_S" in
	+ Linux)
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=gcc
	+ CFLAGS="$CCDBG -D_UNIX -D_LINUX -Wall"
	+ CXX=g++
	+ CXXFLAGS=${CFLAGS}
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=gcc
	+ LDFLAGS="$LDDBG"
	+ SYSLIBS=""
	+ LIBDB_LIB="-ldb"
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ FreeBSD)
	+ case "$UNAME_R" in
	+ 2.*)
	+ OSVER=20
	+ ;;
	+ 3.*)
	+ OSVER=30
	+ ;;
	+ 4.*)
	+ OSVER=40
	+ ;;
	+ *)
	+ # Assume 5.0 +
	+ OSVER=50
	+ ;;
	+ esac
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=gcc
	+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -Wall"
	+ CXX=g++
	+ CXXFLAGS=${CFLAGS}
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=gcc
	+ LDFLAGS="$LDDBG"
	+ SYSLIBS=""
	+ LIBDB_LIB=""
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ OpenBSD)
	+ # I'm guessing OpenBSD looks mostly like FreeBSD 4.x
	+ OSVER=40
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=gcc
	+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -Wall"
	+ CXX=g++
	+ CXXFLAGS=${CFLAGS}
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=gcc
	+ LDFLAGS="$LDDBG"
	+ SYSLIBS=""
	+ LIBDB_LIB=""
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ Darwin)
	+ # I'm guessing Darwin looks mostly like FreeBSD 4.x
	+ OSVER=40
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=cc
	+ CFLAGS="$CCDBG -D_UNIX -D_BSD=$OSVER -DNOLOCK -Wall"
	+ CXX=c++
	+ CXXFLAGS=${CFLAGS}
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=cc
	+ LDFLAGS="$LDDBG"
	+ SYSLIBS=""
	+ LIBDB_LIB=""
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ SunOS)
	+ case "$UNAME_R" in
	+ 5.6)
	+ OSVER=56
	+ ;;
	+ 5.7)
	+ OSVER=57
	+ ;;
	+ 5.8)
	+ OSVER=58
	+ ;;
	+ *)
	+ # Assume 9.0+
	+ OSVER=59
	+ ;;
	+ esac
	+ # Prefer gcc to the native cc here because I haven't tested with the
	+ # native compiler yet.
	+ type gcc >/dev/null 2>&1
	+ if [ $? -eq 0 ]; then
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=gcc
	+ CFLAGS="$CCDBG -D_UNIX -D_SOLARIS=$OSVER -Wall"
	+ CXX=g++
	+ CXXFLAGS=${CFLAGS}
	+ LD=gcc
	+ LDFLAGS="$LDDBG"
	+ else
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=cc
	+ CFLAGS="-DNDEBUG -D_UNIX -D_SOLARIS=$OSVER"
	+ CXX=c++
	+ CXXFLAGS=${CFLAGS}
	+ LD=ld
	+ LDFLAGS=""
	+ fi
	+ AR=ar
	+ ARFLAGS="-rc"
	+ SYSLIBS=""
	+ LIBDB_LIB="-ldb"
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ OSF1)
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g2"
	+ LDDBG="-g2"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=cc
	+ CFLAGS="$CCDBG -D_UNIX -D_OSF"
	+ CXX=cxx
	+ CXXFLAGS="-noexceptions ${CFLAGS}"
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=ld
	+ LDFLAGS=""
	+ SYSLIBS="-lm"
	+ LIBDB_LIB="-ldb"
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+ *)
	+ if [ "$with_debug" = "yes" ]; then
	+ CCDBG="-g"
	+ LDDBG="-g"
	+ else
	+ CCDBG="-DNDEBUG"
	+ LDDBG=""
	+ fi
	+ CC=cc
	+ CFLAGS="$CCDBG -DNDEBUG -D_UNIX"
	+ CXX=c++
	+ CXXFLAGS=${CFLAGS}
	+ AR=ar
	+ ARFLAGS="-rc"
	+ LD=ld
	+ LDFLAGS="$LDDBG"
	+ SYSLIBS=""
	+ LIBDB_LIB="-ldb"
	+ MYSQL_LIB="-lmysqlclient"
	+ ;;
	+esac
	+
	+# Abort early when the compiler chosen above is not on the PATH.
	+echo -n "Looking for compiler... "
	+if type $CC >/dev/null 2>&1; then
	+    echo "$CC is executable."
	+else
	+    echo "compiler '$CC' not found!"
	+    exit 1
	+fi
	+
	+echo -n "Checking for BerkeleyDB... "
	+if [ "$with_libdb" = "test" ]; then
	+ if [ -e /usr/local/BerkeleyDB.4.1/include/db.h ]; then
	+ with_libdb=/usr/local/BerkeleyDB.4.1
	+ LIBDB_LIB="-ldb"
	+ elif [ -e /usr/include/db.h -o \
	+ -e /usr/local/include/db.h ]; then
	+ with_libdb=yes
	+ else
	+ with_libdb=no
	+ fi
	+fi
	+if [ "$with_libdb" = "no" ]; then
	+ echo "disabled."
	+else
	+ if [ "$with_libdb" != "yes" ]; then
	+ CFLAGS="$CFLAGS -I$with_libdb/include"
	+ LDFLAGS="$LDFLAGS -L$with_libdb/lib"
	+ fi
	+ CFLAGS="$CFLAGS -DHAVE_LIBDB"
	+ LDFLAGS="$LDFLAGS $LIBDB_LIB"
	+ echo "enabled."
	+fi
	+
	+echo -n "Checking for MySQL... "
	+if [ "$with_mysql" = "test" ]; then
	+ type mysql_config >/dev/null 2>&1
	+ if [ $? -eq 0 ]; then
	+ with_mysql=yes
	+ CFLAGS="$CFLAGS `mysql_config --cflags`"
	+ LDFLAGS="$LDFLAGS `mysql_config --libs`"
	+ # mysql_config will add -lmysqlclient, don't add it twice
	+ MYSQL_LIB=""
	+ else
	+ with_mysql=no
	+ fi
	+fi
	+if [ "$with_mysql" = "no" ]; then
	+ echo "disabled."
	+else
	+ if [ "$with_mysql" != "yes" ]; then
	+ CFLAGS="$CFLAGS -I$with_mysql/include"
	+ LDFLAGS="$LDFLAGS -L$with_mysql/lib"
	+ fi
	+ CFLAGS="$CFLAGS -DHAVE_MYSQL"
	+ LDFLAGS="$LDFLAGS $MYSQL_LIB"
	+ echo "enabled."
	+fi
	+
	+# Generate each Makefile from its Makefile.in by substituting the @VAR@
	+# placeholders.  sed reads the template directly, so a missing or
	+# unreadable Makefile.in makes the command fail and configure stops,
	+# instead of leaving an empty Makefile behind.
	+# Note: '%' is the sed delimiter, so the substituted values must not
	+# contain a '%' character.
	+for D in .; do
	+    sed "s%@CC@%${CC}%g
	+         s%@CXX@%${CXX}%g
	+         s%@AR@%${AR}%g
	+         s%@LD@%${LD}%g
	+         s%@CFLAGS@%${CFLAGS}%g
	+         s%@CXXFLAGS@%${CXXFLAGS}%g
	+         s%@ARFLAGS@%${ARFLAGS}%g
	+         s%@LDFLAGS@%${LDFLAGS}%g
	+         s%@SYSLIBS@%${SYSLIBS}%" < $D/Makefile.in > $D/Makefile \|\| exit 1
	+done
	+
	+echo "Configuration successful."
	+echo "Now run 'make all' and 'make install'."
	+echo ""
	diff --git a/dbdb.c b/dbdb.c
	@@ -0,0 +1,684 @@
	+/* $Id: dbdb.c,v 1.22 2002/10/19 09:59:35 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * dbdb.c: berkeley database handler
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+
	+#include "dbh.h"
	+#include "dbdb.h"
	+
	+#ifdef HAVE_LIBDB
	+
	+/* Zero a DBT so that every field starts out cleared. */
	+#define DBT_init( pdbt ) memset( pdbt, 0, sizeof(DBT) )
	+
	+/*
	+ * Version shims for record access.  libdb 1 headers do not define
	+ * DB_VERSION_MAJOR; from v2 on get/put take an extra transaction
	+ * argument (always NULL here) and fd() returns the descriptor through a
	+ * pointer.
	+ * dbx_fd() stores the file descriptor of dbp into the lvalue fd.
	+ */
	+#if !defined(DB_VERSION_MAJOR) /* v1 */
	+#define dbx_get(dbp,kp,vp) dbp->get( dbp, kp, vp, 0 )
	+#define dbx_put(dbp,kp,vp) dbp->put( dbp, kp, vp, 0 )
	+#define dbx_fd(dbp,fd) fd = dbp->fd( dbp )
	+#else /* v2+ */
	+#define dbx_get(dbp,kp,vp) dbp->get( dbp, NULL, kp, vp, 0 )
	+#define dbx_put(dbp,kp,vp) dbp->put( dbp, NULL, kp, vp, 0 )
	+#define dbx_fd(dbp,fd) dbp->fd( dbp, &fd )
	+#endif /* DB_VERSION_MAJOR */
	+
	+/*
	+ * Version shims for sequential traversal.
	+ * dbx_createcursor(dbp,dbcp) stores a cursor for dbp in the lvalue dbcp
	+ * and evaluates to 0 on success.  It now uses its dbcp argument; earlier
	+ * it hard-coded a caller variable named csrp, which only worked when the
	+ * caller happened to use exactly that name.
	+ */
	+#if !defined(DB_VERSION_MAJOR) /* v1 */
	+typedef DB DBC; /* no separate cursor type */
	+#define dbx_createcursor(dbp,dbcp) ((dbcp = dbp) ? 0 : -1)
	+#define dbx_destroycursor(dbcp) (dbcp = NULL)
	+#define dbx_first(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_FIRST)
	+#define dbx_next(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_NEXT)
	+#define dbx_prev(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_PREV)
	+#define dbx_last(dbcp,kp,vp) dbcp->seq(dbcp,kp,vp,R_LAST)
	+#elif DB_VERSION_MAJOR == 2
	+#define dbx_createcursor(dbp,dbcp) dbp->cursor(dbp,NULL,&dbcp)
	+#define dbx_destroycursor(dbcp) dbcp->c_close(dbcp)
	+#define dbx_first(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_FIRST)
	+#define dbx_next(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_NEXT)
	+#define dbx_prev(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_PREV)
	+#define dbx_last(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_LAST)
	+#else /* v3+ */
	+#define dbx_createcursor(dbp,dbcp) dbp->cursor(dbp,NULL,&dbcp,0)
	+#define dbx_destroycursor(dbcp) dbcp->c_close(dbcp)
	+#define dbx_first(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_FIRST)
	+#define dbx_next(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_NEXT)
	+#define dbx_prev(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_PREV)
	+#define dbx_last(dbcp,kp,vp) dbcp->c_get(dbcp,kp,vp,DB_LAST)
	+#endif /* DB_VERSION_MAJOR */
	+
	+/*
	+ * Point a DBT at the NUL-terminated string p.  The string is not copied
	+ * and the recorded size excludes the terminating NUL.
	+ */
	+static void char2DBT( DBT* pdbt, char* p )
	+{
	+    pdbt->size = strlen( p );
	+    pdbt->data = p;
	+}
	+
	+/*
	+ * Point a DBT at the unsigned integer *p.  The value is not copied; the
	+ * recorded size is sizeof(uint).
	+ */
	+static void uint2DBT( DBT* pdbt, uint* p )
	+{
	+    pdbt->size = sizeof(uint);
	+    pdbt->data = p;
	+}
	+
	+/*
	+ * Read an unsigned integer out of a DBT.  The bytes are copied with
	+ * memcpy, so the data pointer need not be aligned.  The DBT's size field
	+ * is not consulted: sizeof(uint) bytes are read regardless.
	+ */
	+static uint DBT2uint( DBT* pdbt )
	+{
	+    uint value;
	+
	+    memcpy( &value, pdbt->data, sizeof(uint) );
	+    return value;
	+}
	+
	+/*
	+ * Open the Berkeley DB backend.
	+ *
	+ * dbname names the directory that holds the table files; a single
	+ * trailing '/' is stripped.  When dbname is NULL or empty the directory
	+ * is "$HOME/.bmf", or "./.bmf" when HOME is unset or empty.  The
	+ * directory is created with mode 0700 if it does not exist.  On libdb 3
	+ * and later a database environment is also created and opened there.
	+ * dbhost and dbuser are not used by this backend.
	+ *
	+ * Returns a newly allocated handle, or NULL on any failure; nothing is
	+ * leaked on the failure paths.  The caller releases the handle with
	+ * its close() method followed by free().
	+ */
	+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpas…
	+{
	+    dbhdb_t* pthis;
	+
	+    uint dirlen;
	+    cpchar phome;
	+    struct stat st;
	+
	+    pthis = (dbhdb_t*)malloc( sizeof(dbhdb_t) );
	+    if( pthis == NULL )
	+    {
	+        return NULL;
	+    }
	+    pthis->close = dbdb_db_close;
	+    pthis->opentable = dbdb_db_opentable;
	+    pthis->dir = NULL; /* lets bail free it unconditionally */
	+
	+    if( dbname != NULL && *dbname != '\0' )
	+    {
	+        pthis->dir = strdup( dbname );
	+        if( pthis->dir == NULL )
	+        {
	+            goto bail;
	+        }
	+        /* dbname is non-empty here, so dirlen is at least 1 */
	+        dirlen = strlen( pthis->dir );
	+        if( pthis->dir[dirlen-1] == '/' )
	+        {
	+            pthis->dir[dirlen-1] = '\0';
	+        }
	+    }
	+    else
	+    {
	+        phome = getenv( "HOME" );
	+        if( phome == NULL \|\| *phome == '\0' )
	+        {
	+            phome = ".";
	+        }
	+        /* room for "/.bmf" (5 characters) plus the terminating NUL */
	+        pthis->dir = (char*)malloc( strlen(phome)+5+1 );
	+        if( pthis->dir == NULL )
	+        {
	+            goto bail;
	+        }
	+        sprintf( pthis->dir, "%s/.bmf", phome );
	+    }
	+
	+    /* ensure config directory exists */
	+    if( stat( pthis->dir, &st ) != 0 )
	+    {
	+        if( errno == ENOENT )
	+        {
	+            if( mkdir( pthis->dir, S_IRUSR\|S_IWUSR\|S_IXUSR ) != 0 )
	+            {
	+                goto bail;
	+            }
	+        }
	+        else
	+        {
	+            goto bail;
	+        }
	+    }
	+    else
	+    {
	+        /* the path exists but must be a directory */
	+        if( !S_ISDIR( st.st_mode ) )
	+        {
	+            goto bail;
	+        }
	+    }
	+
	+#if !defined(DB_VERSION_MAJOR) \|\| DB_VERSION_MAJOR < 3
	+    /* no initialization */
	+#else /* DB_VERSION_MAJOR >= 3 */
	+    if( db_env_create( &pthis->envp, 0 ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( pthis->envp->open( pthis->envp, pthis->dir, DB_INIT_LOCK\|DB_INIT_MPOOL…
	+    {
	+        /* a DB_ENV handle must be closed even when open() fails */
	+        pthis->envp->close( pthis->envp, 0 );
	+        goto bail;
	+    }
	+#endif /* DB_VERSION_MAJOR */
	+
	+    return (dbh_t*)pthis;
	+
	+bail:
	+    free( pthis->dir );
	+    free( pthis );
	+    return NULL;
	+}
	+
	+/*
	+ * Release what dbdb_db_open() acquired: the database environment on
	+ * libdb 3 and later, and the directory string.  The handle structure
	+ * itself is not freed here.  Always returns true.
	+ */
	+bool_t dbdb_db_close( dbhdb_t* pthis )
	+{
	+#if defined(DB_VERSION_MAJOR) && DB_VERSION_MAJOR >= 3
	+    /* only libdb 3 and later have an environment to shut down */
	+    pthis->envp->close( pthis->envp, 0 );
	+#endif /* DB_VERSION_MAJOR >= 3 */
	+
	+    free( pthis->dir );
	+    pthis->dir = NULL;
	+
	+    return true;
	+}
	+
	+/*
	+ * Open (creating it if necessary) the btree file "<dir>/<table>.db" and
	+ * wrap it in a table object.
	+ *
	+ *   pthis  - database handle; supplies the directory and, on libdb 3
	+ *            and later, the environment pointer
	+ *   table  - table name without extension
	+ *   rdonly - accepted for interface compatibility; this function does
	+ *            not consult it and always opens the file for writing
	+ *
	+ * The stored message count is loaded into nmsgs (0 when the key is
	+ * absent).  Returns the new table, or NULL when allocation fails, the
	+ * path does not fit in PATH_MAX, or the database cannot be opened.
	+ */
	+dbt_t* dbdb_db_opentable( dbhdb_t* pthis, cpchar table, bool_t rdonly )
	+{
	+    dbtdb_t* ptable;
	+    DB* dbp;
	+    DBT key;
	+    DBT val;
	+
	+    int pathlen;
	+    char szpath[PATH_MAX];
	+
	+    ptable = (dbtdb_t*)malloc( sizeof(dbtdb_t) );
	+    if( ptable == NULL )
	+    {
	+        return NULL;
	+    }
	+    ptable->close = dbdb_table_close;
	+    ptable->mergeclose = dbdb_table_mergeclose;
	+    ptable->unmergeclose = dbdb_table_unmergeclose;
	+    ptable->import = dbdb_table_import;
	+    ptable->export = dbdb_table_export;
	+    ptable->getmsgcount = dbdb_table_getmsgcount;
	+    ptable->getcount = dbdb_table_getcount;
	+    ptable->dbp = NULL;
	+
	+    /* refuse over-long paths instead of overrunning szpath */
	+    pathlen = snprintf( szpath, sizeof(szpath), "%s/%s.db", pthis->dir, table );
	+    if( pathlen < 0 \|\| (size_t)pathlen >= sizeof(szpath) )
	+    {
	+        goto bail;
	+    }
	+#if !defined(DB_VERSION_MAJOR)
	+    if( (dbp = dbopen( szpath, O_CREAT\|O_RDWR, 0644, DB_BTREE, NULL)) == NULL )
	+    {
	+        goto bail;
	+    }
	+#elif DB_VERSION_MAJOR == 2
	+    if( db_open( szpath, DB_BTREE, DB_CREATE, 0644, NULL, NULL, &dbp ) != 0 )
	+    {
	+        goto bail;
	+    }
	+#elif (DB_VERSION_MAJOR == 3) \|\| (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR ==…
	+    ptable->envp = pthis->envp;
	+    if( db_create( &dbp, NULL, 0 ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( dbp->open( dbp, szpath, NULL, DB_BTREE, DB_CREATE, 0644 ) != 0 )
	+    {
	+        /* the handle from db_create() must be discarded after a failed open */
	+        dbp->close( dbp, 0 );
	+        goto bail;
	+    }
	+#else /* v4.1+ */
	+    ptable->envp = pthis->envp;
	+    if( db_create( &dbp, NULL, 0 ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( dbp->open( dbp, NULL, szpath, NULL, DB_BTREE, DB_CREATE, 0644 ) != 0 )
	+    {
	+        /* the handle from db_create() must be discarded after a failed open */
	+        dbp->close( dbp, 0 );
	+        goto bail;
	+    }
	+#endif /* DB_VERSION_MAJOR */
	+    ptable->dbp = dbp;
	+
	+    /* load the stored message count, if there is one */
	+    DBT_init( &key );
	+    DBT_init( &val );
	+    ptable->nmsgs = 0;
	+    char2DBT( &key, MSGCOUNT_KEY );
	+    if( dbx_get( dbp, &key, &val ) == 0 )
	+    {
	+        ptable->nmsgs = DBT2uint( &val );
	+    }
	+
	+    return (dbt_t*)ptable;
	+
	+bail:
	+    free( ptable );
	+    return NULL;
	+}
	+
	+/*
	+ * Take a blocking write lock over the whole table file. Compiles to a
	+ * plain "return true" when NOLOCK is defined.
	+ */
	+static bool_t dbdb_table_lock( dbtdb_t* pthis )
	+{
	+#ifdef NOLOCK
	+    return true;
	+#else /* ndef NOLOCK */
	+    struct flock fl;
	+    int dbfd;
	+
	+    dbx_fd( pthis->dbp, dbfd );
	+
	+    /* start 0 / len 0 from SEEK_SET covers the entire file */
	+    memset( &fl, 0, sizeof(fl) );
	+    fl.l_whence = SEEK_SET;
	+    fl.l_start = 0;
	+    fl.l_len = 0;
	+    fl.l_type = F_WRLCK;
	+
	+    if( fcntl( dbfd, F_SETLKW, &fl ) == 0 )
	+    {
	+        return true;
	+    }
	+    return false;
	+#endif /* NOLOCK */
	+}
	+
	+/*
	+ * Drop the whole-file lock taken by dbdb_table_lock. Compiles to a plain
	+ * "return true" when NOLOCK is defined.
	+ */
	+static bool_t dbdb_table_unlock( dbtdb_t* pthis )
	+{
	+#ifdef NOLOCK
	+    return true;
	+#else /* ndef NOLOCK */
	+    struct flock fl;
	+    int dbfd;
	+
	+    dbx_fd( pthis->dbp, dbfd );
	+
	+    /* same whole-file range as the lock request, but with F_UNLCK */
	+    memset( &fl, 0, sizeof(fl) );
	+    fl.l_whence = SEEK_SET;
	+    fl.l_start = 0;
	+    fl.l_len = 0;
	+    fl.l_type = F_UNLCK;
	+
	+    if( fcntl( dbfd, F_SETLK, &fl ) == 0 )
	+    {
	+        return true;
	+    }
	+    return false;
	+#endif /* NOLOCK */
	+}
	+
	+/*
	+ * Close the underlying libdb handle if one is open and forget it.
	+ * Always returns true; closing an already-closed table is a no-op.
	+ */
	+bool_t dbdb_table_close( dbtdb_t* pthis )
	+{
	+    DB* handle = pthis->dbp;
	+
	+    if( handle == NULL )
	+    {
	+        return true;
	+    }
	+
	+#if defined(DB_VERSION_MAJOR) /* v2+ */
	+    handle->close( handle, 0 );
	+#else /* v1 */
	+    handle->close( handle );
	+#endif /* DB_VERSION_MAJOR */
	+    pthis->dbp = NULL;
	+
	+    return true;
	+}
	+
	+/*
	+ * Add one message's words to the table and close it.
	+ *
	+ * Under the table lock: bumps the stored message count, then for each
	+ * distinct word in pmsg adds its occurrence count (db_getnewcount) to the
	+ * stored value. Returns false if the table is not open, the lock cannot
	+ * be taken, or a word cannot be written; in the last case the lock and
	+ * the iterator are released but the table is left open.
	+ */
	+bool_t dbdb_table_mergeclose( dbtdb_t* pthis, vec_t* pmsg )
	+{
	+    DB* dbp = pthis->dbp;
	+    DBT key;
	+    DBT val;
	+
	+    char szword[MAXWORDLEN+1];
	+    uint count;
	+    veciter_t msgiter;
	+    str_t* pmsgstr;
	+
	+    if( pthis->dbp == NULL )
	+    {
	+        return false;
	+    }
	+
	+    if( !dbdb_table_lock( pthis ) )
	+    {
	+        return false;
	+    }
	+
	+    pthis->nmsgs++;
	+
	+    DBT_init( &key );
	+    DBT_init( &val );
	+
	+    char2DBT( &key, MSGCOUNT_KEY );
	+    uint2DBT( &val, &pthis->nmsgs );
	+    dbx_put( dbp, &key, &val );
	+
	+    vec_first( pmsg, &msgiter );
	+    pmsgstr = veciter_get( &msgiter );
	+
	+    while( pmsgstr != NULL )
	+    {
	+        assert( pmsgstr->len <= MAXWORDLEN );
	+        strncpylwr( szword, pmsgstr->p, pmsgstr->len );
	+        szword[pmsgstr->len] = '\0';
	+        count = db_getnewcount( &msgiter );
	+
	+        char2DBT( &key, szword );
	+        if( dbx_get( dbp, &key, &val ) == 0 )
	+        {
	+            count += DBT2uint( &val );
	+        }
	+        char2DBT( &key, szword );
	+        uint2DBT( &val, &count );
	+        if( dbx_put( dbp, &key, &val ) != 0 )
	+        {
	+            goto bail;
	+        }
	+
	+        veciter_next( &msgiter );
	+        pmsgstr = veciter_get( &msgiter );
	+    }
	+
	+    veciter_destroy( &msgiter );
	+    dbdb_table_unlock( pthis );
	+    return dbdb_table_close( pthis );
	+
	+bail:
	+    /* do not leave the file locked or the iterator live on failure */
	+    veciter_destroy( &msgiter );
	+    dbdb_table_unlock( pthis );
	+    return false;
	+}
	+
	+/*
	+ * Remove one message's words from the table and close it.
	+ *
	+ * Under the table lock: decrements the stored message count (never below
	+ * zero), then for each distinct word in pmsg that already has an entry,
	+ * subtracts its occurrence count, clamping at zero. Words with no entry
	+ * are skipped. Returns false if the table is not open, the lock cannot be
	+ * taken, or a word cannot be written; in the last case the lock and the
	+ * iterator are released but the table is left open.
	+ */
	+bool_t dbdb_table_unmergeclose( dbtdb_t* pthis, vec_t* pmsg )
	+{
	+    DB* dbp = pthis->dbp;
	+    DBT key;
	+    DBT val;
	+
	+    char szword[MAXWORDLEN+1];
	+    uint count;
	+    veciter_t msgiter;
	+    str_t* pmsgstr;
	+
	+    if( pthis->dbp == NULL )
	+    {
	+        return false;
	+    }
	+
	+    if( !dbdb_table_lock( pthis ) )
	+    {
	+        return false;
	+    }
	+
	+    /* only touch the cached count once the lock is held */
	+    if( pthis->nmsgs > 0 )
	+    {
	+        pthis->nmsgs--;
	+    }
	+
	+    DBT_init( &key );
	+    DBT_init( &val );
	+
	+    char2DBT( &key, MSGCOUNT_KEY );
	+    uint2DBT( &val, &pthis->nmsgs );
	+    dbx_put( dbp, &key, &val );
	+
	+    vec_first( pmsg, &msgiter );
	+    pmsgstr = veciter_get( &msgiter );
	+
	+    while( pmsgstr != NULL )
	+    {
	+        assert( pmsgstr->len <= MAXWORDLEN );
	+        strncpylwr( szword, pmsgstr->p, pmsgstr->len );
	+        szword[pmsgstr->len] = '\0';
	+        count = db_getnewcount( &msgiter );
	+
	+        char2DBT( &key, szword );
	+        if( dbx_get( dbp, &key, &val ) == 0 )
	+        {
	+            uint n = DBT2uint( &val );
	+            n = (n > count) ? (n - count) : 0;
	+            char2DBT( &key, szword );
	+            uint2DBT( &val, &n );
	+            if( dbx_put( dbp, &key, &val ) != 0 )
	+            {
	+                goto bail;
	+            }
	+        }
	+
	+        veciter_next( &msgiter );
	+        pmsgstr = veciter_get( &msgiter );
	+    }
	+
	+    veciter_destroy( &msgiter );
	+    dbdb_table_unlock( pthis );
	+    return dbdb_table_close( pthis );
	+
	+bail:
	+    /* do not leave the file locked or the iterator live on failure */
	+    veciter_destroy( &msgiter );
	+    dbdb_table_unlock( pthis );
	+    return false;
	+}
	+
	+/*
	+ * Load a bogofilter-format text wordlist into the table.
	+ *
	+ * The file must start with BOGOFILTER_HEADER (which carries the message
	+ * count); every following line is "word count". Lines starting with '#'
	+ * or ';' and empty lines are skipped. Words are lower-cased before being
	+ * stored. A final line without a trailing newline is treated as an error.
	+ *
	+ * Returns true on success. On any failure returns false; records stored
	+ * before the failure remain in the table.
	+ */
	+bool_t dbdb_table_import( dbtdb_t* pthis, cpchar filename )
	+{
	+    DB* dbp = pthis->dbp;
	+    int fd;
	+    struct stat st;
	+    char* pbuf = NULL;
	+    char* pbegin;
	+    char* pend;
	+    char* plimit;
	+    rec_t r;
	+    DBT key;
	+    DBT val;
	+    char szword[MAXWORDLEN+1];
	+
	+    if( pthis->dbp == NULL )
	+    {
	+        return false;
	+    }
	+    if( (fd = open( filename, O_RDONLY, 0644 )) < 0 )
	+    {
	+        return false;
	+    }
	+    if( fstat( fd, &st ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( st.st_size <= 0 )
	+    {
	+        goto bail;
	+    }
	+    /* one extra byte so sscanf/strtol always see a terminated string */
	+    pbuf = (char*)malloc( st.st_size + 1 );
	+    if( pbuf == NULL )
	+    {
	+        goto bail;
	+    }
	+    if( read( fd, pbuf, st.st_size ) != st.st_size )
	+    {
	+        goto bail;
	+    }
	+    pbuf[st.st_size] = '\0';
	+    plimit = pbuf + st.st_size;
	+
	+    DBT_init( &key );
	+    DBT_init( &val );
	+
	+    if( sscanf( pbuf, BOGOFILTER_HEADER, &pthis->nmsgs ) != 1 )
	+    {
	+        goto bail;
	+    }
	+    /* skip the header line without running off the end of the buffer */
	+    pbegin = pbuf;
	+    while( pbegin < plimit && *pbegin != '\n' )
	+    {
	+        pbegin++;
	+    }
	+    if( pbegin < plimit )
	+    {
	+        pbegin++;
	+    }
	+
	+    char2DBT( &key, MSGCOUNT_KEY );
	+    uint2DBT( &val, &pthis->nmsgs );
	+    if( dbx_put( dbp, &key, &val ) != 0 )
	+    {
	+        goto bail;
	+    }
	+
	+    while( pbegin < plimit )
	+    {
	+        pend = pbegin;
	+        r.w.p = pbegin;
	+        r.w.len = 0;
	+        r.n = 0;
	+
	+        /* bounds are checked before each dereference */
	+        while( pend < plimit && *pend != '\n' )
	+        {
	+            *pend = tolower( (unsigned char)*pend );
	+            if( *pend == ' ' )
	+            {
	+                r.w.len = (pend-pbegin);
	+                r.n = strtol( pend+1, NULL, 10 );
	+            }
	+            pend++;
	+        }
	+        if( pend >= plimit )
	+        {
	+            /* last line has no terminating newline */
	+            goto bail;
	+        }
	+        if( pend > pbegin && *pbegin != '#' && *pbegin != ';' )
	+        {
	+            if( r.w.len == 0 \|\| r.w.len > MAXWORDLEN )
	+            {
	+                fprintf( stderr, "dbh_loadfile: bad file format\n" );
	+                goto bail;
	+            }
	+            strncpylwr( szword, r.w.p, r.w.len );
	+            szword[r.w.len] = '\0';
	+            char2DBT( &key, szword );
	+            uint2DBT( &val, &r.n );
	+            if( dbx_put( dbp, &key, &val ) != 0 )
	+            {
	+                goto bail;
	+            }
	+        }
	+        pbegin = pend+1;
	+    }
	+
	+    free( pbuf );
	+    close( fd );
	+
	+    return true;
	+
	+bail:
	+    /* release the buffer and descriptor on every failure path */
	+    free( pbuf );
	+    close( fd );
	+    return false;
	+}
	+
	+/*
	+ * Write the table to filename as a bogofilter-format text wordlist:
	+ * BOGOFILTER_HEADER with the cached message count, then one "word count"
	+ * line per record. The MSGCOUNT_KEY record itself is not written as a
	+ * word. Returns true on success, false if the table is not open, the
	+ * file cannot be created, or a cursor cannot be created.
	+ */
	+bool_t dbdb_table_export( dbtdb_t* pthis, cpchar filename )
	+{
	+    DB* dbp = pthis->dbp;
	+    int fd;
	+    char iobuf[IOBUFSIZE];
	+    char* p;
	+
	+    DBC* csrp;
	+    int rc;
	+    DBT key;
	+    DBT val;
	+
	+    if( dbp == NULL )
	+    {
	+        return false;
	+    }
	+    if( (fd = open( filename, O_CREAT\|O_WRONLY\|O_TRUNC, 0644 )) < 0 )
	+    {
	+        return false;
	+    }
	+    if( dbx_createcursor( dbp, csrp ) != 0 )
	+    {
	+        goto bail;
	+    }
	+
	+    DBT_init( &key );
	+    DBT_init( &val );
	+
	+    p = iobuf;
	+    p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
	+
	+    rc = dbx_first( csrp, &key, &val );
	+    while( rc == 0 )
	+    {
	+        assert( key.data != NULL && key.size <= MAXWORDLEN );
	+        assert( val.data != NULL && val.size == sizeof(uint) );
	+        if( key.size != MSGCOUNT_KEY_LEN \|\|
	+            memcmp( key.data, MSGCOUNT_KEY, MSGCOUNT_KEY_LEN ) != 0 )
	+        {
	+            memcpy( p, key.data, key.size ); p += key.size;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", DBT2uint(&val) );
	+            /*
	+             * NOTE(review): this comparison is always true, so the buffer
	+             * is flushed after every record. It looks like the bound was
	+             * meant to be iobuf+IOBUFSIZE -- confirm before changing.
	+             */
	+            if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
	+            {
	+                write( fd, iobuf, p-iobuf );
	+                p = iobuf;
	+            }
	+        }
	+        rc = dbx_next( csrp, &key, &val );
	+    }
	+    dbx_destroycursor( csrp );
	+    if( p != iobuf )
	+    {
	+        write( fd, iobuf, p-iobuf );
	+    }
	+    close( fd );
	+    return true;
	+
	+bail:
	+    /* only reached with the output file already open */
	+    close( fd );
	+    return false;
	+}
	+
	+/* Report the message count cached in the table object. */
	+uint dbdb_table_getmsgcount( dbtdb_t* pthis )
	+{
	+    const uint cached = pthis->nmsgs;
	+
	+    return cached;
	+}
	+
	+/*
	+ * Look up the stored count for a word. The word is lower-cased before
	+ * the lookup; a word with no record yields 0.
	+ */
	+uint dbdb_table_getcount( dbtdb_t* pthis, str_t* pword )
	+{
	+    char lowered[MAXWORDLEN+1];
	+    DBT dbkey;
	+    DBT dbval;
	+
	+    assert( pword->len <= MAXWORDLEN );
	+    strncpylwr( lowered, pword->p, pword->len );
	+    lowered[pword->len] = '\0';
	+
	+    DBT_init( &dbkey );
	+    DBT_init( &dbval );
	+    char2DBT( &dbkey, lowered );
	+
	+    if( dbx_get( pthis->dbp, &dbkey, &dbval ) != 0 )
	+    {
	+        return 0;
	+    }
	+    return DBT2uint( &dbval );
	+}
	+
	+#else /* def HAVE_LIBDB */
	+
	+/*
	+ * Fallback used when HAVE_LIBDB is not defined: the libdb backend is
	+ * unavailable, so opening it always fails with NULL.
	+ */
	+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpas…
	+{
	+ return NULL;
	+}
	+
	+#endif /* def HAVE_LIBDB */
	+
	+#ifdef UNIT_TEST
	+/*
	+ * Stand-alone smoke test (UNIT_TEST builds only): repeatedly opens a
	+ * word list and prints each entry as "str: <word> <count>".
	+ *
	+ * NOTE(review): this driver looks stale -- it calls dbh_open() with two
	+ * arguments and uses an undeclared `db`, neither of which matches the
	+ * declarations elsewhere in this file. Only the formatting defects are
	+ * fixed here.
	+ */
	+int main( int argc, char** argv )
	+{
	+    dbh_t* pdb;
	+    veciter_t iter;
	+    str_t* pstr;
	+    uint n;
	+
	+    if( argc != 2 )
	+    {
	+        fprintf( stderr, "usage: %s <file>\n", argv[0] );
	+        return 1;
	+    }
	+
	+    for( n = 0; n < 100; n++ )
	+    {
	+        pdb = dbh_open( "testlist", true );
	+
	+        vec_first( &db, &iter );
	+        while( (pstr = veciter_get( &iter )) != NULL )
	+        {
	+            char buf[MAXWORDLEN+32];
	+            char* p;
	+            /* buf only has room for MAXWORDLEN word bytes plus decoration */
	+            if( pstr->len > MAXWORDLEN )
	+            {
	+                fprintf( stderr, "str too long: %u chars\n", pstr->len );
	+                break;
	+            }
	+            strcpy( buf, "str: " );
	+            /* continue right on top of the NUL: the prefix is 5 chars long */
	+            p = buf + 5;
	+            memcpy( p, pstr->p, pstr->len );
	+            p += pstr->len;
	+            sprintf( p, " %u", pstr->count );
	+            puts( buf );
	+
	+            veciter_next( &iter );
	+        }
	+
	+        dbh_close( &db );
	+    }
	+
	+    return 0;
	+}
	+#endif /* def UNIT_TEST */
	diff --git a/dbdb.h b/dbdb.h
	@@ -0,0 +1,61 @@
	+/* $Id: dbdb.h,v 1.7 2002/10/14 22:17:19 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _DBDB_H
	+#define _DBDB_H
	+
	+#ifdef HAVE_LIBDB
	+
	+#include <db.h>
	+
	+/*
	+ * libdb table object. The leading function-pointer members are assigned
	+ * the dbdb_table_* functions in dbdb_db_opentable, so each takes the
	+ * table itself as first argument.
	+ */
	+typedef struct _dbtdb dbtdb_t;
	+struct _dbtdb
	+{
	+    bool_t (*close)(dbtdb_t*);
	+    bool_t (*mergeclose)(dbtdb_t*,vec_t*);
	+    bool_t (*unmergeclose)(dbtdb_t*,vec_t*);
	+    bool_t (*import)(dbtdb_t*,cpchar);
	+    bool_t (*export)(dbtdb_t*,cpchar);
	+    uint (*getmsgcount)(dbtdb_t*);
	+    uint (*getcount)(dbtdb_t*,str_t*);
	+
	+    DB* dbp; /* db handle */
	+#if defined(DB_VERSION_MAJOR) && DB_VERSION_MAJOR >= 3
	+    DB_ENV* envp; /* we don't own this */
	+#endif /* DB_VERSION_MAJOR */
	+    uint nmsgs; /* number of messages in table (cached) */
	+};
	+
	+/*
	+ * libdb database handle. close/opentable are function pointers taking
	+ * the handle itself as first argument (see dbdb_db_close and
	+ * dbdb_db_opentable).
	+ */
	+typedef struct _dbhdb dbhdb_t;
	+struct _dbhdb
	+{
	+    bool_t (*close)(dbhdb_t*);
	+    dbt_t* (*opentable)(dbhdb_t*,cpchar,bool_t);
	+
	+    char* dir; /* directory for db files */
	+#if defined(DB_VERSION_MAJOR) && DB_VERSION_MAJOR >= 3
	+    DB_ENV* envp; /* db environment */
	+#endif /* DB_VERSION_MAJOR */
	+};
	+
	+dbh_t* dbdb_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpa…
	+bool_t dbdb_db_close( dbhdb_t* pthis );
	+dbt_t* dbdb_db_opentable( dbhdb_t* pthis, cpchar table, bool_t rdonly );
	+
	+bool_t dbdb_table_close( dbtdb_t* pthis );
	+bool_t dbdb_table_mergeclose( dbtdb_t* pthis, vec_t* pmsg );
	+bool_t dbdb_table_unmergeclose( dbtdb_t* pthis, vec_t* pmsg );
	+bool_t dbdb_table_import( dbtdb_t* pthis, cpchar filename );
	+bool_t dbdb_table_export( dbtdb_t* pthis, cpchar filename );
	+uint dbdb_table_getmsgcount( dbtdb_t* pthis );
	+uint dbdb_table_getcount( dbtdb_t* pthis, str_t* pword );
	+
	+#endif /* def HAVE_LIBDB */
	+
	+#endif /* ndef _DBDB_H */
	diff --git a/dbg.c b/dbg.c
	@@ -0,0 +1,302 @@
	+/* $Id: dbg.c,v 1.3 2002/10/19 08:30:57 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * dbg.c: debug functions for bmf.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include <stdarg.h>
	+
	+
	+uint g_verbose = 0;
	+
	+/*
	+ * Emit a printf-style diagnostic when the global verbosity (g_verbose)
	+ * is at least `level`. Output is truncated to fit a 4096-byte buffer.
	+ */
	+void verbose( int level, const char* fmt, ... )
	+{
	+    if( g_verbose >= level )
	+    {
	+        char str[4096];
	+        va_list v;
	+        va_start( v, fmt );
	+        vsnprintf( str, sizeof(str)-1, fmt, v );
	+        va_end( v ); /* every va_start needs a matching va_end */
	+        str[sizeof(str)-1] = '\0';
	+#ifdef _UNIX
	+        fputs( str, stderr );
	+#endif
	+#ifdef _WIN32
	+        ::OutputDebugString( str );
	+#endif
	+    }
	+}
	+
	+#ifndef NDEBUG
	+
	+/*
	+ * Debug-build diagnostic output: format printf-style into a 4096-byte
	+ * buffer (truncating if needed) and send it to the platform debug sink.
	+ */
	+void dbgout( const char* fmt, ... )
	+{
	+    char str[4096];
	+    va_list v;
	+    va_start( v, fmt );
	+    vsnprintf( str, sizeof(str)-1, fmt, v );
	+    va_end( v ); /* every va_start needs a matching va_end */
	+    str[sizeof(str)-1] = '\0';
	+#ifdef _UNIX
	+    fputs( str, stderr );
	+#endif
	+#ifdef _WIN32
	+    ::OutputDebugString( str );
	+#endif
	+}
	+
	+/*
	+ * Heap management routines. These routines use unbalanced binary trees to
	+ * keep track of allocations in an attempt to make them fast yet simple.
	+ *
	+ * Each block of memory consists of an alloc_node header, the requested
	+ * memory block, and guard bytes before and after the requested memory
	+ * block. The requested memory block is filled with a semi-random byte
	+ * value to ensure that the caller does not rely on any particular initial
	+ * bit pattern (eg. a block of zeros or NULLs). It is refilled with a
	+ * (possibly different) byte value after deallocation to ensure that the
	+ * caller doesn't attempt to use the freed memory.
	+ */
	+
	+/* we need to use the real malloc and free */
	+#undef malloc
	+#undef free
	+
	+/* Header placed in front of every tracked allocation. */
	+struct _alloc_node
	+{
	+    struct _alloc_node* lptr; /* subtree of nodes at lower addresses */
	+    struct _alloc_node* rptr; /* subtree of nodes at higher addresses */
	+    size_t len;               /* size of the caller-visible block */
	+    cpchar file;              /* allocating source file */
	+    uint line;                /* allocating source line */
	+};
	+typedef struct _alloc_node alloc_node;
	+
	+static alloc_node* g_heap = NULL;
	+
	+/* Our magic guard bytes */
	+static byte g_guard[] =
	+{
	+ 0xDE, 0xAD, 0xBE, 0xEF, 0xDE, 0xAD, 0xBE, 0xEF,
	+ 0xDE, 0xAD, 0xBE, 0xEF, 0xDE, 0xAD, 0xBE, 0xEF
	+};
	+
	+/*
	+ * Tracked allocation. Layout of the underlying block:
	+ *   [alloc_node][guard][n user bytes][guard]
	+ * The user bytes are filled with `fill`, and the node is inserted into
	+ * the g_heap tree ordered by node address. A request for 0 bytes is
	+ * treated as 1. Returns the user pointer, or NULL if the allocation
	+ * fails or the padded size would overflow size_t.
	+ */
	+void* debug_malloc( cpchar file, uint line, size_t n, int fill )
	+{
	+    const size_t overhead = 2*sizeof(g_guard) + sizeof(alloc_node);
	+    byte* pmem = NULL;
	+    alloc_node* pnode;
	+
	+    if( n == 0 )
	+    {
	+        n = 1;
	+    }
	+    if( n > (size_t)-1 - overhead )
	+    {
	+        return NULL;
	+    }
	+    pnode = (alloc_node*)malloc( n + overhead );
	+    if( pnode != NULL )
	+    {
	+        alloc_node** ppuplink;
	+        alloc_node* pcur;
	+
	+        pmem = (byte*)pnode + sizeof(alloc_node) + sizeof(g_guard);
	+        memcpy( pmem - sizeof(g_guard), g_guard, sizeof(g_guard) );
	+        memset( pmem, fill, n );
	+        memcpy( pmem + n, g_guard, sizeof(g_guard) );
	+
	+        pnode->lptr = pnode->rptr = NULL;
	+        pnode->len = n;
	+        pnode->file = file;
	+        pnode->line = line;
	+
	+        /* walk down to the empty link where this node belongs */
	+        ppuplink = &g_heap;
	+        pcur = g_heap;
	+        while( pcur != NULL )
	+        {
	+            if( pnode == pcur )
	+            {
	+                dbgout( "%s(%u): * FATAL: duplicate memory allocated *\n",…
	+                assert( false );
	+                exit( -1 );
	+            }
	+            if( pnode < pcur )
	+            {
	+                ppuplink = &pcur->lptr;
	+                pcur = pcur->lptr;
	+            }
	+            else
	+            {
	+                ppuplink = &pcur->rptr;
	+                pcur = pcur->rptr;
	+            }
	+        }
	+        *ppuplink = pnode;
	+    }
	+
	+    return pmem;
	+}
	+
	+/*
	+ * Tracked free. Looks up p in the g_heap tree, verifies both guard
	+ * regions, scribbles over the user bytes, unlinks the node and releases
	+ * the underlying block. Freeing NULL is a no-op. Freeing with an empty
	+ * heap, freeing an address not in the tree, or finding damaged guard
	+ * bytes reports through dbgout and terminates the process.
	+ */
	+void debug_free( cpchar file, uint line, void* p )
	+{
	+ alloc_node** ppuplink;
	+ alloc_node* pcur;
	+
	+ if( p == NULL )
	+ {
	+ return;
	+ }
	+ if( g_heap == NULL )
	+ {
	+ dbgout( "%s(%u): * FATAL: delete with empty heap *\n", file, line …
	+ assert( false );
	+ exit( -1 );
	+ }
	+
	+ /* ppuplink always points at the link that refers to pcur */
	+ ppuplink = &g_heap;
	+ pcur = g_heap;
	+ while( pcur != NULL )
	+ {
	+ /* user pointer that debug_malloc handed out for this node */
	+ void* pcurblk = (char*)pcur + sizeof(alloc_node) + sizeof(g_guard);
	+ if( p == pcurblk )
	+ {
	+ byte* pmem = (byte*)p;
	+ /* both guard regions must still hold the g_guard pattern */
	+ if( memcmp( pmem - sizeof(g_guard), g_guard, sizeof(g_guard) ) != …
	+ memcmp( pmem + pcur->len, g_guard, sizeof(g_guard) ) != 0 )
	+ {
	+ dbgout( "%s(%u): *** FATAL: corrupted memory at %p\n", file, l…
	+ assert( false );
	+ exit( -1 );
	+ }
	+ /* overwrite the contents so stale reads are noticeable */
	+ memset( pmem, rand(), pcur->len );
	+ if( pcur->lptr && pcur->rptr )
	+ {
	+ /*
	+ * node has both ptrs so replace it with left child and move
	+ * right child to bottom right of left child's tree
	+ */
	+ alloc_node* pend = pcur->lptr;
	+ while( pend->rptr ) pend = pend->rptr;
	+ *ppuplink = pcur->lptr;
	+ pend->rptr = pcur->rptr;
	+ }
	+ else
	+ {
	+ /* move child up */
	+ *ppuplink = (pcur->lptr) ? pcur->lptr : pcur->rptr;
	+ }
	+ free( pcur );
	+ return;
	+ }
	+ /* descend: lower addresses to the left, higher to the right */
	+ if( p < pcurblk )
	+ {
	+ ppuplink = &pcur->lptr;
	+ pcur = pcur->lptr;
	+ }
	+ else
	+ {
	+ ppuplink = &pcur->rptr;
	+ pcur = pcur->rptr;
	+ }
	+ }
	+
	+ /* fell off the tree: p was never handed out by debug_malloc */
	+ dbgout( "%s(%u): * FATAL: delete on unalloced memory *\n", file, line …
	+ assert( false );
	+ exit( -1 );
	+}
	+
	+/*
	+ * Tracked realloc, built on debug_malloc/debug_free.
	+ *   p == NULL : behaves like debug_malloc(n)
	+ *   n == 0    : frees p and returns NULL
	+ *   otherwise : allocates a new block, copies min(old size, n) bytes and
	+ *               frees the old block; on allocation failure returns NULL
	+ *               and leaves p untouched.
	+ */
	+void* debug_realloc( cpchar file, uint line, void* p, size_t n )
	+{
	+    void* pnew;
	+
	+    if( p == NULL )
	+    {
	+        pnew = debug_malloc( file, line, n, rand() );
	+    }
	+    else if( n == 0 )
	+    {
	+        debug_free( file, line, p );
	+        pnew = NULL;
	+    }
	+    else
	+    {
	+        /* the node header sits before the leading guard (see debug_malloc) */
	+        alloc_node* pnode = (alloc_node*)((char*)p-sizeof(g_guard)-sizeof(alloc_node));
	+        pnew = debug_malloc( file, line, n, rand() );
	+        if( pnew != NULL )
	+        {
	+            /* never copy more than the new block can hold when shrinking */
	+            size_t ncopy = (pnode->len < n) ? pnode->len : n;
	+            memcpy( pnew, p, ncopy );
	+            debug_free( file, line, p );
	+        }
	+    }
	+
	+    return pnew;
	+}
	+
	+/*
	+ * Tracked strdup: returns a debug_malloc'ed copy of s, or NULL if the
	+ * allocation fails.
	+ */
	+char* debug_strdup( cpchar file, uint line, cpchar s )
	+{
	+    char* s2;
	+    size_t sl = strlen(s);
	+
	+    s2 = (char*)debug_malloc( file, line, sl+1, 0 );
	+    if( s2 == NULL )
	+    {
	+        return NULL;
	+    }
	+    memcpy( s2, s, sl );
	+    s2[sl] = '\0';
	+
	+    return s2;
	+}
	+
	+/*
	+ * Tracked strndup: returns a debug_malloc'ed, NUL-terminated copy of at
	+ * most n characters of s (the same contract as the strndup() this
	+ * replaces via the macro in dbg.h), or NULL if the allocation fails.
	+ * s is never read past its first n bytes.
	+ */
	+char* debug_strndup( cpchar file, uint line, cpchar s, size_t n )
	+{
	+    char* s2;
	+    size_t sl = 0;
	+
	+    /* bounded length scan: s need not be terminated within n bytes */
	+    while( sl < n && s[sl] != '\0' )
	+    {
	+        sl++;
	+    }
	+
	+    s2 = (char*)debug_malloc( file, line, sl+1, 0 );
	+    if( s2 == NULL )
	+    {
	+        return NULL;
	+    }
	+    memcpy( s2, s, sl );
	+    s2[sl] = '\0';
	+
	+    return s2;
	+}
	+
	+/*
	+ * In-order walk of the allocation tree: report every live block (call
	+ * site, size, user address) through dbgout and add its size to *pttl.
	+ */
	+static void walk_alloc_tree( alloc_node* pcur, size_t* pttl )
	+{
	+    if( pcur != NULL )
	+    {
	+        walk_alloc_tree( pcur->lptr, pttl );
	+        /* byte arithmetic: the user block starts after header + guard */
	+        dbgout( "%s(%u): %lu bytes at %p\n", pcur->file, pcur->line,
	+                (unsigned long)pcur->len,
	+                (void*)((char*)pcur+sizeof(alloc_node)+sizeof(g_guard)) );
	+        *pttl += pcur->len;
	+        walk_alloc_tree( pcur->rptr, pttl );
	+    }
	+}
	+
	+/*
	+ * Leak report: if any tracked allocation is still live, list each one
	+ * and the total number of bytes through dbgout. Prints nothing when the
	+ * heap is empty.
	+ */
	+void dump_alloc_heap( void )
	+{
	+    if( g_heap != NULL )
	+    {
	+        size_t ttl = 0;
	+        dbgout( "\n" );
	+        dbgout( "Memory leaks detected\n" );
	+        dbgout( "=====================\n" );
	+        dbgout( "\n" );
	+        walk_alloc_tree( g_heap, &ttl );
	+        dbgout( "\n" );
	+        dbgout( "=====================\n" );
	+        /* size_t is not unsigned int everywhere; print via unsigned long */
	+        dbgout( "Total bytes: %lu\n", (unsigned long)ttl );
	+        dbgout( "=====================\n" );
	+    }
	+}
	+
	+#else /* ndef NDEBUG */
	+
	+/* Release build: debug output is compiled out. */
	+void dbgout( const char* fmt, ... )
	+{
	+    (void)fmt; /* intentionally ignored */
	+}
	+
	+/* Release build: there is no tracked heap, so nothing to report. */
	+void dump_alloc_heap( void )
	+{
	+    return;
	+}
	+
	+#endif /* ndef NDEBUG */
	diff --git a/dbg.h b/dbg.h
	@@ -0,0 +1,35 @@
	+/* $Id: dbg.h,v 1.1 2002/10/14 07:09:51 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _DBG_H
	+#define _DBG_H
	+
	+extern uint g_verbose;
	+
	+void verbose( int level, const char* fmt, ... );
	+
	+void dbgout( const char* fmt, ... );
	+void dump_alloc_heap( void );
	+
	+#ifndef NDEBUG
	+/* tracked allocator entry points (implemented in dbg.c) */
	+void* debug_malloc ( cpchar file, uint line, size_t n, int fill );
	+void debug_free ( cpchar file, uint line, void* p );
	+void* debug_realloc ( cpchar file, uint line, void* p, size_t n );
	+char* debug_strdup ( cpchar file, uint line, cpchar s );
	+char* debug_strndup ( cpchar file, uint line, cpchar s, size_t n );
	+
	+/*
	+ * Route the standard allocation calls through the tracked versions.
	+ * calloc takes the standard two arguments and is served by debug_malloc
	+ * with a zero fill; the product (n)*(s) is not checked for overflow.
	+ */
	+#define malloc(n) debug_malloc (__FILE__,__LINE__,(n),rand())
	+#define calloc(n,s) debug_malloc (__FILE__,__LINE__,(n)*(s),0)
	+#define free(p) debug_free (__FILE__,__LINE__,(p))
	+#define realloc(p,n) debug_realloc (__FILE__,__LINE__,(p),(n))
	+#define strdup(s) debug_strdup (__FILE__,__LINE__,(s))
	+#define strndup(s,n) debug_strndup (__FILE__,__LINE__,(s),(n))
	+#endif /* ndef NDEBUG */
	+
	+#endif /* ndef _DBG_H */
	diff --git a/dbh.c b/dbh.c
	@@ -0,0 +1,74 @@
	+/* $Id: dbh.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * dbh.c: database handler interface
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+
	+#include "dbh.h"
	+#include "dbtext.h"
	+#include "dbdb.h"
	+#include "dbmysql.h"
	+
	+/*
	+ * get count for new (incoming) word. there may be duplicate entries for the
	+ * str, so sum the counts and leave the iterator at the last one.
	+ *
	+ * the list referenced in the iterator must be sorted.
	+ */
	+uint db_getnewcount( veciter_t* piter )
	+{
	+ str_t* pstr;
	+ uint count;
	+ veciter_t curiter;
	+ str_t* pcurstr;
	+
	+ /* the word the caller's iterator currently points at */
	+ pstr = &piter->plist->pitems[piter->index];
	+ count = 0;
	+
	+ /* scan forward with a private copy of the iterator */
	+ curiter.plist = piter->plist;
	+ curiter.index = piter->index;
	+ pcurstr = &curiter.plist->pitems[curiter.index];
	+
	+ while( curiter.index < curiter.plist->nitems && str_casecmp( pstr, pcurstr…
	+ {
	+ /* drag the caller's iterator along to the entry just counted */
	+ piter->index = curiter.index;
	+ /* the per-message count is capped at MAXFREQ */
	+ count = min( MAXFREQ, count + 1 );
	+ veciter_next( &curiter );
	+ pcurstr = &curiter.plist->pitems[curiter.index];
	+ }
	+
	+ return count;
	+}
	+
	+/*
	+ * Open a database using the backend selected by dbfmt, forwarding the
	+ * connection parameters to that backend's open function. Returns the
	+ * backend's handle cast to dbh_t*, or NULL if the backend returned NULL.
	+ * An unknown dbfmt trips the assert and yields NULL.
	+ */
	+dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, c…
	+{
	+ dbh_t* pdb = NULL;
	+
	+ switch( dbfmt )
	+ {
	+ case db_text:
	+ pdb = (dbh_t*)dbtext_db_open( dbhost, dbname, dbuser, dbpass );
	+ break;
	+ case db_db:
	+ pdb = (dbh_t*)dbdb_db_open( dbhost, dbname, dbuser, dbpass );
	+ break;
	+ case db_mysql:
	+ pdb = (dbh_t*) dbmysql_db_open( dbhost, dbname, dbuser, dbpass );
	+ break;
	+ default:
	+ assert(false);
	+ }
	+
	+ return pdb;
	+}
	diff --git a/dbh.h b/dbh.h
	@@ -0,0 +1,56 @@
	+/* $Id: dbh.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _DBH_H
	+#define _DBH_H
	+
	+/* database formats */
	+typedef enum
	+{
	+ db_text, /* flat text */
	+ db_db, /* libdb */
	+ db_mysql /* mysql */
	+} dbfmt_t;
	+
	+/* record/field structure */
	+typedef struct _rec
	+{
	+ str_t w;
	+ uint n;
	+} rec_t;
	+
	+/* database table */
	+typedef struct _dbt dbt_t;
	+struct _dbt
	+{
	+    /* per-backend operations; each receives the table as first argument */
	+    bool_t (*close)(dbt_t*);
	+    bool_t (*mergeclose)(dbt_t*,vec_t*);
	+    bool_t (*unmergeclose)(dbt_t*,vec_t*);
	+    bool_t (*import)(dbt_t*,cpchar);
	+    bool_t (*export)(dbt_t*,cpchar);
	+    uint (*getmsgcount)(dbt_t*);
	+    uint (*getcount)(dbt_t*,str_t*);
	+};
	+
	+/* database instance */
	+typedef struct _dbh dbh_t;
	+struct _dbh
	+{
	+    /* per-backend operations; each receives the handle as first argument */
	+    bool_t (*close)(dbh_t*);
	+    dbt_t* (*opentable)(dbh_t*,cpchar,bool_t);
	+};
	+
	+dbh_t* dbh_open( dbfmt_t dbfmt, cpchar dbhost, cpchar dbname, cpchar dbuser, …
	+
	+#define BOGOFILTER_HEADER "# bogofilter wordlist (format version A): %u\n"
	+#define TEXTDB_MAXLINELEN (MAXWORDLEN+32)
	+
	+uint db_getnewcount( veciter_t* piter );
	+
	+#endif /* ndef _DBH_H */
	diff --git a/dbmysql.c b/dbmysql.c
	@@ -0,0 +1,545 @@
	+/* $Id: dbmysql.c,v 1.9 2002/10/14 07:09:51 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * dbmysql.c: mysql database handler
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+
	+#include "dbh.h"
	+#include "dbmysql.h"
	+
	+#ifdef HAVE_MYSQL
	+
	+#define MAXQUERY 256
	+
	+static MYSQL* g_mysql = NULL;
	+
	+/*
	+ * Copy s to d for use inside a single-quoted SQL string literal: every
	+ * single quote is doubled and every character is lower-cased. d is
	+ * always NUL-terminated and must have room for 2*strlen(s)+1 bytes.
	+ *
	+ * NOTE(review): backslashes are copied unchanged; whether that is safe
	+ * depends on the server's escape mode -- confirm against the callers.
	+ */
	+static void sql_escape( char* d, const char* s )
	+{
	+    while( *s != '\0' )
	+    {
	+        if( *s == '\'' )
	+        {
	+            *d++ = '\'';
	+        }
	+        /* go through unsigned char: tolower is undefined for negative values */
	+        *d++ = (char)tolower( (unsigned char)*s++ );
	+    }
	+    *d = '\0';
	+}
	+
	+/*
	+ * Connect to a MySQL database and return a handle wired to the
	+ * dbmysql_db_* handlers. The process-wide MYSQL object (g_mysql) is
	+ * created on first use. Returns NULL if that initialisation fails; on an
	+ * allocation or connection failure it also prints a message to stderr
	+ * before returning NULL.
	+ */
	+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
	+{
	+ dbhmysql_t* pthis;
	+
	+ /* lazily create the shared client object */
	+ if( g_mysql == NULL )
	+ {
	+ g_mysql = mysql_init( NULL );
	+ if( g_mysql == NULL )
	+ {
	+ return NULL;
	+ }
	+ }
	+
	+ pthis = (dbhmysql_t*)malloc( sizeof(dbhmysql_t) );
	+ if( pthis == NULL )
	+ {
	+ perror( "malloc()" );
	+ goto bail;
	+ }
	+ pthis->close = dbmysql_db_close;
	+ pthis->opentable = dbmysql_db_opentable;
	+
	+ pthis->dbh = mysql_real_connect( g_mysql, dbhost, dbuser, dbpass, dbname, …
	+ if( pthis->dbh == NULL )
	+ {
	+ goto bail;
	+ }
	+
	+
	+ return (dbh_t*)pthis;
	+
	+bail:
	+ /* pthis is NULL here when the allocation itself failed */
	+ fprintf( stderr, "cannot open mysql database '%s': %s\n", dbname, mysql_er…
	+ free( pthis );
	+ return NULL;
	+}
	+
	+/*
	+ * Close the MySQL connection held by the handle, if any, and forget it.
	+ * Always returns true.
	+ */
	+bool_t dbmysql_db_close( dbhmysql_t* pthis )
	+{
	+    if( pthis->dbh == NULL )
	+    {
	+        return true;
	+    }
	+
	+    mysql_close( pthis->dbh );
	+    pthis->dbh = NULL;
	+    return true;
	+}
	+
	+/*
	+ * Create a table object for the MySQL table `table`, wired to the
	+ * dbmysql_table_* handlers, and load its cached message count from the
	+ * MSGCOUNT_KEY row (0 if there is no such row). Returns NULL if memory
	+ * cannot be allocated, the query does not fit in MAXQUERY bytes, or the
	+ * query fails. The rdonly argument is not consulted.
	+ */
	+dbt_t* dbmysql_db_opentable( dbhmysql_t* pthis, cpchar table, bool_t rdonly )
	+{
	+    dbtmysql_t* ptable;
	+
	+    char query[MAXQUERY];
	+    int qlen;
	+    MYSQL_RES* res;
	+    MYSQL_ROW row;
	+
	+    ptable = (dbtmysql_t*)malloc( sizeof(dbtmysql_t) );
	+    if( ptable == NULL )
	+    {
	+        return NULL;
	+    }
	+    ptable->close = dbmysql_table_close;
	+    ptable->mergeclose = dbmysql_table_mergeclose;
	+    ptable->unmergeclose = dbmysql_table_unmergeclose;
	+    ptable->import = dbmysql_table_import;
	+    ptable->export = dbmysql_table_export;
	+    ptable->getmsgcount = dbmysql_table_getmsgcount;
	+    ptable->getcount = dbmysql_table_getcount;
	+    ptable->pdb = pthis;
	+    ptable->table = strdup( table );
	+    ptable->nmsgs = 0;
	+    if( ptable->table == NULL )
	+    {
	+        goto bail;
	+    }
	+
	+    /* bounded formatting: a long table name must not overflow query[] */
	+    qlen = snprintf( query, sizeof(query), "SELECT count FROM %s WHERE name='%s'",
	+                     table, MSGCOUNT_KEY );
	+    if( qlen < 0 \|\| (size_t)qlen >= sizeof(query) )
	+    {
	+        goto bail;
	+    }
	+    if( mysql_query( pthis->dbh, query ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( (res = mysql_store_result( pthis->dbh )) == NULL )
	+    {
	+        goto bail;
	+    }
	+    while( (row = mysql_fetch_row( res )) )
	+    {
	+        ptable->nmsgs = atoi( row[0] );
	+    }
	+    /* a stored result set must be released by the caller */
	+    mysql_free_result( res );
	+
	+    return (dbt_t*)ptable;
	+
	+bail:
	+    free( ptable->table );
	+    free( ptable );
	+    return NULL;
	+}
	+
	+/*
	+ * Detach the table object from its database: free the stored table name
	+ * and clear the back-pointer. A table that is already detached is left
	+ * alone. Always returns true.
	+ */
	+bool_t dbmysql_table_close( dbtmysql_t* pthis )
	+{
	+    if( pthis->pdb == NULL )
	+    {
	+        return true;
	+    }
	+
	+    free( pthis->table );
	+    pthis->table = NULL;
	+    pthis->pdb = NULL;
	+    return true;
	+}
	+
	+/*
	+ * Add one message's words to the MySQL table and close the table object.
	+ * Bumps the stored message count, then for each distinct word in pmsg
	+ * adds its occurrence count to the word's row, inserting the row when
	+ * the UPDATE touched nothing. Returns false if the table is not attached
	+ * to an open connection or a query fails.
	+ */
	+bool_t dbmysql_table_mergeclose( dbtmysql_t* pthis, vec_t* pmsg )
	+{
	+ char szword[MAXWORDLEN+1];
	+ char szsqlword[MAXWORDLEN*2+1];
	+ veciter_t msgiter;
	+ str_t* pmsgstr;
	+
	+ char query[MAXQUERY];
	+ uint count;
	+
	+ if( pthis->pdb == NULL \|\| pthis->pdb->dbh == NULL )
	+ {
	+ assert( false );
	+ return false;
	+ }
	+
	+ pthis->nmsgs++;
	+
	+ /* store the new message count: UPDATE first, INSERT if no row changed */
	+ sprintf( query, "UPDATE %s SET count=%u WHERE name='%s'",
	+ pthis->table, pthis->nmsgs, MSGCOUNT_KEY );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )",
	+ pthis->table, MSGCOUNT_KEY, pthis->nmsgs );
	+ /* the result of this INSERT is not checked */
	+ mysql_query( pthis->pdb->dbh, query );
	+ }
	+
	+ vec_first( pmsg, &msgiter );
	+ pmsgstr = veciter_get( &msgiter );
	+
	+ while( pmsgstr != NULL )
	+ {
	+ assert( pmsgstr->len <= MAXWORDLEN );
	+ strncpylwr( szword, pmsgstr->p, pmsgstr->len );
	+ szword[pmsgstr->len] = '\0';
	+ sql_escape( szsqlword, szword );
	+ /* occurrences of this word in the message */
	+ count = db_getnewcount( &msgiter );
	+
	+ sprintf( query, "UPDATE %s SET count=count+%u WHERE name='%s'",
	+ pthis->table, count, szsqlword );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ /* no existing row for this word: create it */
	+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u …
	+ pthis->table, szsqlword, count );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ goto bail;
	+ }
	+ }
	+
	+ veciter_next( &msgiter );
	+ pmsgstr = veciter_get( &msgiter );
	+ }
	+
	+ veciter_destroy( &msgiter );
	+ return dbmysql_table_close( pthis );
	+
	+bail:
	+ /* NOTE(review): the iterator is not destroyed and the table stays open here */
	+ return false;
	+}
	+
	+/*
	+ * Remove one message's words from the MySQL table and close the table
	+ * object. Decrements the stored message count (never below zero), then
	+ * for each distinct word in pmsg subtracts its occurrence count from the
	+ * word's row; a word with no row is inserted with a count of zero.
	+ * Returns false if the table is not attached to an open connection or a
	+ * query fails.
	+ */
	+bool_t dbmysql_table_unmergeclose( dbtmysql_t* pthis, vec_t* pmsg )
	+{
	+ char szword[MAXWORDLEN+1];
	+ char szsqlword[MAXWORDLEN*2+1];
	+ veciter_t msgiter;
	+ str_t* pmsgstr;
	+
	+ char query[MAXQUERY];
	+ uint count;
	+
	+ if( pthis->pdb == NULL \|\| pthis->pdb->dbh == NULL )
	+ {
	+ assert( false );
	+ return false;
	+ }
	+
	+ if( pthis->nmsgs > 0 )
	+ {
	+ pthis->nmsgs--;
	+ }
	+
	+ /* store the new message count: UPDATE first, INSERT if no row changed */
	+ sprintf( query, "UPDATE %s SET count=%u WHERE name='%s'",
	+ pthis->table, pthis->nmsgs, MSGCOUNT_KEY );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )",
	+ pthis->table, MSGCOUNT_KEY, pthis->nmsgs );
	+ /* the result of this INSERT is not checked */
	+ mysql_query( pthis->pdb->dbh, query );
	+ }
	+
	+ vec_first( pmsg, &msgiter );
	+ pmsgstr = veciter_get( &msgiter );
	+
	+ while( pmsgstr != NULL )
	+ {
	+ assert( pmsgstr->len <= MAXWORDLEN );
	+ strncpylwr( szword, pmsgstr->p, pmsgstr->len );
	+ szword[pmsgstr->len] = '\0';
	+ sql_escape( szsqlword, szword );
	+ /* occurrences of this word in the message */
	+ count = db_getnewcount( &msgiter );
	+
	+ sprintf( query, "UPDATE %s SET count=GREATEST(0,count-%u) WHERE name='…
	+ pthis->table, count, szsqlword );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ /* this should not happen, so write with count=0 */
	+ sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', 0 )…
	+ pthis->table, szsqlword );
	+ if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+ {
	+ goto bail;
	+ }
	+ if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+ {
	+ goto bail;
	+ }
	+ }
	+
	+ veciter_next( &msgiter );
	+ pmsgstr = veciter_get( &msgiter );
	+ }
	+
	+ veciter_destroy( &msgiter );
	+ return dbmysql_table_close( pthis );
	+
	+bail:
	+ /* NOTE(review): the iterator is not destroyed and the table stays open here */
	+ return false;
	+}
	+
	+/*
	+ * Load a bogofilter-format text wordlist into the MySQL table.
	+ *
	+ * The file must start with BOGOFILTER_HEADER (which carries the message
	+ * count); every following line is "word count". Lines starting with '#'
	+ * or ';' and empty lines are skipped. Each record is INSERTed as a new
	+ * row. A final line without a trailing newline is treated as an error.
	+ *
	+ * Returns true on success. On any failure returns false; rows inserted
	+ * before the failure remain in the table.
	+ */
	+bool_t dbmysql_table_import( dbtmysql_t* pthis, cpchar filename )
	+{
	+    int fd;
	+    struct stat st;
	+    char* pbuf = NULL;
	+    char* pbegin;
	+    char* pend;
	+    char* plimit;
	+    rec_t r;
	+    char szword[MAXWORDLEN+1];
	+    char szsqlword[MAXWORDLEN*2+1];
	+    char query[MAXQUERY];
	+
	+    if( pthis->pdb == NULL \|\| pthis->pdb->dbh == NULL )
	+    {
	+        return false;
	+    }
	+    if( (fd = open( filename, O_RDONLY, 0644 )) < 0 )
	+    {
	+        return false;
	+    }
	+    if( fstat( fd, &st ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( st.st_size <= 0 )
	+    {
	+        goto bail;
	+    }
	+    /* one extra byte so sscanf/strtol always see a terminated string */
	+    pbuf = (char*)malloc( st.st_size + 1 );
	+    if( pbuf == NULL )
	+    {
	+        goto bail;
	+    }
	+    if( read( fd, pbuf, st.st_size ) != st.st_size )
	+    {
	+        goto bail;
	+    }
	+    pbuf[st.st_size] = '\0';
	+    plimit = pbuf + st.st_size;
	+
	+    if( sscanf( pbuf, BOGOFILTER_HEADER, &pthis->nmsgs ) != 1 )
	+    {
	+        goto bail;
	+    }
	+    /* skip the header line without running off the end of the buffer */
	+    pbegin = pbuf;
	+    while( pbegin < plimit && *pbegin != '\n' )
	+    {
	+        pbegin++;
	+    }
	+    if( pbegin < plimit )
	+    {
	+        pbegin++;
	+    }
	+
	+    sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )",
	+             pthis->table, MSGCOUNT_KEY, pthis->nmsgs );
	+    /* a failed query must not slip through the affected-rows test */
	+    if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+    {
	+        goto bail;
	+    }
	+
	+    while( pbegin < plimit )
	+    {
	+        pend = pbegin;
	+        r.w.p = pbegin;
	+        r.w.len = 0;
	+        r.n = 0;
	+
	+        /* bounds are checked before each dereference */
	+        while( pend < plimit && *pend != '\n' )
	+        {
	+            *pend = tolower( (unsigned char)*pend );
	+            if( *pend == ' ' )
	+            {
	+                r.w.len = (pend-pbegin);
	+                r.n = strtol( pend+1, NULL, 10 );
	+            }
	+            pend++;
	+        }
	+        if( pend >= plimit )
	+        {
	+            /* last line has no terminating newline */
	+            goto bail;
	+        }
	+        if( pend > pbegin && *pbegin != '#' && *pbegin != ';' )
	+        {
	+            if( r.w.len == 0 \|\| r.w.len > MAXWORDLEN )
	+            {
	+                fprintf( stderr, "dbh_loadfile: bad file format\n" );
	+                goto bail;
	+            }
	+            strncpylwr( szword, r.w.p, r.w.len );
	+            szword[r.w.len] = '\0';
	+            sql_escape( szsqlword, szword );
	+
	+            sprintf( query, "INSERT INTO %s ( name, count ) VALUES ( '%s', %u )",
	+                     pthis->table, szsqlword, r.n );
	+            if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+            {
	+                goto bail;
	+            }
	+            if( mysql_affected_rows( pthis->pdb->dbh ) == 0 )
	+            {
	+                goto bail;
	+            }
	+        }
	+        pbegin = pend+1;
	+    }
	+
	+    free( pbuf );
	+    close( fd );
	+    return true;
	+
	+bail:
	+    /* release the buffer and descriptor on every failure path */
	+    free( pbuf );
	+    close( fd );
	+    return false;
	+}
	+
	+/*
	+ * Write the MySQL table to filename as a bogofilter-format text
	+ * wordlist: BOGOFILTER_HEADER with the cached message count, then one
	+ * "name count" line per row. The MSGCOUNT_KEY row is not written as a
	+ * word. Returns true on success, false if the table is not attached to
	+ * an open connection, the file cannot be created, or the query fails.
	+ */
	+bool_t dbmysql_table_export( dbtmysql_t* pthis, cpchar filename )
	+{
	+    int fd;
	+    char iobuf[IOBUFSIZE];
	+    char* p = iobuf;
	+
	+    char query[MAXQUERY];
	+    int qlen;
	+    MYSQL_RES* res;
	+    MYSQL_ROW row;
	+
	+    if( pthis->pdb == NULL \|\| pthis->pdb->dbh == NULL )
	+    {
	+        return false;
	+    }
	+    if( (fd = open( filename, O_CREAT\|O_WRONLY\|O_TRUNC, 0644 )) < 0 )
	+    {
	+        return false;
	+    }
	+
	+    p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
	+
	+    qlen = snprintf( query, sizeof(query), "SELECT name, count FROM %s",
	+                     pthis->table );
	+    if( qlen < 0 \|\| (size_t)qlen >= sizeof(query) )
	+    {
	+        goto bail;
	+    }
	+    if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    /* rows can only be fetched from a retrieved result set */
	+    if( (res = mysql_store_result( pthis->pdb->dbh )) == NULL )
	+    {
	+        goto bail;
	+    }
	+
	+    while( (row = mysql_fetch_row( res )) )
	+    {
	+        if( strcmp( row[0], MSGCOUNT_KEY ) == 0 )
	+        {
	+            continue;
	+        }
	+
	+        p += sprintf( p, "%s %s\n", row[0], row[1] );
	+        /*
	+         * NOTE(review): this comparison is always true, so the buffer is
	+         * flushed after every row. It looks like the bound was meant to
	+         * be iobuf+IOBUFSIZE -- confirm before changing.
	+         */
	+        if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
	+        {
	+            write( fd, iobuf, p-iobuf );
	+            p = iobuf;
	+        }
	+    }
	+    mysql_free_result( res );
	+    if( p != iobuf )
	+    {
	+        write( fd, iobuf, p-iobuf );
	+    }
	+    close( fd );
	+
	+    return true;
	+
	+bail:
	+    /* only reached with the output file already open */
	+    close( fd );
	+    return false;
	+}
	+
	+/* Report the message count cached in the table object. */
	+uint dbmysql_table_getmsgcount( dbtmysql_t* pthis )
	+{
	+    const uint cached = pthis->nmsgs;
	+
	+    return cached;
	+}
	+
	+/*
	+ * Look up the stored count for a word in the MySQL table. The word is
	+ * lower-cased and quote-escaped before being placed in the query.
	+ * Returns 0 when there is no row for the word or the query fails.
	+ */
	+uint dbmysql_table_getcount( dbtmysql_t* pthis, str_t* pword )
	+{
	+    uint count = 0;
	+    char szword[MAXWORDLEN+1];
	+    char szsqlword[MAXWORDLEN*2+1];
	+
	+    char query[MAXQUERY];
	+    int qlen;
	+    MYSQL_RES* res;
	+    MYSQL_ROW row;
	+
	+    assert( pword->len <= MAXWORDLEN );
	+    strncpylwr( szword, pword->p, pword->len );
	+    szword[pword->len] = '\0';
	+    sql_escape( szsqlword, szword );
	+
	+    /* bounded formatting: a long table name must not overflow query[] */
	+    qlen = snprintf( query, sizeof(query), "SELECT count FROM %s WHERE name='%s'",
	+                     pthis->table, szsqlword );
	+    if( qlen < 0 \|\| (size_t)qlen >= sizeof(query) )
	+    {
	+        goto bail;
	+    }
	+    if( mysql_query( pthis->pdb->dbh, query ) != 0 )
	+    {
	+        goto bail;
	+    }
	+    if( (res = mysql_store_result( pthis->pdb->dbh )) == NULL )
	+    {
	+        goto bail;
	+    }
	+    while( (row = mysql_fetch_row( res )) )
	+    {
	+        count = atoi( row[0] );
	+    }
	+    /* a stored result set must be released by the caller */
	+    mysql_free_result( res );
	+
	+bail:
	+    return count;
	+}
	+
	+#else /* def HAVE_MYSQL */
	+
	+/* Stub used when HAVE_MYSQL is not defined: opening always fails with NULL. */
	+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
	+{
	+ return NULL;
	+}
	+
	+#endif /* def HAVE_MYSQL */
	+
	+#ifdef UNIT_TEST
	+/*
	+ * Unit-test driver (built only with UNIT_TEST): opens "testlist" 100
	+ * times and prints every entry as "str: <word> <count>".
	+ *
	+ * NOTE(review): `db` is not declared in this function (only `pdb` is),
	+ * so this driver looks stale; confirm before relying on it.
	+ */
	+int main( int argc, char** argv )
	+{
	+    dbh_t*      pdb;
	+    veciter_t   iter;
	+    str_t*      pstr;
	+    uint        n;
	+
	+    if( argc != 2 )
	+    {
	+        fprintf( stderr, "usage: %s <file>\n", argv[0] );
	+        return 1;
	+    }
	+
	+    for( n = 0; n < 100; n++ )
	+    {
	+        pdb = dbh_open( "testlist", true );
	+
	+        vec_first( &db, &iter );
	+        while( (pstr = veciter_get( &iter )) != NULL )
	+        {
	+            char  buf[MAXWORDLEN+32];
	+            char* p;
	+            if( pstr->len > 200 )
	+            {
	+                fprintf( stderr, "str too long: %u chars\n", pstr->len );
	+                break;
	+            }
	+            p = buf;
	+            strcpy( buf, "str: " );
	+            /* "str: " is 5 characters; advancing by 6 kept the NUL and hid the word */
	+            p += 5;
	+            memcpy( p, pstr->p, pstr->len );
	+            p += pstr->len;
	+            sprintf( p, " %u", pstr->count );
	+            puts( buf );
	+
	+            veciter_next( &iter );
	+        }
	+
	+        dbh_close( &db );
	+    }
	+
	+    return 0;
	+}
	+#endif /* def UNIT_TEST */
	diff --git a/dbmysql.h b/dbmysql.h
	@@ -0,0 +1,60 @@
	+/* $Id: dbmysql.h,v 1.4 2002/10/06 06:46:53 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _DBMYSQL_H
	+#define _DBMYSQL_H
	+
	+#ifdef HAVE_MYSQL
	+
	+#include "mysql.h"
	+
	+/*
	+ * MySQL table handle.  The leading members are function pointers whose
	+ * signatures match the dbmysql_table_* functions declared in this header
	+ * (each takes the table handle as its first argument).
	+ */
	+typedef struct _dbtmysql dbtmysql_t;
	+struct _dbtmysql
	+{
	+    bool_t      (*close)(dbtmysql_t*);
	+    bool_t      (*mergeclose)(dbtmysql_t*,vec_t*);
	+    bool_t      (*unmergeclose)(dbtmysql_t*,vec_t*);
	+    bool_t      (*import)(dbtmysql_t*,cpchar);
	+    bool_t      (*export)(dbtmysql_t*,cpchar);
	+    uint        (*getmsgcount)(dbtmysql_t*);
	+    uint        (*getcount)(dbtmysql_t*,str_t*);
	+
	+    struct _dbhmysql*   pdb;    /* owning database handle */
	+    char*       table;          /* table name */
	+    uint        nmsgs;          /* number of messages in table (cached) */
	+};
	+
	+/*
	+ * MySQL database handle.  The function pointers match dbmysql_db_close
	+ * and dbmysql_db_opentable declared in this header.
	+ */
	+typedef struct _dbhmysql dbhmysql_t;
	+struct _dbhmysql
	+{
	+    bool_t      (*close)(dbhmysql_t*);
	+    dbt_t*      (*opentable)(dbhmysql_t*,cpchar,bool_t);
	+
	+    MYSQL*      dbh;            /* database handle, if currently open */
	+};
	+
	+/* Database-level entry points (the first declaration is cut off in this listing). */
	+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar d…
	+bool_t dbmysql_db_close( dbhmysql_t* pthis );
	+dbt_t* dbmysql_db_opentable( dbhmysql_t* pthis, cpchar table, bool_t rdonly );
	+
	+/* Table-level operations; each takes the table handle as its first argument. */
	+bool_t dbmysql_table_close( dbtmysql_t* pthis );
	+bool_t dbmysql_table_mergeclose( dbtmysql_t* pthis, vec_t* pmsg );
	+bool_t dbmysql_table_unmergeclose( dbtmysql_t* pthis, vec_t* pmsg );
	+bool_t dbmysql_table_import( dbtmysql_t* pthis, cpchar filename );
	+bool_t dbmysql_table_export( dbtmysql_t* pthis, cpchar filename );
	+uint dbmysql_table_getmsgcount( dbtmysql_t* pthis );
	+uint dbmysql_table_getcount( dbtmysql_t* pthis, str_t* pword );
	+
	+#else /* def HAVE_MYSQL */
	+
	+/* Only the open call is declared when HAVE_MYSQL is not defined. */
	+dbh_t* dbmysql_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar d…
	+
	+#endif /* def HAVE_MYSQL */
	+
	+#endif /* ndef _DBMYSQL_H */
	diff --git a/dbtext.c b/dbtext.c
	@@ -0,0 +1,591 @@
	+/* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * dbtext.c: flatfile database handler
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+
	+#include "dbh.h"
	+#include "dbtext.h"
	+
	+/*
	+ * Make sure the item vector can hold at least `nsize` records.
	+ *
	+ * Capacity doubles (or jumps straight to nsize when doubling is not
	+ * enough).  Slots from the current item count up to nsize are
	+ * initialised to an empty word with count 0.  Exits the process with
	+ * status 2 if the reallocation fails.
	+ */
	+static void dbtext_table_setsize( dbttext_t* pthis, uint nsize )
	+{
	+    if( nsize > pthis->nalloc )
	+    {
	+        uint    nnewalloc;
	+        rec_t*  pnewitems;
	+        uint    n;
	+
	+        nnewalloc = pthis->nalloc * 2;
	+        if( nnewalloc < nsize ) nnewalloc = nsize;
	+        pnewitems = (rec_t*)realloc( pthis->pitems, nnewalloc*sizeof(rec_t) );
	+        if( pnewitems == NULL )
	+        {
	+            exit( 2 );
	+        }
	+        for( n = pthis->nitems; n < nsize; n++ )
	+        {
	+            str_create( &pnewitems[n].w );
	+            pnewitems[n].n = 0;
	+        }
	+        pthis->pitems = pnewitems;
	+        pthis->nalloc = nnewalloc;
	+    }
	+}
	+
	+/*
	+ * Open the flat-file database rooted at a directory.
	+ *
	+ * If `dbname` is non-empty it names the directory (one trailing '/' is
	+ * dropped); otherwise "$HOME/.bmf" is used, with "." standing in for an
	+ * unset or empty HOME.  The directory is created with owner-only
	+ * permissions when it does not exist.
	+ *
	+ * Returns the new handle, or NULL on allocation failure, when the path
	+ * cannot be stat'ed or created, or when it exists but is not a
	+ * directory.  Nothing is leaked on failure.
	+ *
	+ * (The signature line below is cut off in this listing and is kept as is.)
	+ */
	+dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbp…
	+{
	+    dbhtext_t*  pthis;
	+
	+    uint        dirlen;
	+    cpchar      phome;
	+    struct stat st;
	+
	+    pthis = (dbhtext_t*)malloc( sizeof(dbhtext_t) );
	+    if( pthis == NULL )
	+    {
	+        goto bail;
	+    }
	+    pthis->close = dbtext_db_close;
	+    pthis->opentable = dbtext_db_opentable;
	+    pthis->dir = NULL;      /* lets the bail path free unconditionally */
	+    if( dbname != NULL && *dbname != '\0' )
	+    {
	+        dirlen = strlen( dbname );
	+        pthis->dir = strdup( dbname );
	+        if( pthis->dir == NULL )
	+        {
	+            goto bail;
	+        }
	+        if( pthis->dir[dirlen-1] == '/' )
	+        {
	+            pthis->dir[dirlen-1] = '\0';
	+        }
	+    }
	+    else
	+    {
	+        phome = getenv( "HOME" );
	+        if( phome == NULL || *phome == '\0' )
	+        {
	+            phome = ".";
	+        }
	+        /* room for "/.bmf" (5 characters) plus the terminating NUL */
	+        pthis->dir = (char*)malloc( strlen(phome)+5+1 );
	+        if( pthis->dir == NULL )
	+        {
	+            goto bail;
	+        }
	+        sprintf( pthis->dir, "%s/.bmf", phome );
	+    }
	+
	+    /* ensure config directory exists */
	+    if( stat( pthis->dir, &st ) != 0 )
	+    {
	+        if( errno == ENOENT )
	+        {
	+            if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 )
	+            {
	+                goto bail;
	+            }
	+        }
	+        else
	+        {
	+            goto bail;
	+        }
	+    }
	+    else
	+    {
	+        if( !S_ISDIR( st.st_mode ) )
	+        {
	+            goto bail;
	+        }
	+    }
	+
	+    return (dbh_t*)pthis;
	+
	+bail:
	+    if( pthis != NULL )
	+    {
	+        free( pthis->dir );
	+        free( pthis );
	+    }
	+    return NULL;
	+}
	+
	+/* Release the directory string held by the handle; always reports success. */
	+bool_t dbtext_db_close( dbhtext_t* pthis )
	+{
	+    char* dir = pthis->dir;
	+
	+    pthis->dir = NULL;
	+    free( dir );
	+
	+    return true;
	+}
	+
	+/*
	+ * Open "<dir>/<table>.txt", lock it, and load it into memory.
	+ *
	+ * The file starts with a BOGOFILTER_HEADER line carrying the message
	+ * count, followed by "word count" lines.  Lines starting with '#' or
	+ * ';' and empty lines are skipped.  Words are lowercased in place and
	+ * the records point into the loaded buffer.
	+ *
	+ * For a read-only open the file is unlocked and closed once loaded; for
	+ * a read-write open the descriptor and the write lock are kept in the
	+ * table.  An empty file yields an empty table with the descriptor left
	+ * open.
	+ *
	+ * Returns the table, or NULL on any failure (bad path, open/read error,
	+ * out of memory, malformed file, last line without a newline).
	+ */
	+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly )
	+{
	+    dbttext_t*  ptable = NULL;
	+
	+#ifndef NOLOCK
	+    struct flock lock;
	+#endif /* ndef NOLOCK */
	+    char        szpath[PATH_MAX];
	+    int         pathlen;
	+    int         flags;
	+    struct stat st;
	+
	+    char*       pbegin;
	+    char*       pend;
	+    char*       pbufend;
	+    rec_t       r;
	+    uint        pos;
	+
	+    if( pthis->dir == NULL )
	+    {
	+        goto bail;
	+    }
	+
	+    ptable = (dbttext_t*)malloc( sizeof(dbttext_t) );
	+    if( ptable == NULL )
	+    {
	+        perror( "malloc()" );
	+        goto bail;
	+    }
	+    ptable->close = dbtext_table_close;
	+    ptable->mergeclose = dbtext_table_mergeclose;
	+    ptable->unmergeclose = dbtext_table_unmergeclose;
	+    ptable->import = dbtext_table_import;
	+    ptable->export = dbtext_table_export;
	+    ptable->getmsgcount = dbtext_table_getmsgcount;
	+    ptable->getcount = dbtext_table_getcount;
	+    ptable->fd = -1;
	+    ptable->pbuf = NULL;
	+    ptable->nmsgs = 0;
	+    ptable->nalloc = 0;
	+    ptable->nitems = 0;
	+    ptable->pitems = NULL;
	+
	+    /* refuse paths that do not fit rather than overrun szpath */
	+    pathlen = snprintf( szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table );
	+    if( pathlen < 0 || (size_t)pathlen >= sizeof(szpath) )
	+    {
	+        goto bail;
	+    }
	+    flags = (rdonly ? O_RDONLY|O_CREAT : O_RDWR|O_CREAT);
	+    ptable->fd = open( szpath, flags, 0644 );
	+    if( ptable->fd == -1 )
	+    {
	+        perror( "open()" );
	+        goto bail;
	+    }
	+
	+#ifndef NOLOCK
	+    memset( &lock, 0, sizeof(lock) );
	+    lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
	+    lock.l_start = 0;
	+    lock.l_whence = SEEK_SET;
	+    lock.l_len = 0;
	+    fcntl( ptable->fd, F_SETLKW, &lock );
	+#endif /* ndef NOLOCK */
	+
	+    if( fstat( ptable->fd, &st ) != 0 )
	+    {
	+        perror( "fstat()" );
	+        goto bail_uc;
	+    }
	+
	+    if( st.st_size == 0 )
	+    {
	+        return (dbt_t*)ptable;
	+    }
	+
	+    /* one extra byte so sscanf()/strtol() always meet a terminating NUL */
	+    ptable->pbuf = (char*)malloc( st.st_size + 1 );
	+    if( ptable->pbuf == NULL )
	+    {
	+        perror( "malloc()" );
	+        goto bail_uc;
	+    }
	+
	+    if( read( ptable->fd, ptable->pbuf, st.st_size ) != st.st_size )
	+    {
	+        perror( "read()" );
	+        goto bail_fuc;
	+    }
	+    pbufend = ptable->pbuf + st.st_size;
	+    *pbufend = '\0';
	+
	+    /* XXX: bogofilter compatibility */
	+    if( sscanf( ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs ) != 1 )
	+    {
	+        goto bail_fuc;
	+    }
	+    /* skip the header line without running past the data */
	+    pbegin = ptable->pbuf;
	+    while( pbegin < pbufend && *pbegin != '\n' ) pbegin++;
	+    pbegin++;
	+
	+    pos = 0;
	+    while( pbegin < pbufend )
	+    {
	+        pend = pbegin;
	+        r.w.p = pbegin;
	+        r.w.len = 0;
	+        r.n = 0;
	+
	+        for( ;; )
	+        {
	+            /* check the bound before looking at the byte */
	+            if( pend >= pbufend )
	+            {
	+                goto bail_fuc;
	+            }
	+            if( *pend == '\n' )
	+            {
	+                break;
	+            }
	+            *pend = tolower( (unsigned char)*pend );
	+            if( *pend == ' ' )
	+            {
	+                r.w.len = (pend-pbegin);
	+                r.n = strtol( pend+1, NULL, 10 );
	+            }
	+            pend++;
	+        }
	+        if( pend > pbegin && *pbegin != '#' && *pbegin != ';' )
	+        {
	+            if( r.w.len == 0 || r.w.len > MAXWORDLEN )
	+            {
	+                fprintf( stderr, "dbh_loadfile: bad file format\n" );
	+                goto bail_fuc;
	+            }
	+            dbtext_table_setsize( ptable, pos+1 );
	+            ptable->pitems[pos++] = r;
	+            ptable->nitems = pos;
	+        }
	+        pbegin = pend+1;
	+    }
	+
	+    if( rdonly )
	+    {
	+#ifndef NOLOCK
	+        lock.l_type = F_UNLCK;
	+        fcntl( ptable->fd, F_SETLKW, &lock );
	+#endif /* ndef NOLOCK */
	+        close( ptable->fd );
	+        ptable->fd = -1;
	+    }
	+
	+    return (dbt_t*)ptable;
	+
	+bail_fuc:
	+    free( ptable->pitems );
	+    free( ptable->pbuf );
	+
	+bail_uc:
	+#ifndef NOLOCK
	+    lock.l_type = F_UNLCK;
	+    fcntl( ptable->fd, F_SETLKW, &lock );
	+#endif /* ndef NOLOCK */
	+
	+    close( ptable->fd );
	+    ptable->fd = -1;
	+
	+bail:
	+    free( ptable );
	+    return NULL;
	+}
	+
	+/*
	+ * Free the in-memory table data and, if the file is still open, drop
	+ * the lock and close it.  Always reports success.
	+ */
	+bool_t dbtext_table_close( dbttext_t* pthis )
	+{
	+    struct flock lockall;
	+
	+    free( pthis->pbuf );
	+    pthis->pbuf = NULL;
	+    free( pthis->pitems );
	+    pthis->pitems = NULL;
	+
	+    if( pthis->fd == -1 )
	+    {
	+        /* nothing left open */
	+        return true;
	+    }
	+
	+#ifndef NOLOCK
	+    memset( &lockall, 0, sizeof(lockall) );
	+    lockall.l_whence = SEEK_SET;
	+    lockall.l_start = 0;
	+    lockall.l_len = 0;
	+    lockall.l_type = F_UNLCK;
	+    fcntl( pthis->fd, F_SETLKW, &lockall );
	+#endif /* ndef NOLOCK */
	+    close( pthis->fd );
	+    pthis->fd = -1;
	+
	+    return true;
	+}
	+
	+/*
	+ * Add the word counts of one message to the table, rewrite the backing
	+ * file from scratch, and close the table.
	+ *
	+ * The stored records and `pmsg` are walked in parallel as a sorted
	+ * merge: words present only in the table are written unchanged, words
	+ * present in both are written with the summed count, and words present
	+ * only in the message are written with the message's count.  The
	+ * message counter is incremented first.
	+ *
	+ * Returns false if the table has no open descriptor, otherwise the
	+ * result of dbtext_table_close().
	+ */
	+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg )
	+{
	+    /* note that we require both vectors to be sorted */
	+
	+    uint        pos;
	+    rec_t*      prec;
	+    veciter_t   msgiter;
	+    str_t*      pmsgstr;
	+    uint        count;
	+    char        iobuf[IOBUFSIZE];
	+    char*       p;
	+
	+    if( pthis->fd == -1 )
	+    {
	+        return false;
	+    }
	+    ftruncate( pthis->fd, 0 );
	+    lseek( pthis->fd, 0, SEEK_SET );
	+
	+    pthis->nmsgs++;
	+
	+    p = iobuf;
	+    p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
	+
	+    vec_first( pmsg, &msgiter );
	+    pmsgstr = veciter_get( &msgiter );
	+
	+    pos = 0;
	+    while( pos < pthis->nitems || pmsgstr != NULL )
	+    {
	+        int cmp = 0;
	+        /* only form the record address while pos is a valid index */
	+        prec = (pos < pthis->nitems) ? &pthis->pitems[pos] : NULL;
	+        if( pmsgstr != NULL && prec != NULL )
	+        {
	+            cmp = str_casecmp( &prec->w, pmsgstr );
	+        }
	+        else
	+        {
	+            /* we exhausted one list or the other (but not both) */
	+            cmp = (prec != NULL) ? -1 : 1;
	+        }
	+        if( cmp < 0 )
	+        {
	+            /* write existing str */
	+            assert( prec->w.p != NULL && prec->w.len > 0 );
	+            assert( prec->w.len <= MAXWORDLEN );
	+            count = prec->n;
	+            strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            pos++;
	+        }
	+        else if( cmp == 0 )
	+        {
	+            /* same str, merge and write sum */
	+            assert( prec->w.p != NULL && prec->w.len > 0 );
	+            assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
	+            assert( prec->w.len <= MAXWORDLEN );
	+            assert( pmsgstr->len <= MAXWORDLEN );
	+            count = db_getnewcount( &msgiter );
	+            count += prec->n;
	+            strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            pos++;
	+            veciter_next( &msgiter );
	+            pmsgstr = veciter_get( &msgiter );
	+        }
	+        else /* cmp > 0 */
	+        {
	+            /* write new str */
	+            assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
	+            assert( pmsgstr->len <= MAXWORDLEN );
	+            count = db_getnewcount( &msgiter );
	+            strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            veciter_next( &msgiter );
	+            pmsgstr = veciter_get( &msgiter );
	+        }
	+
	+        /*
	+         * NOTE(review): (iobuf+1) makes this true after every line, so
	+         * each line is flushed immediately; presumably (iobuf+IOBUFSIZE)
	+         * was intended.  Confirm before changing.
	+         */
	+        if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
	+        {
	+            write( pthis->fd, iobuf, p-iobuf );
	+            p = iobuf;
	+        }
	+    }
	+    if( p != iobuf )
	+    {
	+        write( pthis->fd, iobuf, p-iobuf );
	+    }
	+
	+    veciter_destroy( &msgiter );
	+    return dbtext_table_close( pthis );
	+}
	+
	+/*
	+ * Subtract the word counts of one message from the table, rewrite the
	+ * backing file from scratch, and close the table.
	+ *
	+ * The stored records and `pmsg` are walked in parallel as a sorted
	+ * merge: words present only in the table are written unchanged, words
	+ * present in both are written with the difference (clamped at 0), and
	+ * words present only in the message are written with count 0.  The
	+ * message counter is decremented first and never goes below 0.
	+ *
	+ * Returns false if the table has no open descriptor, otherwise the
	+ * result of dbtext_table_close().
	+ */
	+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg )
	+{
	+    /* note that we require both vectors to be sorted */
	+
	+    uint        pos;
	+    rec_t*      prec;
	+    veciter_t   msgiter;
	+    str_t*      pmsgstr;
	+    uint        count;
	+    char        iobuf[IOBUFSIZE];
	+    char*       p;
	+
	+    if( pthis->fd == -1 )
	+    {
	+        return false;
	+    }
	+    ftruncate( pthis->fd, 0 );
	+    lseek( pthis->fd, 0, SEEK_SET );
	+
	+    /* nmsgs is unsigned: decrementing 0 would wrap to a huge count */
	+    if( pthis->nmsgs > 0 )
	+    {
	+        pthis->nmsgs--;
	+    }
	+
	+    p = iobuf;
	+    p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
	+
	+    vec_first( pmsg, &msgiter );
	+    pmsgstr = veciter_get( &msgiter );
	+
	+    pos = 0;
	+    while( pos < pthis->nitems || pmsgstr != NULL )
	+    {
	+        int cmp = 0;
	+        /* only form the record address while pos is a valid index */
	+        prec = (pos < pthis->nitems) ? &pthis->pitems[pos] : NULL;
	+        if( pmsgstr != NULL && prec != NULL )
	+        {
	+            cmp = str_casecmp( &prec->w, pmsgstr );
	+        }
	+        else
	+        {
	+            /* we exhausted one list or the other (but not both) */
	+            cmp = (prec != NULL) ? -1 : 1;
	+        }
	+        if( cmp < 0 )
	+        {
	+            /* write existing str */
	+            assert( prec->w.p != NULL && prec->w.len > 0 );
	+            assert( prec->w.len <= MAXWORDLEN );
	+            count = prec->n;
	+            strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            pos++;
	+        }
	+        else if( cmp == 0 )
	+        {
	+            /* same str, merge and write difference */
	+            assert( prec->w.p != NULL && prec->w.len > 0 );
	+            assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
	+            assert( prec->w.len <= MAXWORDLEN );
	+            assert( pmsgstr->len <= MAXWORDLEN );
	+            count = db_getnewcount( &msgiter );
	+            count = (prec->n > count) ? (prec->n - count) : 0;
	+            strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            pos++;
	+            veciter_next( &msgiter );
	+            pmsgstr = veciter_get( &msgiter );
	+        }
	+        else /* cmp > 0 */
	+        {
	+            /* this should not happen, so write with count=0 */
	+            assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
	+            assert( pmsgstr->len <= MAXWORDLEN );
	+            db_getnewcount( &msgiter );
	+            count = 0;
	+            strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len;
	+            *p++ = ' ';
	+            p += sprintf( p, "%u\n", count );
	+
	+            veciter_next( &msgiter );
	+            pmsgstr = veciter_get( &msgiter );
	+        }
	+
	+        /*
	+         * NOTE(review): (iobuf+1) makes this true after every line, so
	+         * each line is flushed immediately; presumably (iobuf+IOBUFSIZE)
	+         * was intended.  Confirm before changing.
	+         */
	+        if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
	+        {
	+            write( pthis->fd, iobuf, p-iobuf );
	+            p = iobuf;
	+        }
	+    }
	+    if( p != iobuf )
	+    {
	+        write( pthis->fd, iobuf, p-iobuf );
	+    }
	+
	+    veciter_destroy( &msgiter );
	+    return dbtext_table_close( pthis );
	+}
	+
	+/* Import is not implemented for the text backend; always fails. */
	+bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename )
	+{
	+    (void)pthis;
	+    (void)filename;
	+
	+    return false;
	+}
	+
	+/* Export is not implemented for the text backend; always fails. */
	+bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename )
	+{
	+    (void)pthis;
	+    (void)filename;
	+
	+    return false;
	+}
	+
	+/* Report the message count held in the table handle. */
	+uint dbtext_table_getmsgcount( dbttext_t* pthis )
	+{
	+    const uint nmsgs = pthis->nmsgs;
	+
	+    return nmsgs;
	+}
	+
	+/*
	+ * Binary-search the item vector for `pword` (case-insensitive) and
	+ * return its count, or 0 when the table is empty or the word is absent.
	+ */
	+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword )
	+{
	+    int below;  /* index known to sort before pword; -1 means none yet */
	+    int cand;   /* candidate index for the match */
	+    int probe;
	+
	+    if( pthis->nitems == 0 )
	+    {
	+        return 0;
	+    }
	+
	+    below = -1;
	+    cand = pthis->nitems - 1;
	+    while( cand-below > 1 )
	+    {
	+        probe = (cand+below)/2;
	+        if( str_casecmp( pword, &pthis->pitems[probe].w ) > 0 )
	+            below = probe;
	+        else
	+            cand = probe;
	+    }
	+    assert( cand >= 0 && cand < pthis->nitems );
	+
	+    if( str_casecmp( pword, &pthis->pitems[cand].w ) == 0 )
	+    {
	+        return pthis->pitems[cand].n;
	+    }
	+
	+    return 0;
	+}
	+
	+#ifdef UNIT_TEST
	+/*
	+ * Unit-test driver (built only with UNIT_TEST): opens "testlist" 100
	+ * times and prints every entry as "str: <word> <count>".
	+ *
	+ * NOTE(review): `db` is not declared in this function (only `pdb` is),
	+ * so this driver looks stale; confirm before relying on it.
	+ */
	+int main( int argc, char** argv )
	+{
	+    dbh_t*      pdb;
	+    veciter_t   iter;
	+    str_t*      pstr;
	+    uint        n;
	+
	+    if( argc != 2 )
	+    {
	+        fprintf( stderr, "usage: %s <file>\n", argv[0] );
	+        return 1;
	+    }
	+
	+    for( n = 0; n < 100; n++ )
	+    {
	+        pdb = dbh_open( "testlist", true );
	+
	+        vec_first( &db, &iter );
	+        while( (pstr = veciter_get( &iter )) != NULL )
	+        {
	+            char  buf[MAXWORDLEN+32];
	+            char* p;
	+            if( pstr->len > 200 )
	+            {
	+                fprintf( stderr, "str too long: %u chars\n", pstr->len );
	+                break;
	+            }
	+            p = buf;
	+            strcpy( buf, "str: " );
	+            /* "str: " is 5 characters; advancing by 6 kept the NUL and hid the word */
	+            p += 5;
	+            memcpy( p, pstr->p, pstr->len );
	+            p += pstr->len;
	+            sprintf( p, " %u", pstr->count );
	+            puts( buf );
	+
	+            veciter_next( &iter );
	+        }
	+
	+        dbh_close( &db );
	+    }
	+
	+    return 0;
	+}
	+#endif /* def UNIT_TEST */
	diff --git a/dbtext.h b/dbtext.h
	@@ -0,0 +1,53 @@
	+/* $Id: dbtext.h,v 1.3 2002/10/02 04:45:40 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _DBTEXT_H
	+#define _DBTEXT_H
	+
	+/*
	+ * Text-file table handle.  The leading members are function pointers
	+ * whose signatures match the dbtext_table_* functions declared in this
	+ * header (each takes the table handle as its first argument).
	+ */
	+typedef struct _dbttext dbttext_t;
	+struct _dbttext
	+{
	+    bool_t      (*close)(dbttext_t*);
	+    bool_t      (*mergeclose)(dbttext_t*,vec_t*);
	+    bool_t      (*unmergeclose)(dbttext_t*,vec_t*);
	+    bool_t      (*import)(dbttext_t*,cpchar);
	+    bool_t      (*export)(dbttext_t*,cpchar);
	+    uint        (*getmsgcount)(dbttext_t*);
	+    uint        (*getcount)(dbttext_t*,str_t*);
	+
	+    int         fd;             /* file descriptor, if currently open */
	+    char*       pbuf;           /* data buffer, if currently open */
	+    uint        nmsgs;          /* number of messages represented in list */
	+    uint        nalloc;         /* items alloced in pitems */
	+    uint        nitems;         /* items available */
	+    rec_t*      pitems;         /* growing vector of items */
	+};
	+
	+/*
	+ * Text-file database handle.  The function pointers match
	+ * dbtext_db_close and dbtext_db_opentable declared in this header.
	+ */
	+typedef struct _dbhtext dbhtext_t;
	+struct _dbhtext
	+{
	+    bool_t      (*close)(dbhtext_t*);
	+    dbt_t*      (*opentable)(dbhtext_t*,cpchar,bool_t);
	+
	+    char*       dir;            /* directory holding the table files */
	+};
	+
	+/* Database-level entry points (the first declaration is cut off in this listing). */
	+dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar db…
	+bool_t dbtext_db_close( dbhtext_t* pthis );
	+dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly );
	+
	+/* Table-level operations; each takes the table handle as its first argument. */
	+bool_t dbtext_table_close( dbttext_t* pthis );
	+bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg );
	+bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg );
	+bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename );
	+bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename );
	+uint dbtext_table_getmsgcount( dbttext_t* pthis );
	+uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword );
	+
	+#endif /* ndef _DBTEXT_H */
	diff --git a/filt.c b/filt.c
	@@ -0,0 +1,175 @@
	+/* $Id: filt.c,v 1.1 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * filt.c: The Bayes filter implementation.
	+ * See http://www.paulgraham.com/spam.html for discussion.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+#include "dbh.h"
	+#include "filt.h"
	+
	+/* Absolute distance of a probability from the neutral value 0.5. */
	+#define DEVIATION(n) fabs((n)-0.5f)
	+
	+/* Dump the contents of a statistics structure */
	+/*
	+ * Write a statistics structure to `fd` as comment lines: the overall
	+ * spamicity first, then one "# 'word' -> prob" line for every extrema
	+ * slot that holds a word.
	+ */
	+void statdump( stats_t* pstat, int fd )
	+{
	+    char        iobuf[IOBUFSIZE];
	+    char*       out = iobuf;
	+    uint        slot;
	+
	+    out += sprintf( iobuf, "# Spamicity: %f\n", pstat->spamicity );
	+
	+    for( slot = 0; slot < pstat->keepers; slot++ )
	+    {
	+        discrim_t* entry = &pstat->extrema[slot];
	+
	+        if( !entry->key.len )
	+        {
	+            /* empty slot */
	+            continue;
	+        }
	+
	+        strcpy( out, "# '" );
	+        out += 3;
	+        strncpylwr( out, entry->key.p, entry->key.len );
	+        out += entry->key.len;
	+        out += snprintf( out, 28, "' -> %f\n", entry->prob );
	+        if( out+MAXWORDLEN+32 > (iobuf+1) )
	+        {
	+            write( fd, iobuf, out-iobuf );
	+            out = iobuf;
	+        }
	+    }
	+
	+    if( out != iobuf )
	+    {
	+        write( fd, iobuf, out-iobuf );
	+    }
	+}
	+
	+/*
	+ * Score the message word list `pmlist` against the good-word table
	+ * `pglist` and the spam-word table `pblist`.
	+ *
	+ * The pstats->keepers slots of pstats->extrema are reset to an empty key
	+ * with probability 0.5, then filled with the message words whose
	+ * probability lies furthest from 0.5.  The slot probabilities are then
	+ * combined into pstats->spamicity.
	+ *
	+ * Several long lines in this function are cut off in this listing.
	+ */
	+void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats )
	+{
	+ veciter_t iter;
	+ str_t* pword;
	+
	+ double prob, product, invproduct, dev;
	+ double slotdev, hitdev;
	+
	+#ifdef NON_EQUIPROBABLE
	+ /* There is an argument that we should (go?) by number of words here. */
	+ double msg_prob = ((double)pblist->nitems / (double)pglist->nitems);
	+#endif
	+
	+ discrim_t* pp;
	+ discrim_t* hit;
	+
	+ /* start with every slot empty and neutral */
	+ for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++)
	+ {
	+ pp->key.p = NULL;
	+ pp->key.len = 0;
	+ pp->prob = 0.5f;
	+ }
	+
	+ vec_first( pmlist, &iter );
	+ while( (pword = veciter_get( &iter )) != NULL )
	+ {
	+ double goodness = pglist->getcount( pglist, pword );
	+ double spamness = pblist->getcount( pblist, pword );
	+ uint goodtotal = pglist->getmsgcount( pglist );
	+ uint spamtotal = pblist->getmsgcount( pblist );
	+
	+ /* too few sightings of this word: fall back to a default probability */
	+ if( goodness + spamness < MINIMUM_FREQ )
	+ {
	+#ifdef NON_EQUIPROBABLE
	+ /*
	+ * In the absence of evidence, the probability that a new word will
	+ * be spam is the historical ratio of spam words to nonspam words.
	+ */
	+ prob = msg_prob;
	+#else
	+ prob = UNKNOWN_WORD;
	+#endif
	+ }
	+ else
	+ {
	+ double goodprob = goodtotal ? min( 1.0, (goodness / goodtotal) ) :…
	+ double spamprob = spamtotal ? min( 1.0, (spamness / spamtotal) ) :…
	+ assert( goodtotal > 0 \|\| spamtotal > 0 );
	+
	+#ifdef NON_EQUIPROBABLE
	+ prob = (spamprob * msg_prob) / ((goodprob * (1 - msg_prob)) + (spa…
	+#else
	+ prob = spamprob / (goodprob + spamprob);
	+#endif
	+
	+ /* keep the estimate away from the certain values 0 and 1 */
	+ prob = minmax( prob, 0.01, 0.99 );
	+ }
	+
	+ /* update the list of tokens with maximum deviation */
	+ dev = DEVIATION(prob);
	+ hit = NULL;
	+ hitdev = 0;
	+ for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++)
	+ {
	+ /* don't allow duplicate tokens in the stats.extrema */
	+ if( pp->key.len > 0 && str_casecmp( pword, &pp->key ) == 0 )
	+ {
	+ hit = NULL;
	+ break;
	+ }
	+
	+ /* remember a slot whose current deviation is smaller than this word's */
	+ slotdev = DEVIATION(pp->prob);
	+ if (dev>slotdev && dev>hitdev)
	+ {
	+ hit = pp;
	+ hitdev = slotdev;
	+ }
	+ }
	+ if (hit)
	+ {
	+ hit->prob = prob;
	+ hit->key = *pword;
	+ }
	+
	+ veciter_next( &iter );
	+ }
	+ veciter_destroy( &iter );
	+
	+ /*
	+ * Bayes' theorem.
	+ * For discussion, see <http://www.mathpages.com/home/kmath267.htm>.
	+ */
	+ product = invproduct = 1.0f;
	+ for (pp = pstats->extrema; pp < pstats->extrema+pstats->keepers; pp++)
	+ {
	+ /* a zero probability stops the accumulation at this slot */
	+ if( pp->prob == 0 )
	+ {
	+ break;
	+ }
	+ else
	+ {
	+ product *= pp->prob;
	+ invproduct *= (1 - pp->prob);
	+ }
	+ }
	+ pstats->spamicity = product / (product + invproduct);
	+}
	+
	+/*
	+ * Append every token of the current message to `pthis`, stopping at the
	+ * first `eof` or `from` token (which is left in *ptok).  Always returns
	+ * true.
	+ */
	+bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok )
	+{
	+    str_t word;
	+
	+    for( lex_nexttoken( plex, ptok );
	+         !( ptok->tt == eof || ptok->tt == from );
	+         lex_nexttoken( plex, ptok ) )
	+    {
	+        word.p = ptok->p;
	+        word.len = ptok->len;
	+        vec_addtail( pthis, &word );
	+    }
	+
	+    return true;
	+}
	diff --git a/filt.h b/filt.h
	@@ -0,0 +1,31 @@
	+/* $Id: filt.h,v 1.1 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _FILT_H
	+#define _FILT_H
	+
	+/* One scored word: the word itself and its spam probability. */
	+typedef struct
	+{
	+ str_t key;
	+ double prob;
	+} discrim_t;
	+
	+/* Filter result: overall score plus an array of `keepers` scored words. */
	+typedef struct
	+{
	+ double spamicity;
	+ uint keepers;
	+ discrim_t* extrema;
	+} stats_t;
	+
	+/* Filter entry points implemented in filt.c. */
	+void statdump( stats_t* pstat, int fd );
	+void bayesfilt( dbt_t* pglist, dbt_t* pblist, vec_t* pmlist, stats_t* pstats );
	+
	+bool_t bvec_loadmsg( vec_t* pthis, lex_t* plex, tok_t* ptok );
	+
	+#endif /* ndef _FILT_H */
	diff --git a/lex.c b/lex.c
	@@ -0,0 +1,787 @@
	+/* $Id: lex.c,v 1.18 2002/10/20 20:29:15 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * lex.c: generate token stream for bmf.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+
	+/*
	+ * HTML tag and attribute names recognised by is_htmltag().
	+ * is_htmltag() binary-searches this table with strncmp(), so the
	+ * entries must stay in ascending byte order.
	+ */
	+static cpchar g_htmltags[] =
	+{
	+ "abbr",
	+ "above",
	+ "accesskey",
	+ "acronym",
	+ "align",
	+ "alink",
	+ "all",
	+ "alt",
	+ "applet",
	+ "archive",
	+ "axis",
	+ "basefont",
	+ "baseline",
	+ "below",
	+ "bgcolor",
	+ "big",
	+ "body",
	+ "border",
	+ "bottom",
	+ "box",
	+ "button",
	+ "cellpadding",
	+ "cellspacing",
	+ "center",
	+ "char",
	+ "charoff",
	+ "charset",
	+ "circle",
	+ "cite",
	+ "class",
	+ "classid",
	+ "clear",
	+ "codebase",
	+ "codetype",
	+ "color",
	+ "cols",
	+ "colspan",
	+ "compact",
	+ "content",
	+ "coords",
	+ "data",
	+ "datetime",
	+ "declare",
	+ "default",
	+ "defer",
	+ "dfn",
	+ "dir",
	+ "disabled",
	+ "face",
	+ "font",
	+ "frameborder",
	+ "groups",
	+ "head",
	+ "headers",
	+ "height",
	+ "href",
	+ "hreflang",
	+ "hsides",
	+ "hspace",
	+ "http-equiv",
	+ "iframe",
	+ "img",
	+ "input",
	+ "ismap",
	+ "justify",
	+ "kbd",
	+ "label",
	+ "lang",
	+ "language",
	+ "left",
	+ "lhs",
	+ "link",
	+ "longdesc",
	+ "map",
	+ "marginheight",
	+ "marginwidth",
	+ "media",
	+ "meta",
	+ "middle",
	+ "multiple",
	+ "name",
	+ "nohref",
	+ "none",
	+ "noresize",
	+ "noshade",
	+ "nowrap",
	+ "object",
	+ "onblur",
	+ "onchange",
	+ "onclick",
	+ "ondblclick",
	+ "onfocus",
	+ "onkeydown",
	+ "onkeypress",
	+ "onkeyup",
	+ "onload",
	+ "onmousedown",
	+ "onmousemove",
	+ "onmouseout",
	+ "onmouseover",
	+ "onmouseup",
	+ "onselect",
	+ "onunload",
	+ "param",
	+ "poly",
	+ "profile",
	+ "prompt",
	+ "readonly",
	+ "rect",
	+ "rel",
	+ "rev",
	+ "rhs",
	+ "right",
	+ "rows",
	+ "rowspan",
	+ "rules",
	+ "samp",
	+ "scheme",
	+ "scope",
	+ "script",
	+ "scrolling",
	+ "select",
	+ "selected",
	+ "shape",
	+ "size",
	+ "small",
	+ "span",
	+ "src",
	+ "standby",
	+ "strike",
	+ "strong",
	+ "style",
	+ "sub",
	+ "summary",
	+ "sup",
	+ "tabindex",
	+ "table",
	+ "target",
	+ "textarea",
	+ "title",
	+ "top",
	+ "type",
	+ "usemap",
	+ "valign",
	+ "value",
	+ "valuetype",
	+ "var",
	+ "vlink",
	+ "void",
	+ "vsides",
	+ "vspace",
	+ "width"
	+};
	+/* number of entries in g_htmltags */
	+static const uint g_nhtmltags = sizeof(g_htmltags)/sizeof(cpchar);
	+
	+/*
	+ * Header prefixes whose lines are dropped by is_ignoredheader().
	+ * is_ignoredheader() binary-searches this table with strncasecmp(), so
	+ * the entries must stay in ascending case-insensitive order.
	+ */
	+static cpchar g_ignoredheaders[] =
	+{
	+ "Date:",
	+ "Delivery-date:",
	+ "Message-ID:",
	+ "X-Sorted:",
	+ "X-Spam-"
	+};
	+/* number of entries in g_ignoredheaders */
	+static const uint g_nignoredheaders = sizeof(g_ignoredheaders)/sizeof(cpchar);
	+
	+/* True for the in-line blanks skipped between tokens: space, tab, CR. */
	+static inline bool_t is_whitespace( int c )
	+{
	+    return ( c == ' ' || c == '\t' || c == '\r' );
	+}
	+
	+/*
	+ * True for characters of the base64 alphabet (letters, digits, '/',
	+ * '+').  The value is narrowed to unsigned char before the ctype call so
	+ * bytes above 0x7f passed as negative char values are well defined.
	+ */
	+static inline bool_t is_base64char( int c )
	+{
	+    return ( isalnum((unsigned char)c) || (c == '/' || c == '+') );
	+}
	+
	+/*
	+ * True for characters allowed inside a word: letters, digits, '$',
	+ * apostrophe, '.', and '-'.  The value is narrowed to unsigned char
	+ * before the ctype call.
	+ */
	+static inline bool_t is_wordmidchar( int c )
	+{
	+    return ( isalnum((unsigned char)c) || c == '$' || c == '\'' || c == '.' || c == '-' );
	+}
	+
	+/*
	+ * True for characters a word may end with: letters, digits, and '$'.
	+ * The value is narrowed to unsigned char before the ctype call.
	+ */
	+static inline bool_t is_wordendchar( int c )
	+{
	+    return ( isalnum((unsigned char)c) || c == '$' );
	+}
	+
	+/*
	+ * Test whether the text at `p` (with `len` bytes left on the line)
	+ * starts with an entry of g_htmltags.
	+ *
	+ * The table is binary-searched.  A match is rejected when it would
	+ * consume the whole remaining text, or when is_word() would find a
	+ * longer token starting at the same place.  On success *ptoklen is the
	+ * length of the matched entry; otherwise it is 0.
	+ */
	+static inline bool_t is_htmltag( cpchar p, uint len, uint* ptoklen )
	+{
	+    int lo, hi, mid, minlen, cmp;
	+
	+    *ptoklen = 0;
	+
	+    hi = g_nhtmltags-1;
	+    lo = -1;
	+    while( hi-lo > 1 )
	+    {
	+        mid = (hi+lo)/2;
	+        minlen = min( strlen(g_htmltags[mid]), len );
	+        cmp = strncmp( g_htmltags[mid], p, minlen );
	+        if( cmp > 0 || (cmp == 0 && minlen < len && !islower((unsigned char)p[minlen])) )
	+            hi = mid;
	+        else
	+            lo = mid;
	+    }
	+    minlen = min( strlen(g_htmltags[hi]), len );
	+    if( len == minlen || strncmp(g_htmltags[hi], p, minlen) != 0 )
	+    {
	+        return false;
	+    }
	+
	+    /* check if is_word() will have a longer match */
	+    if( is_wordendchar(p[minlen]) )
	+    {
	+        return false;
	+    }
	+    if( is_wordmidchar(p[minlen]) && is_wordendchar(p[minlen+1]) )
	+    {
	+        return false;
	+    }
	+
	+    *ptoklen = strlen(g_htmltags[hi]);
	+
	+    return true;
	+}
	+
	+/*
	+ * Recognise an HTML comment opener "<!--" or closer "-->" at `p`.
	+ * *ptoklen receives the delimiter length (4 or 3), or 0 when neither
	+ * matches.
	+ */
	+static inline bool_t is_htmlcomment( cpchar p, uint len, uint* ptoklen )
	+{
	+    uint matched = 0;
	+
	+    if( len >= 4 && memcmp( p, "<!--", 4 ) == 0 )
	+    {
	+        matched = 4;
	+    }
	+    else if( len >= 3 && memcmp( p, "-->", 3 ) == 0 )
	+    {
	+        matched = 3;
	+    }
	+
	+    *ptoklen = matched;
	+    return ( matched != 0 ) ? true : false;
	+}
	+
	+/*
	+ * True when every byte of the line is a base64 character or a line
	+ * terminator (LF or CR).  *ptoklen counts the bytes examined, so on
	+ * success it equals `len`.
	+ */
	+static inline bool_t is_base64( cpchar p, uint len, uint* ptoklen )
	+{
	+    *ptoklen = 0;
	+    while( len > 0 )
	+    {
	+        if( *p != '\n' && *p != '\r' && !is_base64char(*p) )
	+        {
	+            return false;
	+        }
	+        p++;
	+        len--;
	+        (*ptoklen)++;
	+    }
	+    return true;
	+}
	+
	+/*
	+ * True when the line starts with "--" followed by characters with no
	+ * space, tab or CR before the line end.  *ptoklen counts the bytes up
	+ * to (not including) the terminating LF.
	+ *
	+ * NOTE(review): is_whitespace() also matches '\r', so the '\r' half of
	+ * the terminator test below cannot be reached and a CR before the LF
	+ * rejects the line.  Confirm whether that is intended.
	+ */
	+static inline bool_t is_mimeboundary( cpchar p, uint len, uint* ptoklen )
	+{
	+    *ptoklen = 0;
	+
	+    if( len < 3 || p[0] != '-' || p[1] != '-' )
	+    {
	+        return false;
	+    }
	+    p += 2;
	+    len -= 2;
	+    *ptoklen += 2;
	+    while( len > 0 )
	+    {
	+        if( is_whitespace(*p) )
	+        {
	+            return false;
	+        }
	+        if( *p == '\n' || *p == '\r' )
	+        {
	+            break;
	+        }
	+        p++;
	+        len--;
	+        (*ptoklen)++;
	+    }
	+    return true;
	+}
	+
	+/*
	+ * Recognise a dotted quad at `p`: four groups of 1 to 3 digits
	+ * separated by '.'.  On success *ptoklen is the length of the address.
	+ * Parsing never reads or counts past the `len` bytes supplied.
	+ */
	+static inline bool_t is_ipaddr( cpchar p, uint len, uint* ptoklen )
	+{
	+    uint noctets, ndigits;
	+
	+    *ptoklen = 0;
	+
	+    noctets = 0;
	+    while( len > 0 && noctets < 4 )
	+    {
	+        ndigits = 0;
	+        while( len > 0 && isdigit((unsigned char)*p) )
	+        {
	+            ndigits++;
	+            p++;
	+            len--;
	+            (*ptoklen)++;
	+        }
	+        if( ndigits == 0 || ndigits > 3 )
	+        {
	+            return false;
	+        }
	+        noctets++;
	+        if( noctets < 4 )
	+        {
	+            /* the separator must lie inside the text; len is unsigned and must not wrap */
	+            if( len == 0 || *p != '.' )
	+            {
	+                return false;
	+            }
	+            p++;
	+            len--;
	+            (*ptoklen)++;
	+        }
	+    }
	+    if( noctets < 4 )
	+    {
	+        return false;
	+    }
	+    return true;
	+}
	+
	+/*
	+ * Recognise a word of at least three characters at `p`.
	+ *
	+ * A word starts with a letter or '$', continues with word-middle
	+ * characters, and is trimmed from the right until it ends on a
	+ * word-end character.  On success *ptoklen is the word length.
	+ */
	+static inline bool_t is_word( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 3 )
	+    {
	+        return false;
	+    }
	+    if( !(isalpha((unsigned char)*p) || *p == '$') )
	+    {
	+        return false;
	+    }
	+    *ptoklen = 1;
	+    p++;
	+    len--;
	+    while( len > 0 )
	+    {
	+        if( !is_wordmidchar(*p) )
	+        {
	+            break;
	+        }
	+        (*ptoklen)++;
	+        p++;
	+        len--;
	+    }
	+    /* back off trailing characters that may not end a word */
	+    while( *ptoklen >= 3 && !is_wordendchar(*(p-1)) )
	+    {
	+        (*ptoklen)--;
	+        p--;
	+        len++;
	+    }
	+    if( *ptoklen < 3 )
	+    {
	+        return false;
	+    }
	+
	+    return true;
	+}
	+
	+/*
	+ * True when the line starts (case-insensitively) with one of the
	+ * prefixes in g_ignoredheaders and is longer than that prefix.  On
	+ * success *ptoklen is set to `len`, i.e. the whole line is consumed;
	+ * on failure *ptoklen is left untouched.
	+ */
	+static inline bool_t is_ignoredheader( cpchar p, uint len, uint* ptoklen )
	+{
	+    int lo, hi, mid, minlen, cmp;
	+
	+    hi = g_nignoredheaders-1;
	+    lo = -1;
	+    while( hi-lo > 1 )
	+    {
	+        mid = (hi+lo)/2;
	+        minlen = min( strlen(g_ignoredheaders[mid]), len );
	+        cmp = strncasecmp( g_ignoredheaders[mid], p, minlen );
	+        if( cmp >= 0 )
	+            hi = mid;
	+        else
	+            lo = mid;
	+    }
	+    minlen = min( strlen(g_ignoredheaders[hi]), len );
	+    if( len == minlen || strncasecmp(g_ignoredheaders[hi], p, minlen) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = len;
	+    return true;
	+}
	+
	+/* True when the line starts with "\tid "; the whole line is consumed. */
	+static inline bool_t is_mailerid( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 4 || strncmp( p, "\tid ", 4 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = len;
	+    return true;
	+}
	+
	+/* True when the line starts with "SPAM:"; the whole line is consumed. */
	+static inline bool_t is_spamtext( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 5 || strncmp( p, "SPAM:", 5 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = len;
	+    return true;
	+}
	+
	+/* True when the text starts with "SMTP id "; the rest of the line is consumed. */
	+static inline bool_t is_smtpid( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 8 || strncmp( p, "SMTP id ", 8 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = len;
	+    return true;
	+}
	+
	+/* True when the text starts with "boundary="; the rest of the line is consumed. */
	+static inline bool_t is_boundaryequal( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 9 || strncmp( p, "boundary=", 9 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = len;
	+    return true;
	+}
	+
	+/* True when the text starts with name=" ; only those 6 bytes are consumed. */
	+static inline bool_t is_nameequal( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 6 || strncmp( p, "name=\"", 6 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = 6;
	+    return true;
	+}
	+
	+/* True when the text starts with filename=" ; only those 10 bytes are consumed. */
	+static inline bool_t is_filenameequal( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 10 || strncmp( p, "filename=\"", 10 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = 10;
	+    return true;
	+}
	+
	+/* True when the line starts with the mbox separator "From "; 5 bytes are consumed. */
	+static inline bool_t is_from( cpchar p, uint len, uint* ptoklen )
	+{
	+    if( len < 5 || strncmp( p, "From ", 5 ) != 0 )
	+    {
	+        return false;
	+    }
	+    *ptoklen = 5;
	+    return true;
	+}
	+
	+/*****************************************************************************/
	+
	+/*
	+ * Put a lexer into its initial state: no buffer, all offsets zero,
	+ * section set to `envelope`, and the mailbox type supplied by the caller.
	+ */
	+void lex_create( lex_t* pthis, mbox_t mboxtype )
	+{
	+    /* no input loaded yet */
	+    pthis->pbuf = NULL;
	+    pthis->buflen = 0;
	+
	+    /* all cursors at the start */
	+    pthis->pos = 0;
	+    pthis->lineend = 0;
	+    pthis->bom = 0;
	+    pthis->eom = 0;
	+
	+    pthis->section = envelope;
	+    pthis->mboxtype = mboxtype;
	+}
	+
	+/* Release the lexer's input buffer (the pointer itself is left as is). */
	+void lex_destroy( lex_t* pthis )
	+{
	+    char* buffer = pthis->pbuf;
	+
	+    free( buffer );
	+}
	+
	+/*
	+ * Read the input from `fd` into a heap buffer held in pthis->pbuf,
	+ * growing the buffer by IOBUFSIZE whenever it fills up.
	+ *
	+ * When the mailbox type is `detect`, it is resolved here: input longer
	+ * than 5 bytes that starts with "From " is treated as an mbox, anything
	+ * else as a maildir.
	+ *
	+ * Returns false on allocation failure or when read() reports an error;
	+ * after a realloc or read failure the buffer is freed and pbuf is NULL.
	+ */
	+bool_t lex_load( lex_t* pthis, int fd )
	+{
	+ uint nalloc;
	+ ssize_t nread;
	+
	+ nalloc = IOBUFSIZE;
	+ pthis->pbuf = (char*)malloc( IOBUFSIZE );
	+ if( pthis->pbuf == NULL )
	+ {
	+ return false;
	+ }
	+
	+ /* the loop condition is cut off in this listing */
	+ while( (nread = read( fd, pthis->pbuf + pthis->buflen, nalloc - pthis->buf…
	+ {
	+ pthis->buflen += nread;
	+ /* buffer full: extend it by another IOBUFSIZE */
	+ if( pthis->buflen == nalloc )
	+ {
	+ char* pnewbuf;
	+ nalloc += IOBUFSIZE;
	+ pnewbuf = (char*)realloc( pthis->pbuf, nalloc );
	+ if( pnewbuf == NULL )
	+ {
	+ free( pthis->pbuf );
	+ pthis->pbuf = NULL;
	+ return false;
	+ }
	+ pthis->pbuf = pnewbuf;
	+ }
	+ }
	+ if( nread < 0 )
	+ {
	+ free( pthis->pbuf );
	+ pthis->pbuf = NULL;
	+ return false;
	+ }
	+ if( pthis->mboxtype == detect )
	+ {
	+ if( pthis->buflen > 5 && memcmp( pthis->pbuf, "From ", 5 ) == 0 )
	+ {
	+ verbose( 1, "Input looks like an mbox\n" );
	+ pthis->mboxtype = mbox;
	+ }
	+ else
	+ {
	+ verbose( 1, "Input looks like a maildir\n" );
	+ pthis->mboxtype = maildir;
	+ }
	+ }
	+
	+ return true;
	+}
	+
	+static bool_t lex_nextline( lex_t* pthis ) /* Move pos and lineend forward to the next line that is not ignored.
	+ * Returns false once lineend has reached the end of the buffer, true when
	+ * pos..lineend delimit a line the caller should tokenize. */
	+{
	+ cpchar pbuf; /* start of the candidate line */
	+ uint len; /* length of the candidate line, including its LF when present */
	+ uint toklen; /* filled in by whichever is_* matcher succeeds */
	+
	+again:
	+ /* XXX: use and update pthis->section */
	+ pthis->pos = pthis->lineend; /* the next line starts where the previous one ended */
	+ if( pthis->lineend == pthis->buflen )
	+ {
	+ return false;
	+ }
	+
	+ pbuf = pthis->pbuf + pthis->pos;
	+ len = 0;
	+ while( pthis->pos + len < pthis->buflen && pbuf[len] != '\n' ) /* scan up to the LF or the end of the buffer */
	+ {
	+ len++;
	+ }
	+ if( pthis->pos + len < pthis->buflen )
	+ {
	+ len++; /* bump past the LF */
	+ }
	+
	+ pthis->lineend = pthis->pos + len;
	+
	+ /* check beginning-of-line patterns */
	+ if( is_base64( pbuf, len, &toklen ) \|\|
	+ is_ignoredheader( pbuf, len, &toklen ) \|\|
	+ is_mailerid( pbuf, len, &toklen ) \|\|
	+ is_mimeboundary( pbuf, len, &toklen ) \|\|
	+ is_spamtext( pbuf, len, &toklen ) )
	+ {
	+ /* ignore line */
	+ pthis->pos += toklen; /* has no lasting effect: pos is reset to lineend right after the jump */
	+ goto again;
	+ }
	+
	+ return true;
	+}
	+
	+void lex_nexttoken( lex_t* pthis, tok_t* ptok ) /* Fetch the next token from the loaded buffer into ptok.
	+ * ptok->tt becomes eof at end of input (p and len are not written in that case),
	+ * from when a new line in an mbox starts with the From-space prefix, or word for
	+ * whatever is_ipaddr or is_word accepts. Everything else is skipped. */
	+{
	+ cpchar pbuf; /* current scan position as a pointer */
	+ uint len; /* bytes left on the current line */
	+ uint toklen; /* length reported by the matcher that succeeded */
	+
	+ assert( pthis->pbuf != NULL );
	+
	+ if( pthis->pos == pthis->eom ) /* positioned at the end of the previous message, so a new message begins here */
	+ {
	+ pthis->bom = pthis->pos;
	+ }
	+
	+again:
	+ /* skip whitespace between tokens */
	+ while( pthis->pos != pthis->lineend && is_whitespace(pthis->pbuf[pthis->po…
	+ {
	+ pthis->pos++;
	+ }
	+
	+ pbuf = pthis->pbuf + pthis->pos;
	+ len = pthis->lineend - pthis->pos;
	+
	+ /* possibilities: end-of-line, html-comment, ipaddr, word, junk */
	+
	+ if( pthis->pos == pthis->lineend ) /* current line is used up: move on to the next one */
	+ {
	+ if( !lex_nextline( pthis ) )
	+ {
	+ pthis->eom = pthis->pos; /* end of input is also the end of the current message */
	+ ptok->tt = eof;
	+ return;
	+ }
	+
	+ pbuf = pthis->pbuf + pthis->pos;
	+ len = pthis->lineend - pthis->pos;
	+
	+ if( pthis->mboxtype == mbox )
	+ {
	+ if( is_from( pbuf, len, &toklen ) ) /* a line starting with From-space separates messages in an mbox */
	+ {
	+ pthis->eom = pthis->pos; /* the current message ends where this line starts */
	+ ptok->tt = from;
	+ ptok->p = pthis->pbuf + pthis->pos;
	+ ptok->len = toklen;
	+ pthis->pos += toklen;
	+ return;
	+ }
	+ }
	+
	+ goto again; /* skip lws */
	+ }
	+
	+ if( is_htmltag( pbuf, len, &toklen ) \|\|
	+ is_htmlcomment( pbuf, len, &toklen ) \|\|
	+ is_smtpid( pbuf, len, &toklen ) \|\|
	+ is_boundaryequal( pbuf, len, &toklen ) \|\|
	+ is_nameequal( pbuf, len, &toklen ) \|\|
	+ is_filenameequal( pbuf, len, &toklen ) )
	+ {
	+ /* ignore it */
	+ pthis->pos += toklen;
	+ goto again;
	+ }
	+
	+ if( is_ipaddr( pbuf, len, &toklen ) ) /* IP addresses are reported with the same token type as words */
	+ {
	+ ptok->tt = word;
	+ ptok->p = pthis->pbuf + pthis->pos;
	+ ptok->len = toklen;
	+ pthis->pos += toklen;
	+ return;
	+ }
	+ if( is_word( pbuf, len, &toklen ) )
	+ {
	+ ptok->tt = word;
	+ ptok->p = pthis->pbuf + pthis->pos;
	+ ptok->len = toklen;
	+ pthis->pos += toklen;
	+ if( toklen > MAXWORDLEN ) /* over-long words are consumed but never returned to the caller */
	+ {
	+ goto again;
	+ }
	+ return;
	+ }
	+
	+ /* junk */
	+ pthis->pos++; /* nothing matched: drop a single byte and try again */
	+ goto again;
	+}
	+
	+/* SpamAssassin style passthru */
	+/* Write all len bytes at p to fd, continuing after short writes; gives up silently when write() fails. */
	+static void lex_writeall( int fd, const char* p, size_t len )
	+{
	+    while( len > 0 )
	+    {
	+        ssize_t n = write( fd, p, len );
	+        if( n <= 0 )
	+        {
	+            return;
	+        }
	+        p += n;
	+        len -= (size_t)n;
	+    }
	+}
	+
	+/*
	+ * SpamAssassin style passthru.
	+ *
	+ * Copies the current message (bom..eom) to standard output. Existing
	+ * X-Spam-* headers, including their folded continuation lines, are dropped
	+ * and a fresh X-Spam-Status header (plus X-Spam-Flag when is_spam is set)
	+ * is inserted at the end of the header section. hits is the score printed
	+ * in the header. On return bom is advanced to eom.
	+ */
	+void lex_passthru( lex_t* pthis, bool_t is_spam, double hits )
	+{
	+    char szbuf[256];
	+    int nfmt;
	+    bool_t dropping = false;    /* true while inside a header that is being removed */
	+
	+    assert( pthis->bom < pthis->buflen && pthis->eom <= pthis->buflen );
	+    assert( pthis->bom <= pthis->eom );
	+
	+    pthis->pos = pthis->bom;
	+    if( is_spam )
	+    {
	+        nfmt = snprintf( szbuf, sizeof(szbuf),
	+                         "X-Spam-Status: Yes, hits=%f required=%f, tests=bmf\n"
	+                         "X-Spam-Flag: YES\n",
	+                         hits, SPAM_CUTOFF );
	+    }
	+    else
	+    {
	+        nfmt = snprintf( szbuf, sizeof(szbuf),
	+                         "X-Spam-Status: No, hits=%f required=%f\n",
	+                         hits, SPAM_CUTOFF );
	+    }
	+    if( nfmt < 0 )
	+    {
	+        szbuf[0] = '\0';
	+    }
	+    else if( (size_t)nfmt >= sizeof(szbuf) )
	+    {
	+        /* truncated (absurdly large score): still end the header with a newline */
	+        szbuf[sizeof(szbuf)-2] = '\n';
	+    }
	+
	+    /* existing headers */
	+    while( pthis->pos < pthis->eom )
	+    {
	+        cpchar pbuf = pthis->pbuf + pthis->pos;
	+        uint avail = pthis->eom - pthis->pos;   /* never look past this message */
	+        uint len = 0;
	+
	+        while( len < avail && pbuf[len] != '\n' )
	+        {
	+            len++;
	+        }
	+        if( len < avail )
	+        {
	+            len++; /* bump past the LF */
	+        }
	+
	+        /* check for end of headers: an empty line, LF or CRLF */
	+        if( pbuf[0] == '\n' || (len >= 2 && pbuf[0] == '\r' && pbuf[1] == '\n') )
	+        {
	+            break;
	+        }
	+
	+        /*
	+         * A line starting with a space or tab continues the previous header
	+         * and shares its fate; any other line starts a new header.
	+         */
	+        if( pbuf[0] != ' ' && pbuf[0] != '\t' )
	+        {
	+            if( len >= 7 && strncasecmp( pbuf, "X-Spam-", 7 ) == 0 )
	+            {
	+                dropping = true;
	+            }
	+            else
	+            {
	+                dropping = false;
	+            }
	+        }
	+
	+        /* write header, ignoring existing spam headers */
	+        if( !dropping )
	+        {
	+            lex_writeall( STDOUT_FILENO, pbuf, len );
	+        }
	+
	+        pthis->pos += len;
	+    }
	+
	+    /* new headers */
	+    lex_writeall( STDOUT_FILENO, szbuf, strlen(szbuf) );
	+
	+    /* remainder */
	+    if( pthis->pos < pthis->eom )
	+    {
	+        lex_writeall( STDOUT_FILENO, pthis->pbuf+pthis->pos, pthis->eom-pthis->pos );
	+    }
	+    pthis->bom = pthis->eom;
	+}
	+
	+#ifdef UNIT_TEST
	+
	+/*
	+ * UNIT_TEST driver: tokenizes standard input, or the file named by the
	+ * single command line argument, and prints every token in lower case.
	+ * Exits with status 1 when the input cannot be opened or loaded, or when
	+ * a token does not fit the local buffer.
	+ */
	+int main( int argc, char** argv )
	+{
	+    int fd;
	+    lex_t lex;
	+    tok_t tok;
	+
	+    fd = STDIN_FILENO;
	+    if( argc == 2 )
	+    {
	+        fd = open( argv[1], O_RDONLY );
	+        if( fd < 0 )
	+        {
	+            fprintf( stderr, "cannot open file\n" );
	+            exit( 1 );
	+        }
	+    }
	+
	+    /* lex_create() takes the mailbox type; let lex_load() detect it */
	+    lex_create( &lex, detect );
	+    if( ! lex_load( &lex, fd ) )
	+    {
	+        fprintf( stderr, "cannot load file\n" );
	+        exit( 1 );
	+    }
	+
	+    lex_nexttoken( &lex, &tok );
	+    while( tok.tt != eof )
	+    {
	+        char sztok[64];
	+        /* the second test keeps the copy in bounds whatever MAXWORDLEN is */
	+        if( tok.len > MAXWORDLEN || tok.len >= sizeof(sztok) )
	+        {
	+            printf( "* token too long! *\n" );
	+            exit( 1 );
	+        }
	+
	+        memcpy( sztok, tok.p, tok.len );
	+        sztok[tok.len] = '\0';  /* terminate first: strlwr() scans up to the NUL */
	+        strlwr( sztok );
	+        printf( "get_token: %d '%s'\n", (int)tok.tt, sztok );
	+
	+        lex_nexttoken( &lex, &tok );
	+    }
	+
	+    lex_destroy( &lex );
	+    return 0;
	+}
	+
	+#endif /* def UNIT_TEST */
	diff --git a/lex.h b/lex.h
	@@ -0,0 +1,44 @@
	+/* $Id: lex.h,v 1.4 2002/10/12 17:36:41 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _TOK_H
	+#define _TOK_H
	+
	+typedef enum { from, eof, word } toktype_t; /* kinds of token reported by lex_nexttoken() */
	+
	+typedef struct _tok
	+{
	+ toktype_t tt; /* token type */
	+ char* p; /* start of the token text inside the lexer buffer; not written for eof tokens */
	+ uint len; /* length of the token text in bytes; not written for eof tokens */
	+} tok_t;
	+
	+typedef enum { envelope, hdrs, body } msgsec_t; /* parts of a message, see lex_t.section */
	+
	+typedef struct _lex
	+{
	+ mbox_t mboxtype; /* mailbox format; lex_load() replaces detect by mbox or maildir */
	+ msgsec_t section; /* current section (envelope, headers, body) */
	+ uint pos; /* current position */
	+ uint bom; /* beginning of message */
	+ uint eom; /* end of current message (start of next) */
	+ uint lineend; /* line end (actually, start of next line) */
	+ uint buflen; /* length of buffer */
	+ char* pbuf; /* input buffer allocated by lex_load() and released by lex_destroy(); NULL before loading */
	+} lex_t;
	+
	+void lex_create ( lex_t* plex, mbox_t mboxtype ); /* reset all lexer fields; allocates nothing */
	+void lex_destroy ( lex_t* plex ); /* free the input buffer */
	+
	+bool_t lex_load ( lex_t* plex, int fd ); /* read the input from fd into the buffer; false on failure */
	+void lex_nexttoken( lex_t* plex, tok_t* ptok ); /* fetch the next token; ptok->tt is eof at end of input */
	+
	+void lex_passthru ( lex_t* plex, bool_t is_spam, double hits ); /* copy the current message to stdout with a new X-Spam-Status header */
	+
	+#endif /* ndef _TOK_H */
	diff --git a/str.c b/str.c
	@@ -0,0 +1,78 @@
	+/* $Id: str.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+
	+/* Convert the NUL-terminated string s to lower case in place. */
	+void strlwr( char* s )
	+{
	+    while( *s != '\0' )
	+    {
	+        /* tolower() is only defined for unsigned char values and EOF */
	+        *s = (char)tolower( (unsigned char)*s );
	+        s++;
	+    }
	+}
	+
	+/*
	+ * Copy the NUL-terminated string s to d, converting each character to
	+ * lower case on the way.
	+ * NOTE: as before, the terminating NUL is NOT copied; d must have room
	+ * for strlen(s) bytes and the caller terminates it if a string is needed.
	+ */
	+void strcpylwr( char* d, const char* s )
	+{
	+    while( *s != '\0' )
	+    {
	+        /* tolower() is only defined for unsigned char values and EOF */
	+        *d++ = (char)tolower( (unsigned char)*s++ );
	+    }
	+}
	+
	+/*
	+ * Copy exactly n bytes from s to d, converting each one to lower case.
	+ * No NUL is looked for in s and none is appended to d. A zero or
	+ * negative n copies nothing.
	+ */
	+void strncpylwr( char* d, const char* s, int n )
	+{
	+    while( n-- > 0 )
	+    {
	+        /* tolower() is only defined for unsigned char values and EOF */
	+        *d++ = (char)tolower( (unsigned char)*s++ );
	+    }
	+}
	+
	+/* Initialize pstr as an empty string reference: zero length, no data pointer. */
	+void str_create( str_t* pstr )
	+{
	+    pstr->len = 0;
	+    pstr->p = NULL;
	+}
	+
	+/* Counterpart of str_create(); intentionally does nothing. */
	+void str_destroy( str_t* pstr )
	+{
	+    (void)pstr; /* nothing to release */
	+}
	+
	+/*
	+ * Case-sensitive ordering of two counted strings.
	+ * The common prefix is compared with strncmp(); when it is equal the
	+ * shorter string sorts first. Returns a negative value, 0 or a positive
	+ * value. Both strings must be non-NULL and non-empty (asserted).
	+ */
	+int str_cmp( const str_t* pthis, const str_t* pother )
	+{
	+    uint minlen = min( pthis->len, pother->len );
	+    int result;
	+
	+    assert( pthis->p != NULL && pother->p != NULL && minlen != 0 );
	+
	+    result = strncmp( pthis->p, pother->p, minlen );
	+    if( result != 0 || pthis->len == pother->len )
	+    {
	+        return result;
	+    }
	+
	+    /* equal prefix, different lengths */
	+    return (pthis->len < pother->len) ? -1 : 1;
	+}
	+
	+/*
	+ * Case-insensitive ordering of two counted strings.
	+ * The common prefix is compared with strncasecmp(); when it is equal the
	+ * shorter string sorts first. Returns a negative value, 0 or a positive
	+ * value. Both strings must be non-NULL and non-empty (asserted).
	+ */
	+int str_casecmp( const str_t* pthis, const str_t* pother )
	+{
	+    uint minlen = min( pthis->len, pother->len );
	+    int result;
	+
	+    assert( pthis->p != NULL && pother->p != NULL && minlen != 0 );
	+
	+    result = strncasecmp( pthis->p, pother->p, minlen );
	+    if( result != 0 || pthis->len == pother->len )
	+    {
	+        return result;
	+    }
	+
	+    /* equal prefix, different lengths */
	+    return (pthis->len < pother->len) ? -1 : 1;
	+}
	diff --git a/str.h b/str.h
	@@ -0,0 +1,30 @@
	+/* $Id: str.h,v 1.1.1.1 2002/09/30 21:08:29 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _STR_H
	+#define _STR_H
	+
	+/* a couple of generic string functions... */
	+void strlwr( char* s ); /* lower-case the NUL-terminated string s in place */
	+void strcpylwr( char* d, const char* s ); /* lower-casing copy of s to d; the loop stops at the NUL of s without copying it */
	+void strncpylwr( char* d, const char* s, int n ); /* lower-casing copy of n bytes from s to d */
	+
	+typedef struct _str
	+{
	+ char* p; /* start of the text; NULL after str_create() */
	+ uint len; /* length of the text; the comparison functions use it instead of a terminator */
	+} str_t;
	+
	+void str_create ( str_t* pthis ); /* set p to NULL and len to 0 */
	+void str_destroy( str_t* pthis ); /* currently a no-op */
	+
	+int str_cmp ( const str_t* pthis, const str_t* pother ); /* case-sensitive ordering; on an equal prefix the shorter string sorts first */
	+int str_casecmp( const str_t* pthis, const str_t* pother ); /* same as str_cmp but ignoring case */
	+
	+#endif /* ndef _STR_H */
	diff --git a/vec.c b/vec.c
	@@ -0,0 +1,345 @@
	+/* $Id: vec.c,v 1.4 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ *
	+ * vec.c: vector functions for bmf.
	+ * Vectors are used to hold token lists for input data and flatfile database
	+ * entries in standalone mode. They dramatically reduce the number of small
	+ * mallocs and, if used properly, have no performance penalty over fancier
	+ * data structures.
	+ */
	+
	+#include "config.h"
	+#include "dbg.h"
	+#include "str.h"
	+#include "lex.h"
	+#include "vec.h"
	+
	+/*****************************************************************************
	+ * vector
	+ */
	+
	+/*
	+ * Initialize an empty vector with room for VEC_INITIAL_SIZE items.
	+ * Like vec_setsize(), terminates the process with exit status 2 when the
	+ * allocation fails, so callers never see a vector without storage.
	+ */
	+void vec_create( vec_t* pthis )
	+{
	+    pthis->nalloc = VEC_INITIAL_SIZE;
	+    pthis->nitems = 0;
	+    pthis->pitems = (str_t*)malloc( VEC_INITIAL_SIZE*sizeof(str_t) );
	+    if( pthis->pitems == NULL )
	+    {
	+        exit( 2 );
	+    }
	+}
	+
	+/*
	+ * Release the item array of a vector.
	+ * The vector is left empty (NULL array, zero counts) so no dangling
	+ * pointer survives and a second vec_destroy() is harmless. The strings
	+ * the items point at are not freed.
	+ */
	+void vec_destroy( vec_t* pthis )
	+{
	+    free( pthis->pitems );  /* free(NULL) is a no-op */
	+    pthis->pitems = NULL;
	+    pthis->nitems = 0;
	+    pthis->nalloc = 0;
	+}
	+
	+/*
	+ * Make sure the vector has room for at least nsize items.
	+ * When growth is needed the capacity is doubled (or raised to nsize if
	+ * that is larger) and the slots from nitems up to nsize are initialized
	+ * with str_create(). nitems itself is not changed. Terminates the
	+ * process with exit status 2 when realloc() fails.
	+ */
	+static void vec_setsize( vec_t* pthis, uint nsize )
	+{
	+    if( nsize > pthis->nalloc )
	+    {
	+        uint nnewalloc;
	+        str_t* pnewitems;
	+        uint n;
	+
	+        nnewalloc = pthis->nalloc * 2;
	+        if( nnewalloc < nsize ) nnewalloc = nsize;
	+        pnewitems = (str_t*)realloc( pthis->pitems, nnewalloc*sizeof(str_t) );
	+        if( pnewitems == NULL )
	+        {
	+            exit( 2 );
	+        }
	+        for( n = pthis->nitems; n < nsize; n++ )
	+        {
	+            str_create( &pnewitems[n] );
	+        }
	+        pthis->pitems = pnewitems;
	+        pthis->nalloc = nnewalloc;
	+    }
	+}
	+
	+void vec_addhead( vec_t* pthis, str_t* pstr ) /* Insert a copy of *pstr at index 0, shifting the existing items up by one.
	+ * pstr must reference a non-NULL, non-empty string (asserted). */
	+{
	+ assert( pstr->p != NULL && pstr->len > 0 );
	+
	+ vec_setsize( pthis, pthis->nitems+1 ); /* make room for one more item */
	+ memmove( &pthis->pitems[1], &pthis->pitems[0], pthis->nitems*sizeof(str_t)…
	+ pthis->pitems[0] = *pstr; /* struct copy: the vector shares the text that pstr->p points at */
	+ pthis->nitems++;
	+}
	+
	+/*
	+ * Append a copy of *pstr after the last item, growing the vector when
	+ * needed. pstr must reference a non-NULL, non-empty string (asserted).
	+ */
	+void vec_addtail( vec_t* pthis, str_t* pstr )
	+{
	+    uint slot;
	+
	+    assert( pstr->p != NULL && pstr->len > 0 );
	+
	+    slot = pthis->nitems;
	+    vec_setsize( pthis, slot + 1 );
	+    pthis->pitems[slot] = *pstr;
	+    pthis->nitems = slot + 1;
	+}
	+
	+void vec_delhead( vec_t* pthis ) /* Remove the item at index 0 by shifting the remaining items down; the vector must not be empty (asserted). */
	+{
	+ assert( pthis->nitems > 0 );
	+ pthis->nitems--; /* decremented first so the move below covers exactly the surviving items */
	+ memmove( &pthis->pitems[0], &pthis->pitems[1], pthis->nitems*sizeof(str_t)…
	+}
	+
	+/* Drop the last item; the vector must not be empty (asserted). Storage is not shrunk. */
	+void vec_deltail( vec_t* pthis )
	+{
	+    assert( pthis->nitems > 0 );
	+    pthis->nitems -= 1;
	+}
	+
	+/* Point piter at the first item (index 0) of pthis. */
	+void vec_first( vec_t* pthis, veciter_t* piter )
	+{
	+    piter->index = 0;
	+    piter->plist = pthis;
	+}
	+
	+/*
	+ * Point piter at index nitems of pthis, i.e. one position past the last
	+ * stored item; veciter_get() returns NULL there until veciter_prev() is
	+ * called.
	+ */
	+void vec_last( vec_t* pthis, veciter_t* piter )
	+{
	+    piter->index = pthis->nitems;
	+    piter->plist = pthis;
	+}
	+
	+/*****************************************************************************
	+ * sorted vector
	+ */
	+
	+/* qsort() callback: orders two str_t items with str_casecmp(). */
	+static int svec_compare( const void* p1, const void* p2 )
	+{
	+    return str_casecmp( (const str_t*)p1, (const str_t*)p2 );
	+}
	+
	+void svec_add( vec_t* pthis, str_t* pstr ) /* Insert *pstr into a vector kept in str_casecmp() order.
	+ * The position is found by binary search; an item comparing equal to an
	+ * existing one is placed directly after it. */
	+{
	+ int lo, hi, mid;
	+ veciter_t iter;
	+
	+ if( pthis->nitems == 0 ) /* empty vector: nothing to compare against */
	+ {
	+ vec_addtail( pthis, pstr );
	+ return;
	+ }
	+
	+ if( str_casecmp( pstr, &pthis->pitems[0] ) < 0 ) /* sorts before every existing item */
	+ {
	+ vec_addhead( pthis, pstr );
	+ return;
	+ }
	+
	+ hi = pthis->nitems - 1;
	+ lo = -1;
	+ while( hi-lo > 1 ) /* narrow hi down to the first item not smaller than pstr, or the last item if there is none */
	+ {
	+ mid = (hi+lo)/2;
	+ if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 )
	+ hi = mid;
	+ else
	+ lo = mid;
	+ }
	+ assert( hi < pthis->nitems );
	+
	+ iter.plist = pthis;
	+ iter.index = hi;
	+
	+ if( str_casecmp( pstr, &pthis->pitems[hi] ) < 0 )
	+ {
	+ veciter_addbefore( &iter, pstr );
	+ }
	+ else
	+ {
	+ veciter_addafter( &iter, pstr ); /* equal to item hi, or larger than the last item */
	+ }
	+}
	+
	+str_t* svec_find( vec_t* pthis, str_t* pstr ) /* Binary search for an item comparing equal to *pstr under str_casecmp().
	+ * Returns a pointer into the item array, or NULL when there is no match.
	+ * The items must already be in str_casecmp() order for the search to work. */
	+{
	+ int lo, hi, mid;
	+
	+ if( pthis->nitems == 0 )
	+ {
	+ return NULL;
	+ }
	+
	+ hi = pthis->nitems - 1;
	+ lo = -1;
	+ while( hi-lo > 1 ) /* narrow hi down to the first item not smaller than pstr, or the last item if there is none */
	+ {
	+ mid = (hi+lo)/2;
	+ if( str_casecmp( pstr, &pthis->pitems[mid] ) <= 0 )
	+ hi = mid;
	+ else
	+ lo = mid;
	+ }
	+ assert( hi >= 0 && hi < pthis->nitems );
	+
	+ if( str_casecmp( pstr, &pthis->pitems[hi] ) != 0 ) /* the candidate is not an exact match */
	+ {
	+ return NULL;
	+ }
	+
	+ return &pthis->pitems[hi];
	+}
	+
	+/* Sort the items with qsort() using svec_compare(); vectors with fewer than two items are left alone. */
	+void svec_sort( vec_t* pthis )
	+{
	+    if( pthis->nitems <= 1 )
	+    {
	+        return;
	+    }
	+
	+    qsort( pthis->pitems, pthis->nitems, sizeof(str_t), svec_compare );
	+}
	+
	+/*****************************************************************************
	+ * vector iterator
	+ */
	+
	+/* Iterators own no resources; intentionally does nothing. */
	+void veciter_destroy( veciter_t* pthis )
	+{
	+    (void)pthis; /* nothing to release */
	+}
	+
	+/*
	+ * Return the item the iterator points at, or NULL when the iterator has
	+ * no vector or its index is at or beyond the number of items.
	+ */
	+str_t* veciter_get( veciter_t* pthis )
	+{
	+    if( pthis->plist != NULL && pthis->index < pthis->plist->nitems )
	+    {
	+        return &pthis->plist->pitems[pthis->index];
	+    }
	+
	+    return NULL;
	+}
	+
	+/* Two iterators are equal when they refer to the same vector and the same index. */
	+bool_t veciter_equal( veciter_t* pthis, veciter_t* pthat )
	+{
	+    if( pthis->plist == pthat->plist &&
	+        pthis->index == pthat->index )
	+    {
	+        return true;
	+    }
	+
	+    return false;
	+}
	+
	+/* True when the iterator has a vector and its index refers to a stored item. */
	+bool_t veciter_hasitem( veciter_t* pthis )
	+{
	+    if( pthis->plist != NULL && pthis->index < pthis->plist->nitems )
	+    {
	+        return true;
	+    }
	+    return false;
	+}
	+
	+/* Step back one item; returns false, leaving the iterator unchanged, when already at index 0. */
	+bool_t veciter_prev( veciter_t* pthis )
	+{
	+    if( pthis->index != 0 )
	+    {
	+        pthis->index--;
	+        return true;
	+    }
	+    return false;
	+}
	+
	+/*
	+ * Step forward one item. The index is always advanced; false is returned
	+ * when the new index equals the number of items, true otherwise.
	+ */
	+bool_t veciter_next( veciter_t* pthis )
	+{
	+    pthis->index += 1;
	+    if( pthis->index != pthis->plist->nitems )
	+    {
	+        return true;
	+    }
	+    return false;
	+}
	+
	+/*
	+ * Insert a copy of *pstr directly after the item the iterator points at.
	+ * Later items are shifted up by one; the iterator index is not changed.
	+ * The iterator must refer to an existing item (asserted).
	+ */
	+void veciter_addafter( veciter_t* pthis, str_t* pstr )
	+{
	+    vec_t* plist = pthis->plist;
	+    uint after = pthis->index + 1;  /* slot the new item goes into */
	+    str_t* pitems;
	+
	+    vec_setsize( plist, plist->nitems+1 );
	+    assert( pthis->index < pthis->plist->nitems );
	+    pitems = plist->pitems;         /* fetched after vec_setsize(): the array may have moved */
	+
	+    if( after != plist->nitems )
	+    {
	+        memmove( &pitems[after+1], &pitems[after],
	+                 (plist->nitems-after) * sizeof(str_t) );
	+    }
	+
	+    pitems[after] = *pstr;
	+    plist->nitems++;
	+}
	+
	+/*
	+ * Insert a copy of *pstr at the position the iterator points at. The
	+ * item previously there and all later items are shifted up by one; the
	+ * iterator index is not changed, so it then refers to the new item.
	+ * The iterator must refer to an existing item (asserted).
	+ */
	+void veciter_addbefore( veciter_t* pthis, str_t* pstr )
	+{
	+    vec_t* plist = pthis->plist;
	+    uint at = pthis->index;         /* slot the new item goes into */
	+    str_t* pitems;
	+
	+    vec_setsize( plist, plist->nitems+1 );
	+    assert( pthis->index < pthis->plist->nitems );
	+    pitems = plist->pitems;         /* fetched after vec_setsize(): the array may have moved */
	+
	+    memmove( &pitems[at+1], &pitems[at],
	+             (plist->nitems-at) * sizeof(str_t) );
	+
	+    pitems[at] = *pstr;
	+    plist->nitems++;
	+}
	+
	+/*
	+ * Remove the item the iterator points at, shifting later items down by
	+ * one. The iterator index is not changed. The vector must not be empty
	+ * (asserted).
	+ */
	+void veciter_del( veciter_t* pthis )
	+{
	+    vec_t* plist;
	+    uint at;
	+
	+    assert( pthis->plist->nitems > 0 );
	+
	+    plist = pthis->plist;
	+    at = pthis->index;
	+
	+    plist->nitems--;
	+    if( at >= plist->nitems )
	+    {
	+        return;     /* removed the last item (or index was past it): nothing to shift */
	+    }
	+
	+    memmove( &plist->pitems[at], &plist->pitems[at+1],
	+             (plist->nitems-at) * sizeof(str_t) );
	+}
	+
	+#ifdef UNIT_TEST
	+int main( int argc, char** argv ) /* UNIT_TEST driver: 100 times over, creates a vector, calls vec_load() with argv[1],
	+ * prints every item and destroys the vector. Returns 1 on a usage error, 0 otherwise. */
	+{
	+ vec_t vl;
	+ veciter_t iter;
	+ str_t* pstr;
	+ uint n;
	+
	+ if( argc != 2 )
	+ {
	+ fprintf( stderr, "usage: %s <file>\n", argv[0] );
	+ return 1;
	+ }
	+
	+ for( n = 0; n < 100; n++ )
	+ {
	+ vec_create( &vl );
	+ vec_load( &vl, argv[1] ); /* NOTE(review): vec_load is neither defined in this file nor declared in vec.h as shown; confirm this test still builds */
	+
	+ vec_first( &vl, &iter );
	+ while( (pstr = veciter_get( &iter )) != NULL )
	+ {
	+ char buf[256];
	+ char* p;
	+ if( pstr->len > 200 ) /* keep the formatted line well inside buf */
	+ {
	+ fprintf( stderr, "str too long: %u chars\n", pstr->len );
	+ break;
	+ }
	+ p = buf;
	+ strcpy( buf, "str: " );
	+ p += 6; /* NOTE(review): the literal copied above is 5 characters long, so its NUL stays at buf[5] and puts() would stop there; presumably 5 was meant */
	+ memcpy( p, pstr->p, pstr->len );
	+ p += pstr->len;
	+ sprintf( p, " %u", pstr->count ); /* NOTE(review): str_t as declared in str.h has no count member; confirm */
	+ puts( buf );
	+
	+ veciter_next( &iter );
	+ }
	+
	+ vec_destroy( &vl );
	+ }
	+
	+ return 0;
	+}
	+#endif /* def UNIT_TEST */
	diff --git a/vec.h b/vec.h
	@@ -0,0 +1,58 @@
	+/* $Id: vec.h,v 1.3 2002/10/20 18:19:17 tommy Exp $ */
	+
	+/*
	+ * Copyright (c) 2002 Tom Marshall <[email protected]>
	+ *
	+ * This program is free software. It may be distributed under the terms
	+ * in the file LICENSE, found in the top level of the distribution.
	+ */
	+
	+#ifndef _VEC_H
	+#define _VEC_H
	+
	+/* item count for initial alloc */
	+#define VEC_INITIAL_SIZE 256
	+
	+typedef struct _vec
	+{
	+ uint nalloc; /* items alloced in pitems */
	+ uint nitems; /* items available */
	+ str_t* pitems; /* growing vector of items */
	+} vec_t;
	+
	+typedef struct _veciter
	+{
	+ struct _vec* plist; /* vector being iterated */
	+ uint index; /* position in plist->pitems; veciter_get() returns NULL once it reaches nitems */
	+} veciter_t;
	+
	+/* class vector */
	+void vec_create ( vec_t* pthis );                      /* allocate room for VEC_INITIAL_SIZE items */
	+void vec_destroy ( vec_t* pthis );                     /* free the item array */
	+
	+void vec_addhead ( vec_t* pthis, str_t* pstr );        /* insert a copy of *pstr at index 0 */
	+void vec_addtail ( vec_t* pthis, str_t* pstr );        /* append a copy of *pstr */
	+void vec_delhead ( vec_t* pthis );                     /* remove the first item */
	+void vec_deltail ( vec_t* pthis );                     /* remove the last item */
	+
	+void vec_first ( vec_t* pthis, veciter_t* piter );     /* iterator at index 0 */
	+void vec_last ( vec_t* pthis, veciter_t* piter );      /* iterator at index nitems (one past the last item) */
	+
	+/* class sorted_vector */
	+void svec_add ( vec_t* pthis, str_t* pstr );           /* insert keeping str_casecmp() order */
	+str_t* svec_find ( vec_t* pthis, str_t* pstr );        /* binary search; NULL when not found */
	+void svec_sort ( vec_t* pthis );                       /* qsort the items with str_casecmp() */
	+
	+/* veciter_create not needed */
	+void veciter_destroy ( veciter_t* pthis );             /* no-op */
	+
	+str_t* veciter_get ( veciter_t* pthis );               /* current item, or NULL when out of range */
	+bool_t veciter_equal ( veciter_t* pthis, veciter_t* pthat );
	+bool_t veciter_hasitem ( veciter_t* pthis );           /* true when veciter_get() would return an item */
	+bool_t veciter_prev ( veciter_t* pthis );              /* false when already at index 0 */
	+bool_t veciter_next ( veciter_t* pthis );              /* false when the new index equals nitems */
	+void veciter_addafter ( veciter_t* pthis, str_t* pstr );
	+void veciter_addbefore( veciter_t* pthis, str_t* pstr );
	+void veciter_del ( veciter_t* pthis );                 /* remove the current item */
	+
	+#endif /* ndef _VEC_H */