#!/usr/bin/perl
#
# sizedb - calculate size of database needed
#
# usage: sizedb config [files]
#
# Algorithm is
#
# Extract all the words from all Indexed fields.
# Throw away ones <2 characters or that begin with parens.
# Pipe through "sort | uniq | wc -l".
# This gives the number of words that will be indexed.
# Multiply that by 2 or so to get a comfortable table size.
# Print the next prime number greater than the calculated table size.
#
# Copyright (c) 1985 Corporation for Research and Educational Networking
# Copyright (c) 1988 University of Illinois Board of Trustees, Steven
# Dorner, and Paul Pomes
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
# must display the following acknowledgement:
# This product includes software developed by the Corporation for
# Research and Educational Networking (CREN), the University of
# Illinois at Urbana, and their contributors.
# 4. Neither the name of CREN, the University nor the names of their
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE TRUSTEES AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# @(#)$Id: sizedb,v 1.3 1994/03/12 01:02:08 paul Exp $
#
# find the numbers of the Indexed fields
# Main script body.
#
# 1. Read the field configuration file named by the first argument and
#    remember the number of every field whose line contains ":Indexed".
# 2. Read the remaining files (or stdin) line by line, pull out the value
#    of each indexed field, and feed its words, one per line, to
#    "sort | uniq | wc -l", which leaves the distinct-word count in a
#    temporary file.
# 3. Append "2 * p" to that file so dc(1) doubles the count and prints
#    it, then hand the result to "primes | head -1".
#
# Exits 0 on success; dies or exits 1 when a file cannot be opened or
# one of the helper pipelines fails.
use strict;
use warnings;
use File::Temp qw(tempfile);

# find the numbers of the Indexed fields
my $config = shift;
die "usage: sizedb config [files]\n" unless defined $config;

my %Dex;    # field name => field number, for every Indexed field

# Three-arg open: the config path is user supplied and must never be
# interpreted as an open mode or a command.
open(my $config_fh, '<', $config) or die "$config: $!\n";
while (my $config_line = <$config_fh>) {
    # The first two colon-separated pieces are the field number and name.
    my ($nnum, $nname) = split(/:/, $config_line);
    $Dex{$nname} = $nnum if $config_line =~ /:Indexed/;
}
close($config_fh);

# One precompiled pattern per indexed field number, built once instead of
# once per input line.  \Q..\E keeps the number literal inside the regex.
#
# The value is captured up to the next tab rather than the next space:
# a value has to be able to contain spaces, otherwise the word splitting
# below can never do anything and only the first word of each field would
# be counted.
# NOTE(review): assumes fields are tab-separated "num:value" pairs --
# confirm against the data files.
my @field_patterns = map { qr/\b\Q$_\E:([^\t]+)/ } values %Dex;

# Scratch file for the word count.  File::Temp creates it exclusively with
# an unpredictable name and removes it at exit, even when we die.
my ($tmp_fh, $tmp_name) = tempfile('sizedb.XXXXXX', TMPDIR => 1, UNLINK => 1);
close($tmp_fh);

# Single-quote the name for the shell command lines below.
(my $tmp_quoted = $tmp_name) =~ s/'/'\\''/g;
$tmp_quoted = "'$tmp_quoted'";

open(my $words_fh, '|-', "sort | uniq | wc -l > $tmp_quoted")
    or die "cannot start sort | uniq | wc -l: $!\n";

#
# Collect all indexed words
while (my $line = <>) {
    # chomp, not chop: a final line without a newline must keep its last
    # character.
    chomp $line;
    foreach my $field_pattern (@field_patterns) {
        next unless $line =~ $field_pattern;
        my $words = $1;

        # Drop "(" plus everything up to and including the next space.
        $words =~ s/\([^ ]* //g;
        # Drop two-character, then one-character, words that have a space
        # on both sides.
        # NOTE(review): a short word at the very start or end of the
        # value, or directly after another dropped word, is not caught;
        # the file header also says "<2 characters" while two-character
        # words are dropped here.  Left as is -- confirm the intent.
        $words =~ s/ \w\w / /g;
        $words =~ s/ \w / /g;
        # One word per line for sort | uniq.
        $words =~ s/ /\n/g;
        print {$words_fh} "$words\n";
    }
}

# Closing a pipe waits for the pipeline and reports its exit status.
close($words_fh)
    or die "sort | uniq | wc -l failed: " . ($! || "exit status $?") . "\n";

# The file now holds the count; add the dc program that doubles and
# prints it.
open(my $dc_fh, '>>', $tmp_name) or die "$tmp_name: $!\n";
print {$dc_fh} "2 * p\n";
close($dc_fh) or die "$tmp_name: $!\n";

# primes(6) reads its starting value from stdin; head keeps the first
# number it prints.
my $status = system("dc < $tmp_quoted | primes | head -1");
die "cannot run dc | primes | head: $!\n" if $status == -1;

# The temporary file is removed by File::Temp on exit.
exit($status == 0 ? 0 : 1);