Article 11537 of comp.infosystems.www:
Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!wupost!texbell.sbc.com!swuts!132.201.57.164!bm1822
From: [email protected] (Brian Millett)
Newsgroups: comp.infosystems.www
Subject: wais.pl hack
Message-ID: <[email protected]>
Date: 28 Mar 94 22:14:23 GMT
Sender: [email protected]
Lines: 130

Well, I've hacked on the wais.pl script from ncsa to get it to work
for me.  That is, the return data is interpreted correctly and I can
fetch the correct document.  It looks like the 'pipes weren't hot
enough' (see 'Programming perl' pg 110).  I also had to hack the
variable '$headline'.  With the version of freeWAIS (0.202) I am using,
or perhaps because of the way I am indexing, the headline has the file
name first, followed by the path.  That turned the URL used to fetch the
file into garbage.  BUT I am still
having the following show up in the browser window:

Searching shakespear.src...Initializing connection...Found 3 items.HTTP/1.0 200 OK Date:
Monday, 28-Mar-94 21:41:11 GMT Server: NCSA/1.1 MIME-version: 1.0 Content-type: text/html

How do I get rid of it?

The two hacks are marked with "HACK ALERT" in the script below.  Before
using it, you must change the $waisq, $waisd, and $src variables:

#!/usr/local/bin/perl
#
# wais.pl -- WAIS search interface
#
# wais.pl,v 1.1 1993/12/31 09:30:56 robm Exp
#
# Tony Sanders <[email protected]>, Nov 1993
#
# Example configuration (in local.conf):
#     map topdir wais.pl &do_wais($top, $path, $query, "database", "title")
#

## Site configuration -- CHANGE THESE for your installation
# Full path of the waisq client program that do_wais executes.
$waisq = '/users/bm1822/development/web/freeWAIS-0.202/bin/waisq';
# Directory handed to waisq with -c; docdone also reads <src>.cat from here.
$waisd = '/users/bm1822/development/web/freeWAIS-0.202/wais-sources';
# Name of the WAIS source, without the ".src" suffix (it is appended later).
$src   = 'shakespear';
# Text shown in the <TITLE> and <H1> of the generated pages.
$title = 'REPO Wais documentation';
## end of site configuration

# PrintHeader
# Prints the "Content-type" header line that marks the output as an HTML
# document, followed by an empty line.  Takes no arguments.

sub PrintHeader {
    local($mime_type) = "text/html";
    print "Content-type: $mime_type\n\n";
}

# send_index
# Prints the search form page: a head and heading built from the global
# $title, a short usage hint, and an <ISINDEX> tag.  Takes no arguments.
sub send_index {
    local($page);

    # Document head and visible heading, both carrying the configured title.
    $page  = "<HEAD>\n<TITLE>Index of $title</TITLE>\n</HEAD>\n";
    $page .= "<BODY>\n<H1>$title</H1>\n";

    # Usage hint shown above the search box.
    $page .= "This is an index of the information on this server. Please\n"
           . "type a query in the search dialog.\n<P>"
           . "You may use compound searches, such as: "
           . "<CODE>environment AND cgi</CODE>\n";

    # Note: no trailing newline after the ISINDEX tag.
    $page .= "<ISINDEX>";

    print $page;
}

# do_wais
# Runs a WAIS search for the words in @ARGV and prints the result page.
#
# Globals read:  $waisq, $waisd, $src, $title (site configuration).
# Output:        an HTML page on the current output handle; each hit is
#                rendered by &docdone.
# Errors:        dies if the waisq child cannot be forked; the child dies
#                if waisq cannot be exec'ed.
#
# An earlier form of this routine took ($top, $path, $query, $src, $title)
# as arguments; this version uses @ARGV and the globals instead.
#
# The per-hit fields ($score, $headline, ...) are declared with local()
# because &docdone reads and resets them through dynamic scoping.
sub do_wais {
    local(@query) = @ARGV;
    local($pquery) = join(" ", @query);
    local($pid);

    # The query words come from the request, so escape them before they are
    # echoed into the HTML page.  "&" must be handled first.
    $pquery =~ s/&/&amp;/g;
    $pquery =~ s/</&lt;/g;
    $pquery =~ s/>/&gt;/g;

##  HACK ALERT
##  Unbuffer STDOUT before forking; this was added to get the
##  content-type message delivered correctly.
    select((select(STDOUT), $| = 1)[0]);
##  end ALERT

    # Fork with the child's STDOUT connected to WAISQ.  open() returns undef
    # when the fork fails, 0 in the child and the child's pid in the parent,
    # so the three cases have to be told apart explicitly.
    $pid = open(WAISQ, "-|");
    defined($pid) || die "cannot fork for $waisq: $!";
    if ($pid == 0) {
        exec($waisq, "-c", $waisd,
             "-f", "-", "-S", "$src.src", "-g", @query);
        # Only reached when exec failed: stop here so the child does not
        # carry on and run the parent's code into the pipe.
        die "cannot exec $waisq: $!";
    }

    print "<HEAD>\n<TITLE>Search of ", $title, "</TITLE>\n</HEAD>\n";
    print "<BODY>\n<H1>", $title, "</H1>\n";

    print "Index \`$src\' contains the following\n";
    print "items relevant to \`$pquery\':<P>\n";
    print "<DL>\n";

    local($hits, $score, $headline, $lines, $bytes, $type, $date);
    $hits = 0;
    while (<WAISQ>) {
        # Collect the fields of the current record as they go by ...
        /:score\s+(\d+)/ && ($score = $1);
        /:number-of-lines\s+(\d+)/ && ($lines = $1);
        /:number-of-bytes\s+(\d+)/ && ($bytes = $1);
        /:type "(.*)"/ && ($type = $1);
        /:headline "(.*)"/ && ($headline = $1);         # XXX
        # ... and treat the ":date" line as the end of the record.
        /:date "(\d+)"/ && ($date = $1, $hits++, &docdone());
    }
    close(WAISQ);
    print "</DL>\n";

    if ($hits == 0) {
        print "Nothing found.\n";
    }
    print "</BODY>\n";
}

# docdone
# Prints one search hit (or, for the "no result" headline, the catalog
# listing) and then clears the per-hit fields for the next record.
#
# Called from do_wais each time a ":date" line completes a record.  Works
# through globals: reads $headline, $score, $lines and $bytes (set by
# do_wais) plus $waisd and $src (site configuration), and resets $score,
# $headline, $lines, $bytes, $type and $date to ''.  Dies if the catalog
# file cannot be opened.
sub docdone {
    # Keep the scratch variables out of the caller's namespace.
    local($filename, $path);

    if ($headline =~ /Search produced no result/) {
        print "<HR>";
        print $headline, "<P>\n<PRE>";
# the following was &'safeopen
        open(WAISCAT, "$waisd/$src.cat") || die "$src.cat: $!";
        while (<WAISCAT>) {
            # NOTE(review): $top is not set anywhere in the visible script,
            # so this link comes out as "//$src.src" -- confirm the
            # intended URL prefix.
            s#(Catalog for database:)\s+.*#$1 <A HREF="/$top/$src.src">$src.src</A>#;
            s#Headline:\s+(.*)#Headline: <A HREF="$1">$1</A>#;
            print;
        }
        close(WAISCAT);
        print "\n</PRE>\n";
    } else {
        #
        # HACK ALERT!!!
        # Here the headline has the file name first and the directory
        # second, e.g.
        #:headline "macbeth.sp   /users/bm1822/doc/shakespeare/Tragedies/"
        # so split it and put the two parts back together as path + name.
        ($filename, $path) = split(' ', $headline);
        # Make sure exactly one "/" separates the directory from the name.
        $path .= "/" if $path ne '' && $path !~ m#/$#;
        print "<DT><A HREF=\"$path$filename\">$filename</A>\n";
        # end ALERT

        print "<DD>Score: $score, Lines: $lines, Bytes: $bytes\n";
    }
    $score = $headline = $lines = $bytes = $type = $date = '';
}

# Main program: always send the header first, then show the search form when
# no query words were given on the command line, or run the search otherwise.
# @ARGV is tested for emptiness directly; "defined @ARGV" is rejected as a
# fatal error by perl 5.22 and later.
&PrintHeader();
if (!@ARGV) {
    &send_index();
}
else {
    &do_wais();
}
--
Brian Millett                    "The significant problems we face in life
Southwestern Bell Telephone Co.   cannot be solved at the same level of
(314) 235-3866                    thinking we were at when we created them"
[email protected]              Albert Einstein.