/* index sequential files, producing .idx and .sel files */
/* Bruce Tanner - Cerritos College */

/*
Version History:

1.0  05/10/93 Original attempt
2.0  06/20/93 Create indexed files directly, add keyword count field
2.1  07/08/93 Change the file name for NOISE_WORDS
2.2  07/08/93 Move the range end (end_pos) to before the terminator
2.2jlw 07/14/93 - JLW added length spec to dash, added additional topic
                 divider keywords
2.3  07/19/93 Set multi-buffer, multi-block counts, read-ahead, write-behind
             and deferred write; noticeably increased performance
2.4  07/26/93 Removed index name, added CLI$ interface, added /TOPIC
2.4jlw 07/27/93 fixed version retention, which was broken
2.5  07/27/93 Selector strings forced to lowercase; use a good copy
2.6  07/29/93 revamp /TOPIC syntax to include text, size, exclude
2.7  07/30/93 make SIZE=n pad as well as truncate field width
2.8  08/03/93 take wildcard input file names, add /OUTPUT, /VERSION
2.9  08/05/93 JLW changed filename sizes from 80 to 256 characters
2.10 08/05/93 add check for max number of topics, reformat code
2.11 08/24/93 JLW added specific statuses for exit errors
2.12 10/01/93 add /NODEFAULT_TOPIC to omit topics that have no topic keyword
2.13 11/03/93 add /LINK to generate .link file instead of .idx/.sel
2.14 11/15/93 add /NOISE=file to specify the noise words file
2.15 11/17/93 add /TOPIC=(position), /FIELD=(position, size), /PUNCTUATION
2.16 11/18/93 fix illegal strcpy for AXP, add /MAX_TOPICS
2.17 11/21/93 make load_noise friendlier, add /NOPUNCTUATION support
2.18 11/27/93 add /MINIMUM_WORD, /COUNT_WORDS
2.19 11/30/93 fix broken /TOPIC
2.20 03/20/94 sort words, add /LINK=SORT, /SEQUENTIAL, remove /COUNT_WORDS
2.21 04/29/94 add /NONUMBERS
2.22 06/23/94 add /TOPIC=(offset) /TOPIC=(position=0)
2.23 06/24/94 add /TOPIC=(end)
2.24 06/27/94 add /CANDIDATE, /KEYWORD=(text,end,exclude)
2.24a 06/29/94 replaced VAXC-specific "#include foo" declarations with
               more portable "#include <foo.h>" (so DECC won't balk).
2.25 08/04/94 fix /TOPIC=END not matching
2.26 09/15/94 /KEYWORD=END=foo stopped at end of line if 'foo' wasn't found
2.27 09/27/94 change get_text() to return updated pointer to fix mangled text
3.0  09/29/94 redo parsing routines, add /SPECIFICATION, /TOPIC=BREAK
3.1  10/10/94 add /SELECTOR, don't index selector line
3.2  10/17/94 change /SELECTOR to /SELECTOR=(TEXT,END,BOTH)
3.3  11/04/94 add /KEYWORD=(offset), extend selector to include host/port
3.4  11/07/94 add /HELPFILE /SELECTOR=IGNORE
3.5  12/16/94 move close of link file for wildcards
3.6  01/02/95 program around selector.end default problem
*/

#include <ssdef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <rms.h>
#include <descrip.h>
#include <climsgdef.h>
#include <lib$routines.h>
#include <starlet.h>

#define CHUNK 100     /* increment to expand table of words */
#define DESC_SIZE 500  /* maximum size of a topic description */
#define SELECTOR_SIZE 200 /* maximum size of a selector (minus description) */
#define TOPIC_SIZE 20 /* maximum number of topics to list */
#define PUNCT_CHARS  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
#define DEFAULT_POS 0 /* column to look for topic by default; 0 = anywhere */
#define MAX_INPUT_LINE 500

typedef struct {
   char *text;
   char *found;
   char *end;
   int  pos;
   int  size;
   int  offset;
   int  exclude;
   int  force_break;
   int  used;
} topic_str;

typedef struct {
   char *name;
   int  state;
   char *value;
} switch_str;

typedef struct dsc$descriptor_s dsc;

int words_index, words_size;
char **words;
int noise_index, noise_size;
int candidate_index, candidate_size;
char **noise;
char **candidate;
int max_word, max_topic, max_count, sequential;
char *idx_record, *prev_keyword;
char sel_record[DESC_SIZE + SELECTOR_SIZE + 100];
int keyword_index;
FILE *spc;
switch_str switches[50];
topic_str topics[TOPIC_SIZE];
topic_str keywords[TOPIC_SIZE];
char *selector_spec;

int cli$dcl_parse();
int cli$get_value();
int cli$present();

void build_words(char *, char *, int);
void test_words(char *, char *, int, topic_str *);
void expand_table(char ***, int *);
void write_words(FILE *, FILE *, struct RAB *, struct RAB*, int *, int *,
                char *, topic_str *);
void load_words(char *, char *, char ***, int *, int *);
int is_noise(char *, int, int);
int is_candidate(char *, int);
int is_punct(char, char *);
int is_spaces(char *, int, int);
dsc *descr(char *string);
void parse_topic(char *, topic_str *);
void parse_keyword(char *, topic_str *);
void *my_realloc(void *, int);
void index_commands();
int find_str(char *, char *);
void parse_commands(dsc *, switch_str[]);
int switch_present(char *);
char *switch_value(char *);


main(int argc, char *argv[])
{
   FILE  *src, *lnk;
   char  *cp, *cp2, *ptr, desc[DESC_SIZE + 1], src_line[MAX_INPUT_LINE];
   static char cli_input[256], punctuation[128], temp_punct[128];
   static char value[20], file_arg[256], file_spec[256], out_name[256];
   static char spec_name[256], spec_line[270];
   char  orig_line[MAX_INPUT_LINE], lc_line[MAX_INPUT_LINE];
   char spaces[DESC_SIZE + 1], help_index[10];
   int   start_pos, end_pos, status, index, context = 0;
   enum  {none, para, dash, hex, equal, line, whole, field} type = none;
   int   dash_len = 0, ind, minimum_word, where;
   int   hex_value, field_pos = 1, field_size;
   short leng;
   char  *dashes = NULL;
   struct FAB idxfab, selfab;
   struct RAB idxrab, selrab;
   struct XABKEY idxxab, selxab;
   $DESCRIPTOR(input_dsc, cli_input);
   $DESCRIPTOR(file_dsc, file_arg);
   $DESCRIPTOR(file_spec_dsc, file_spec);
   $DESCRIPTOR(out_dsc, out_name);
   $DESCRIPTOR(punct_dsc, temp_punct);
   $DESCRIPTOR(value_dsc, value);
   $DESCRIPTOR(spec_dsc, spec_name);
   $DESCRIPTOR(spec_line_dsc, spec_line);

   for (index = 0; index < TOPIC_SIZE; index++) {
       topics[index].text = NULL;
       topics[index].end = NULL;
       topics[index].found = NULL;
       topics[index].pos = 0;
       topics[index].size = 0;
       topics[index].offset = 0;
       topics[index].exclude = 0;
       topics[index].used = 0;
       topics[index].force_break = 0;

       keywords[index].text = NULL;
       keywords[index].end = NULL;
       keywords[index].found = NULL;
       keywords[index].pos = 0;
       keywords[index].size = 0;
       keywords[index].offset = 0;
       keywords[index].exclude = 0;
       keywords[index].used = 0;
       keywords[index].force_break = 0;
   }

   status = lib$get_foreign(&input_dsc, 0, &leng, 0);

   for (ind = leng; ind >= 0; ind--)
       cli_input[ind+6] = cli_input[ind];
   strncpy(cli_input, "index ", 6);
   input_dsc.dsc$w_length = leng+6;

   status = cli$dcl_parse(&input_dsc, index_commands);

   if (status != CLI$_NORMAL)  /* error in parse, exit */
       exit(7);

   if ((cli$present(descr("file")) & 1) == 0) {
       printf("Usage: index document\n");
       printf(" /CANDIDATES=file    specify a file of words for index candidates\n");
       printf(" /CHARACTER=n        text separated by control character 'n'\n");
       printf(" /DASH=n             text separated n dashes (default 3)\n");
       printf(" /[NO]DEFAULT_TOPIC  keep [discard] topics [not] matched by /TOPIC\n");
       printf(" /EQUAL=n            text separated n equals (default 80)\n");
       printf(" /FF                 text separated by form feeds\n");
       printf(" /FIELD=(position,size)  specify topic break on field\n");
       printf(" /HELPFILE=(selector,title)  file to match query \"?\"\n");
       printf(" /KEYWORD=(text,end,offset,exclude) specify indexing range\n");
       printf(" /LINE               each line is separate text entry\n");
       printf(" /LINK[=SORT]        generate .link file instead of .idx,.sel files\n");
       printf(" /MAX_TOPICS=n       maximum size of topic ID field (default 4)\n");
       printf(" /MINIMUM_WORD=n     define minimum word to index (default 3)\n");
       printf(" /NOISE=file         specify a file of words to omit in the index\n");
       printf(" /NONUMBERS          omit all numbers from the index\n");
       printf(" /OUTPUT=file        override name of index/selection files\n");
       printf(" /PARAGRAPH          text separated by blank lines\n");
       printf(" /PUNCTUATION=\"...\"  specify the characters that separate words\n");
       printf(" /SELECTOR=(text,end,both,ignore) specify selectors to generate\n");
       printf(" /SEQUENTIAL         create sequential files (.seqidx, .seqsel)\n");
       printf(" /SPECIFICATION=file specify a file of switches\n");
       printf(" /TOPIC=(text,end,position,size,offset,exclude,break) specify topic names\n");
       printf(" /[NO]VERSION        keep [discard] document version in selection\n");
       printf(" /WHOLE              whole file is one text entry\n");
       printf(" /WORD_LENGTH=n      maximum size of index key (default 20)\n");
       exit(1);
   }

   if (cli$present(descr("specification")) & 1) {
       status = cli$get_value(descr("specification"), &spec_dsc, &leng);
       spec_name[leng] = '\0';
       if ((spc = fopen(spec_name, "r")) == NULL) {
           printf("Can't read spec file %s\n", spec_name);
           exit(13);
       }
       /* parse every line of the spec file */
       while (fgets(spec_line, sizeof(spec_line), spc)) {
           if ((spec_line[0] == '\n') || (spec_line[0] == '#') ||
               (spec_line[0] == '!'))
               continue;                 /* skip blank and comment lines */
           ptr = strchr(spec_line, '\n');
           if (ptr) *ptr = '\0';
           leng = strlen(spec_line);
           for (ind = leng; ind >= 0; ind--)
               spec_line[ind+6] = spec_line[ind];
           strncpy(spec_line, "index ", 6);
           spec_line_dsc.dsc$w_length = leng+6;

           parse_commands(&spec_line_dsc, switches);
       }
   }

   parse_commands(&input_dsc, switches);

   if (switch_present("paragraph"))
       type = para;
   if (switch_present("ff")) {
       type = hex;                      /* /FF same as /character=12 */
       hex_value = '\f';
   }
   if (switch_present("character")) {
       hex_value = atoi(switch_value("character"));
       type = hex;
   }
   if (switch_present("whole"))
       type = whole;
   if (switch_present("line"))
       type = line;
   if (switch_present("dash")) {
       dash_len = atoi(switch_value("dash"));
       type = dash;
   }
   if (switch_present("equal")) {
       dash_len = atoi(switch_value("equal"));
       type = equal;
   }
   if (switch_present("word_length")) {
       max_word = atoi(switch_value("word_length"));
   }
   if (switch_present("field")) {
       type = field;
       field_pos = atoi(switch_value("field.position"));
       field_size = atoi(switch_value("field.size"));
   }
   strcpy(punctuation, PUNCT_CHARS);  /* default for /punctuation */
   if (switch_present("punctuation")) {
       strcpy(temp_punct, switch_value("punctuation"));
       if (temp_punct[0] == '"') {  /* if quoted string */
           strncpy(punctuation, temp_punct+1, leng-2);
           punctuation[leng-2] = '\0';
       }
       else if (strlen(temp_punct) > 0)
           strcpy(punctuation, temp_punct);
   }
   else {  /*  /nopunctuation="$" means exclude $ from punct chars */
       if (cp = switch_value("punctuation"))
           strcpy(temp_punct, cp);
       else
           strcpy(temp_punct, "");
       if (temp_punct[0] == '"') {   /* if quoted string */
           strcpy(temp_punct, temp_punct+1);
           temp_punct[leng-2] = '\0';
       }
       for (cp = temp_punct; *cp; cp++) {
           cp2 = strchr(punctuation, *cp);
           if (cp2)
               strcpy(cp2, cp2+1);  /* remove character from punctuation */
       }
   }
   if (switch_present("max_topics")) {
       max_topic = max_count = atoi(switch_value("max_topics"));  /* query assumes topic = count */
       if (max_topic > 9) {
           printf("/MAX_TOPICS specifies the number of digits in the topic number field.\n");
           printf("A 32 bit system cannot handle integers greater than 9 digits.\n");
           exit(9);
       }
   }
   if (switch_present("minimum_word"))
       minimum_word = atoi(switch_value("minimum_word"));

   sequential = (switch_present("sequential"));

   strcpy(file_arg, switch_value("file"));  /* get source */
   file_dsc.dsc$w_length = (short) strlen(file_arg);  /* set the descriptor length */

   strncpy(file_spec, "", sizeof(file_spec));  /* clear out file_spec */
   status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0);
   if ((status & 1) == 0) {
       perror("lib$find_file failed");
       exit(11);
   }
   ptr = strchr(file_spec, ' ');
   if (ptr)
       *ptr = '\0';            /* chop off trailing spaces */

   strcpy(out_name, file_spec);    /* make copy for output spec */

   if (switch_present("output"))   /* if /output, overwrite out_name */
       strcpy(out_name, switch_value("output"));

   words_size = words_index = 0;    /* no words yet */
   words = NULL;
   noise_size = noise_index = 0;    /* no noise yet */
   noise = NULL;
   candidate_size = candidate_index = 0;    /* no candidate yet */
   candidate = NULL;
   if (!switch_present("link")) {
       load_words("noise", punctuation, &noise, &noise_size, &noise_index);
       load_words("candidates", punctuation, &candidate, &candidate_size, &candidate_index);
   }

   dashes = (char *)malloc(dash_len+1);
   memset((void *)dashes, (type==dash) ? '-' : '=', dash_len);
   dashes[dash_len] = '\0';

   memset((void *) spaces, ' ', DESC_SIZE); /* make spaces for padding topic */
   spaces[DESC_SIZE] = '\0';

   idx_record = (char *) calloc(max_word + max_count + max_topic + 1,
                                sizeof(char));

   prev_keyword = (char *) calloc(max_word + 1, sizeof(char));

   ptr = strrchr(out_name, '.');  /* just get file name */
   if (ptr) *ptr = '\0';
   if (sequential)
       strcat(out_name, ".seqidx");
   else
       strcat(out_name, ".idx");

   idxfab = cc$rms_fab;
   idxfab.fab$l_alq = 100;
   idxfab.fab$b_bks = 3;
   idxfab.fab$w_deq = 25;
   idxfab.fab$b_fac = FAB$M_PUT;
   idxfab.fab$l_fna = out_name;
   idxfab.fab$b_fns = strlen(out_name);
   idxfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW;
   idxfab.fab$w_mrs = max_word + max_topic + max_count;
   if (sequential)
       idxfab.fab$b_org = FAB$C_SEQ;
   else
       idxfab.fab$b_org = FAB$C_IDX;
   idxfab.fab$b_rat = FAB$M_CR;
   idxfab.fab$b_rfm = FAB$C_FIX;
   idxfab.fab$b_shr = FAB$M_NIL;
   idxfab.fab$l_xab = (char *) &idxxab;

   idxrab = cc$rms_rab;
   idxrab.rab$l_fab = (struct FAB *) &idxfab;
   idxrab.rab$b_krf = 0;
   if (sequential)
       idxrab.rab$b_rac = RAB$C_SEQ;
   else
       idxrab.rab$b_rac = RAB$C_KEY;
   idxrab.rab$l_rbf = idx_record;
   idxrab.rab$w_rsz = max_word + max_topic + max_count;
   idxrab.rab$l_ubf = idx_record;
   idxrab.rab$w_usz = max_word + max_topic + max_count;
   idxrab.rab$b_mbf = 20;
   idxrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH;

   idxxab = cc$rms_xabkey;
   idxxab.xab$b_dtp = XAB$C_STG;
   idxxab.xab$b_flg = XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR;
   idxxab.xab$w_pos0 = 0;
   idxxab.xab$b_siz0 = max_word + max_topic;
   idxxab.xab$b_ref = 0;

   if (!switch_present("link")) {
       if (((status = sys$create(&idxfab)) & 1) != SS$_NORMAL)
           lib$stop(status);
       if (((status = sys$connect(&idxrab)) & 1) != SS$_NORMAL)
           lib$stop(status);
   }

   ptr = strrchr(out_name, '.');  /* just get file name */
   if (ptr) *ptr = '\0';
   if (sequential)
       strcat(out_name, ".seqsel");
   else
       strcat(out_name, ".sel");

   selfab = cc$rms_fab;
   selfab.fab$l_alq = 10;
   selfab.fab$b_bks = 3;
   selfab.fab$w_deq = 5;
   selfab.fab$b_fac = FAB$M_PUT;
   selfab.fab$l_fna = out_name;
   selfab.fab$b_fns = strlen(out_name);
   selfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW;
   selfab.fab$w_mrs = max_topic + DESC_SIZE + SELECTOR_SIZE;
   if (sequential)
       selfab.fab$b_org = FAB$C_SEQ;
   else
       selfab.fab$b_org = FAB$C_IDX;
   selfab.fab$b_rat = FAB$M_CR;
   selfab.fab$b_rfm = FAB$C_VAR;
   selfab.fab$b_shr = FAB$M_NIL;
   selfab.fab$l_xab = (char *) &selxab;

   selrab = cc$rms_rab;
   selrab.rab$l_fab = (struct FAB *) &selfab;
   if (sequential)
       selrab.rab$b_rac = RAB$C_SEQ;
   else
       selrab.rab$b_rac = RAB$C_KEY;
   selrab.rab$l_rbf = sel_record;
   selrab.rab$b_mbf = 20;
   selrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH;

   selxab = cc$rms_xabkey;
   selxab.xab$b_dtp = XAB$C_STG;
   selxab.xab$b_flg = XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR;
   selxab.xab$w_pos0 = 0;
   selxab.xab$b_siz0 = max_topic;
   selxab.xab$b_ref = 0;

   if (!switch_present("link")) {
       if (((status = sys$create(&selfab)) & 1) != SS$_NORMAL)
           lib$stop(status);
       if (((status = sys$connect(&selrab)) & 1) != SS$_NORMAL)
           lib$stop(status);
       if (switch_present("helpfile.selector")) {
           selector_spec = (char *)
               calloc((strlen(switch_value("helpfile.selector")) + 1),
                      sizeof(char));
           strcpy(selector_spec, switch_value("helpfile.selector"));

           if (switch_present("helpfile.title"))
               strcpy(desc, switch_value("helpfile.title"));
           else
               strcpy(desc, "Help on search commands");
           strcpy(help_index, "?");
           build_words(help_index, "", 0);      /* add "?" to the index */
           strcpy(help_index, "?help");
           build_words(help_index, "", 0);      /* add "?help" to the index */
           write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);  /* write helpfile */
       }
   }
   else {  /* /link */
       ptr = strrchr(out_name, '.');
       if (ptr) *ptr = '\0';
       strcat(out_name, ".link");
       lnk = fopen(out_name, "w", "mbc=50", "mbf=20");
       if (!switch_present("link.sort"))
           fprintf(lnk, "Sortdir=False\n\n");
   }

   for (;;) {  /* process all files in input spec, first one already found */

       if ((src = fopen(file_spec, "r", "mbc=50", "mbf=20")) == NULL) {
           printf("Can't read input file %s\n", file_spec);
           exit(3);
       }
       printf("Building index for %s\n", file_spec);

       start_pos = ftell(src);          /* init start position */
       strncpy(desc, "", DESC_SIZE + 1);

       while (fgets(src_line, sizeof(src_line), src)) {
           /* if the first character of the line is the hex value, end topic */
           if ((src_line[0] == hex_value) && (type == hex)) {
               write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);
               continue;
           }
           ptr = strchr(src_line, '\n');
           if (ptr) *ptr = '\0';              /* remove newline */
           for (ptr = src_line; *ptr; ptr++)
               if (iscntrl(*ptr))  *ptr = ' ';  /* convert tabs to spaces */
           while ((strlen(src_line) > 0) &&
                  (src_line[strlen(src_line)-1] == ' '))
               src_line[strlen(src_line)-1] = '\0';/* remove trailing blanks */
           strcpy(orig_line, src_line);    /* copy before forcing lower case */
           for (ptr = src_line; *ptr; ptr++)
               *ptr = _tolower(*ptr);           /* force lowercase */
           strcpy(lc_line, src_line);      /* copy with leading blanks */
           for (ptr = src_line; *ptr; ptr++)
               if (*ptr > ' ') break;  /* find first non-blank char */
           strcpy(src_line, ptr);      /* remove leading blanks */

           /* break on dashes */
           if (((type == equal) || (type == dash)) &&
               (strncmp(orig_line, dashes, dash_len) == 0)) {
               write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);
               continue;
           }
           /* break on paragraph */
           if ((type == para) && (strlen(src_line) == 0)) {
               write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);
               continue;
           }
           /* break on non-empty field */
           if ((type == field) && !is_spaces(orig_line, field_pos, field_size)) {
               write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);
               start_pos = end_pos;   /* don't skip over line with field break */
           }
           /* save the first line by default */
           if ((switch_present("default_topic")) && (strlen(desc) == 0))
               strncpy(desc, orig_line, DESC_SIZE);

           if (switch_present("selector.text") &&
               (where = find_str(lc_line, switch_value("selector.text")))) {
               selector_spec = (char *) my_realloc((char *) selector_spec,
                                        strlen(orig_line) + 1);
               strcpy(selector_spec, orig_line + where - 1 +
                      strlen(switch_value("selector.text")));
               if (switch_present("selector.end") &&  /* if selector=end given */
                   (where = find_str(selector_spec, switch_value("selector.end"))))
                   selector_spec[where - 1] = '\0';  /* mark selector end */
               while (*selector_spec == ' ')  /* remove leading spaces */
                   strcpy(selector_spec, selector_spec + 1);
               continue;                    /* do not index this line */
           }
           else if (selector_spec == NULL)
               selector_spec = (char *) calloc(1, sizeof(char)); /* make empty spec */

           for (index = 0; topics[index].used; index++) { /* apply topic rules */
               where = topics[index].pos;      /* where text is found */
               /* if text matches the source text and position */
               if (
                   ((topics[index].pos > 0) && topics[index].text
                    && strncmp(lc_line + topics[index].pos - 1,
                           topics[index].text, strlen(topics[index].text)) == 0)
                   ||  /* or position = 0 and text is found _somewhere_ */
                   ((topics[index].pos == 0)
                    && (where = find_str(lc_line, topics[index].text)))
                   || /* or no text given but position and size field is non-blank */
                   (!topics[index].text &&
                    !is_spaces(orig_line, topics[index].pos, topics[index].size))
                  ) {
                   /* if topic matches and requested a break, do it */
                   if (topics[index].force_break) {
                       write_words(src, lnk, &selrab, &idxrab, &start_pos,
                                   &end_pos, desc, topics);
                       start_pos = end_pos;   /* don't skip over topic line */
                   }
                   if (topics[index].exclude)
                       strcpy(orig_line + where - 1,
                              orig_line + where - 1 + strlen(topics[index].text));
                   topics[index].found = (char *) my_realloc((char *) topics[index].found,
                                        (topics[index].size ? topics[index].size : strlen(orig_line))
                                        + 1);
                   if (topics[index].size > 0) {
                       strncpy(topics[index].found, orig_line
                               + where - 1 + topics[index].offset,
                               topics[index].size);
                       topics[index].found[topics[index].size] = '\0';
                       strncat(topics[index].found, spaces,
                               topics[index].size - strlen(topics[index].found));
                   }
                   else {
                       strcpy(topics[index].found, orig_line
                              + where - 1 + topics[index].offset);
                       if (where = find_str(topics[index].found, topics[index].end))
                           topics[index].found[where - 1] = '\0';    /* terminate the found string */
                   }
               break;     /* a line satisfies only one topic rule */
               }
           }
           if (!switch_present("link"))
               test_words(src_line, punctuation, minimum_word, keywords);
           end_pos = ftell(src);  /* end_pos points before any terminator */
           if (type == line)
               write_words(src, lnk, &selrab, &idxrab, &start_pos,
                           &end_pos, desc, topics);
       }

       /* in case file doesn't end with a terminator */
       write_words(src, lnk, &selrab, &idxrab, &start_pos, &end_pos,
                   desc, topics);
       fclose(src);
       status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0);
       if ((status & 1) == 0) {
           lib$find_file_end(&context);
           break;
       }
       ptr = strchr(file_spec, ' ');
       if (ptr) *ptr = '\0';            /* chop off trailing spaces */
   }
   if (switch_present("link"))
       fclose(lnk);
   else {
       sys$close(&selfab);
       sys$close(&idxfab);
   }
}


/* test for start/end of keyword indexing */
void test_words(char *line, char *punct, int minimum_word, topic_str *keywords)
{
   char test_line[MAX_INPUT_LINE], copy_line[MAX_INPUT_LINE];
   int ind, where, retry;

   if (!switch_present("keyword")) {  /* no /keyword= */
       build_words(line, punct, minimum_word);  /* index everything */
       return;
   }

   if ((keyword_index > -1)
       && ((keywords[keyword_index].end == NULL)
           || (strlen(keywords[keyword_index].end) == 0)))
       keyword_index = -1;  /* keyword indexing stops at EOL unless /keyword=end */

   strcpy(test_line, line);        /* copy source line */
   do {
       where = 0;
       retry = FALSE;
       if (keyword_index == -1) {      /* between keywords */
           for (ind = 0; keywords[ind].used; ind++)
               if (where = find_str(test_line, keywords[ind].text))
                   break;
           if (where) {
               keyword_index = ind;           /* record current keyword */
               strcpy(test_line, test_line + where - 1 +
                      keywords[ind].offset);  /* remove up to keyword */
               if (keywords[ind].exclude)
                   strcpy(test_line, test_line + strlen(keywords[ind].text));
           }
       }
       if (keyword_index > -1)       /* in keyword index */
           if (where = find_str(test_line, keywords[keyword_index].end)) {
               strcpy(copy_line, test_line);
               test_line[where - 1] = '\0';
               build_words(test_line, punct, minimum_word);  /* index contents of line */
               strcpy(test_line, copy_line + where - 1);  /* restart at end word */
               keyword_index = -1;   /* no longer indexing */
               retry = TRUE;        /* check for another keyword */
           }
           else {  /* indexing and no end word found */
               build_words(test_line, punct, minimum_word);  /* index contents of line */
               return;
           }
   } while (retry);
}


/* break line into words and save them in words[] */

void build_words(char *line, char *punct, int minimum_word)
{
   char *cp, *cp2;

   for (cp = line; *cp; cp++)      /* convert punctuation to spaces */
       if (is_punct(*cp, punct)) *cp = ' ';

   strcat(line, " ");              /* line ends with a space */
   cp = line;
   while(cp2 = strchr(cp, ' ')) {  /* break at space boundary */
       *cp2 = '\0';
       if (strlen(cp) > max_word)
           printf("Truncating %d character word (%s) to %d characters\n",
                  strlen(cp), cp, max_word);
       if ((strlen(cp) > 0) &&
           (((candidate_size == 0) &&
            !is_noise(cp, noise_size, minimum_word)) ||
           (candidate_size && is_candidate(cp, candidate_size)))) {
           if (words_index == words_size)  /* table full */
               expand_table(&words, &words_size);
           strncpy(words[words_index++], cp, max_word);
       }
       cp = cp2 + 1;
   }
}


/* expand *table[] by CHUNK elements of max_word characters */

void expand_table(char ***table, int *size)
{
   int ind;

   *table = (char **) my_realloc((char **) *table, (*size + CHUNK) * sizeof(char *));
   for (ind = 0; ind < CHUNK; ind++)
       (*table)[*size + ind] = (char *) calloc(max_word + 1, sizeof(char));
   *size += CHUNK;
}


int power(int base, int exp)
{
   int result;

   result = base;
   while (--exp)
       result *= base;
   return result;
}


/* qsort compare routine */
int compare (const void *str1, const void *str2)
{
   return(strcmp(*(char **) str1, *(char **) str2));
}

/* write out the index entries */
write_index(int db_index, struct RAB *idxptr)
{
   int ind, status, dup_count;

   /* write out the words */
   /* sort keys for counts and $put performance */
   qsort(words, words_index, sizeof(char *), compare);
   dup_count = 1;
   strcpy(prev_keyword, words[0]);
   for (ind = 1; ind < words_index; ind++)
       if (strcmp(words[ind], prev_keyword) == 0)
           dup_count++;
       else {
           sprintf(idx_record, "%-*s%0*d%0*d",
               max_word, prev_keyword,
               max_topic, db_index,
               max_count, dup_count);
           status = sys$put(idxptr);
           if ((status & 1) != SS$_NORMAL)
               lib$stop(status);
           strcpy(prev_keyword, words[ind]);
           dup_count = 1;
       }
   /* write out the last word */
   sprintf(idx_record, "%-*s%0*d%0*d",
           max_word, prev_keyword,
           max_topic, db_index,
           max_count, dup_count);
   status = sys$put(idxptr);
   if ((status & 1) != SS$_NORMAL)
       lib$stop(status);
}


void reset_topic(FILE *src, char *desc, int *start_pos, topic_str *topics)
{
   int ind;

   strncpy(desc, "", sizeof(desc));
   *start_pos = ftell(src);          /* init start position */
   for (ind = 0; ind < words_index; ind++)  /* clear out words[] */
       *words[ind] = '\0';
   words_index = 0;
   for (ind = 0; ind < TOPIC_SIZE; ind++)
       if (topics[ind].found)
           *topics[ind].found = '\0';
}


/* write out Gopher command, write out words */
void write_words(FILE *src, FILE *lnk, struct RAB *selptr, struct RAB *idxptr,
                int *start_pos, int *end_pos, char *desc, topic_str *topics)
{
   static int db_index = 0;
   int ind, status;
   char filename[256], *ptr, temp_desc[512] = "";
   char hostname[100], portname[10];

   keyword_index = -1;             /* stop indexing at end of section */
   if (!switch_present("link")) {
       if (words_index == 0) {
           reset_topic(src, desc, start_pos, topics);
           return;      /* no words to write */
       }
       if ((db_index + 2) >= power(10, max_topic)) {
           printf("You have reached %d topics in this index\n", db_index);
           printf("Please re-index with /MAX_TOPIC larger than %d\n", max_topic);
           exit(5);
       }
   }
   fgetname(src, filename);
   if (!switch_present("version")) {  /* if /noversion */
       ptr = strchr(filename, ';');  /* get rid of version number */
       if (ptr) *ptr = '\0';
   }
   for(ptr = filename; *ptr; ptr++)
       *ptr = _tolower(*ptr);  /* force filename lowercase */
   for (ind = 0; ind < TOPIC_SIZE; ind++)
       if (topics[ind].found && (strlen(topics[ind].found) > 0)) {
           if (strlen(temp_desc) > 0)
               strcat(temp_desc, " ");
           strcat(temp_desc, topics[ind].found);
       }
   if (strlen(temp_desc) > 0)
       strncpy(desc, temp_desc, DESC_SIZE);

   if (strlen(desc) > 0) {  /* no description, no index */
       if (switch_present("selector.text") || strlen(selector_spec)) {
           if (!switch_present("selector.both") && /* selectors only */
               (strlen(selector_spec) == 0)) {     /* and no selector found */
               reset_topic(src, desc, start_pos, topics);  /* reset topic stuff */
               return;                             /* and quit */
           }
           if (switch_present("selector.ignore") && /* should we ignore this? */
               (strchr(switch_value("selector.ignore"), *selector_spec))) {
                   reset_topic(src, desc, start_pos, topics);
                   return;                          /* yes, ignore this */
           }
           strncpy(hostname, "", sizeof(hostname));  /* init hostname */
           strncpy(portname, "", sizeof(portname));  /* init portname */
           if (ptr = strchr(selector_spec, '|')) {
               *ptr = '\0';                /* mark off selector from host */
               sprintf(hostname, "\t%s", ptr + 1);  /* copy host/port */
               if (ptr = strchr(hostname, '|')) {
                   *ptr = '\0';            /* mark off port from host */
                   sprintf(portname, "\t%s", ptr + 1);
               }
           }
           if (switch_present("link")) {
               fprintf(lnk, "Name=%s\nType=%c\n", desc, *selector_spec);
               fprintf(lnk, "Path=%s\n", selector_spec+1);
               fprintf(lnk, "Port=%s\nHost=%s\n\n",
                       strlen(hostname) ? hostname + 1 : "+",
                       strlen(portname) ? portname + 1 : "+");
           }
           else {
               sprintf(sel_record, "%0*d%c%s\t%s%s%s",
                   max_topic, ++db_index, *selector_spec, desc,
                   selector_spec+1, hostname, portname);
               selptr->rab$w_rsz = strlen(sel_record);
               if (((status = sys$put(selptr)) & 1) != SS$_NORMAL)
                   lib$stop(status);
               write_index(db_index, idxptr);
           }
           strcpy(selector_spec, "");  /* reset the current selector */
       }
       if (!switch_present("selector.text") || switch_present("selector.both")) {
           if (!switch_present("link")) {
               /* write out the selector */
               if (switch_present("whole")) /* whole file is a special case */
                   sprintf(sel_record, "%0*d0%s\t0%s",
                       max_topic, ++db_index, desc, filename);
               else
                   sprintf(sel_record, "%0*d0%s\tR%d-%d-%s",
                       max_topic, ++db_index, desc, *start_pos, *end_pos,
                       filename);
               selptr->rab$w_rsz = strlen(sel_record);
               if (((status = sys$put(selptr)) & 1) != SS$_NORMAL)
                   lib$stop(status);
               write_index(db_index, idxptr);
           }
           else {
               fprintf(lnk, "Name=%s\nType=0\n", desc);
               fprintf(lnk, "Path=R%d-%d-%s\n", *start_pos, *end_pos, filename);
               fprintf(lnk, "Port=+\nHost=+\n\n");
           }
       }
       printf("%s\n", desc);
   }
   reset_topic(src, desc, start_pos, topics);  /* clear words[], topics */
}


/* read in a file of words */

void load_words(char *name, char *punct, char ***table, int *table_size, int *table_index)
{
   FILE *nf;
   char *cp, *cp2, line[MAX_INPUT_LINE];
   static char file_name[256];
   short leng;
   int status;
   $DESCRIPTOR(name_dsc, file_name);

   if (switch_present(name)) {
       strcpy(file_name, switch_value(name));
       if ((nf = fopen(file_name, "r")) == NULL) {
           printf("Can't read data file %s\n", file_name);
           return;
       }
   }
   else if (strcmp(name, "noise"))
       return;
   else if ((nf = fopen("_noise_words", "r", "dna = gopher_root:[000000].dat")) == NULL)
       return;

   while (fgets(line, sizeof(line), nf)) {
       cp = strchr(line, '\n');
       if (cp) *cp = '\0';               /* remove newline */
       for (cp = line; *cp; cp++) {
           if (is_punct(*cp, punct) || iscntrl(*cp))
               *cp = ' ';            /* convert punctuation, tabs to spaces */
           *cp = _tolower(*cp);          /* force lowercase */
       }
       while ((strlen(line) > 0) &&
              (line[strlen(line)-1] == ' '))
           line[strlen(line)-1] = '\0';  /* remove trailing blanks */
       for (cp = line; *cp; cp++)
           if (*cp > ' ') break;         /* find first non-blank char */
       strcpy(line, cp);                 /* remove leading blanks */

       strcat(line, " ");                /* line ends with a space */
       cp = line;
       while(cp2 = strchr(cp, ' ')) {    /* break at space boundary */
           *cp2 = '\0';
           if (strlen(cp) > 0) {
               if (*table_index == *table_size)  /* table full */
                   expand_table(table, table_size);
               strcpy((*table)[(*table_index)++], cp);
           }
           cp = cp2 + 1;
       }
   }

   fclose(nf);
}


/* see if a char is punctuation */

int is_punct(char ch, char *punct)
{
   char *ptr;

   for (ptr = punct; *ptr; ptr++)
       if (*ptr == ch)
           return TRUE;
   return FALSE;
}


/* see if field is spaces */

int is_spaces(char *line, int pos, int size)
{
   int index;

   if (strlen(line) < pos)
       return(TRUE);
   for (index = 0; index < size; index++)
       if (!isspace(line[pos + index - 1]))
           return(FALSE);
   return(TRUE);
}


/* see if the word is noise */

int is_noise(char *word, int size, int minimum_word)
{
   int ind;

   if (strlen(word) < minimum_word)  /* simple heuristic saves lots of noise entries */
       return(TRUE);
   if ((!switch_present("numbers")) && isdigit(*word))
       return(TRUE);
   for(ind = 0; ind < size; ind++) {
       if (noise[ind] == NULL) return (FALSE);
       if (strcmp(noise[ind], word) == 0)
           return (TRUE);
   }
   return (FALSE);
}


/* see if the word is candidate */

int is_candidate(char *word, int size)
{
   int ind;

   for(ind = 0; ind < size; ind++) {
       if (candidate[ind] == NULL) return (FALSE);
       if (strcmp(candidate[ind], word) == 0)
           return (TRUE);
   }
   return (FALSE);
}


char *lc(char *str)
{
# define N_STRING 4
   static char strings[N_STRING][MAX_INPUT_LINE];
   static int cur_string = -1;
   char *cp;

   if (++cur_string >= N_STRING) cur_string = 0;
   for (cp = strings[cur_string];; cp++) {
       *cp = tolower(*str++);
       if (*cp == '\0') break;
   }
   return strings[cur_string];
}


/* find where the string starts (origin 1) in record */

int find_str(char *record, char *str)
{
   char *cp, *lcr;

   if ((str == NULL) || (strlen(str) == 0))
       return (0);  /* zero means string not found */
   lcr = lc(record);
   cp = strstr(lcr, lc(str));
   if (cp == NULL) return (0);
   return (cp - lcr + 1);
}


/* descr() creates character descriptor and return the address
of the descriptor to the caller. */
# define N_DESCR 10
static struct dsc$descriptor_s str_desc[N_DESCR];
static int cur_descr = -1;

struct dsc$descriptor_s *descr(char *string)
{
   if(++cur_descr >= N_DESCR) cur_descr = 0;
   str_desc[cur_descr].dsc$w_length=(short)strlen(string);
   str_desc[cur_descr].dsc$b_dtype=DSC$K_DTYPE_T;
   str_desc[cur_descr].dsc$b_class=DSC$K_CLASS_S;
   str_desc[cur_descr].dsc$a_pointer=string;
   return (&str_desc[cur_descr]);
}


int get_decimal(char *ptr)
{
   do
       ptr++;
   while ((*ptr != '=')       /* skip to the keyword/parameter */
       && (*ptr != ':'));     /* separator character */
   while (isspace(*++ptr));   /* skip spaces */
   return ((atoi(ptr) < 256) ? atoi(ptr) : 256);
}


char *get_text(char **dest, char *ptr)
{
   char *start, *cp;

   do
       ptr++;
   while ((*ptr != '=')         /* skip to the keyword/parameter */
          && (*ptr != ':'));    /* separator character */
   while (isspace(*++ptr));     /* skip spaces */
   if (*ptr == '"') {           /* if quoted string */
       start = ++ptr;           /* skip over quote */
       for (; *ptr; ptr++) {    /* skip to ending quote */
           if ((*ptr == '"') && (*(ptr+1) == '"')) {  /* doubled quotes? */
               ptr++;           /* yes, skip it */
               continue;
           }
           if (*ptr == '"')     /* un-doubled quote? */
               break;           /* yes, stop here */
       }
   }
   else {                       /* else non-quoted string */
       start = ptr;             /* start of string */
       while (*ptr
              && (*ptr != ' ')
              && (*ptr != ',')
              && (*ptr != '/')
              && (*ptr != ')'))
           ptr++;               /* skip to string terminator */
   }
   *dest = (char *) calloc((ptr - start) + 1, sizeof(char));
   strncpy(*dest, start, ptr - start);
   for (cp = *dest; *cp; *cp++)   /* collapse doubled quotes to single */
       if ((*cp == '"') && (*(cp+1) == '"'))
           strcpy(cp, cp+1);
   return (ptr);
}


/* parse command line for /topic */
void parse_topic(char *line, topic_str *topics)
{
   char *ptr, *start, **dest;
   static int index = -1;

   for (ptr = line; *ptr; ptr++)
       *ptr = _tolower(*ptr);         /* force command line lowercase */
   ptr = line;                        /* point to start of line */
   for (;;) {                         /* search for /topic until end of line */
       if (index == TOPIC_SIZE)
           return;                    /* exit if we can't hold any more */
       ptr = strchr(ptr, '/');        /* search for switch start */
       if (ptr == NULL)
           return;                    /* no more switches */
       while (isspace(*++ptr));       /* skip spaces */
       if (*ptr != 't')               /* topic is unique to one character */
           continue;                  /* not /topic, keep scanning */
       do
           ptr++;
       while ((*ptr != '=')           /* skip to the keyword/parameter */
              && (*ptr != ':'));      /* separator character */
       while (isspace(*++ptr));       /* skip spaces */
       if (*ptr == '(')               /* if start of list */
           while (isspace(*++ptr));   /* skip spaces */
       index++;                       /* next topics structure */
       topics[index].used = TRUE;     /* this topic index is used */
       topics[index].pos = DEFAULT_POS; /* default position */
       topics[index].end = "</";      /* default end to HTML end tag */
       for (;;) {                     /* parse all /topic list elements */
           if (*ptr == 'e')           /* "end" or "exclude" */
               ptr++;                 /* so we match on 'n' or 'x' */
           switch (*ptr) {
               case 't':                        /* text */
                   ptr = get_text(&topics[index].text, ptr);
                   break;
               case 'n':                        /* end */
                   get_text(&topics[index].end, ptr);
                   break;
               case 'p':                        /* position */
                   topics[index].pos = get_decimal(ptr);
                   break;
               case 's':                        /* size */
                   topics[index].size = get_decimal(ptr);
                   break;
               case 'o':                        /* offset */
                   topics[index].offset = get_decimal(ptr);
                   break;
               case 'x':                         /* exclude */
                   topics[index].exclude = TRUE; /* has no parameters */
                   break;
               case 'b':                              /* break */
                   topics[index].force_break = TRUE;  /* has no parameters */
                   break;
           }
           while (*ptr
                  && (*ptr != ' ')              /* skip to end of */
                  && (*ptr != ',')              /* keyword */
                  && (*ptr != '/')              /* switch */
                  && (*ptr != ')'))             /* or parameter */
               ptr++;
           while (*ptr &&
                  ((*ptr <= ' ') ||             /* skip spaces, junk */
                   (*ptr == ',')))              /* list seperators */
               ptr++;
           if (*ptr == '\0')
               return;                          /* end of the line */
           if ((*ptr == ')') || (*ptr == '/'))
               break;                           /* end of the list */
       }                                        /* scan for more list elements */
   }
}


/* parse command line for /keyword */
void parse_keyword(char *line, topic_str *keywords)
{
   char *ptr, *start, **dest;
   static int index = -1;

   for (ptr = line; *ptr; ptr++)
       *ptr = _tolower(*ptr);         /* force command line lowercase */
   ptr = line;                        /* point to start of line */
   for (;;) {                         /* search for /keyword until end of line */
       if (index == TOPIC_SIZE)
           return;                    /* exit if we can't hold any more */
       ptr = strchr(ptr, '/');        /* search for switch start */
       if (ptr == NULL)
           return;                    /* no more switches */
       while (isspace(*++ptr));       /* skip spaces */
       if (*ptr != 'k')               /* keyword is unique to one character */
           continue;                  /* not /keyword, keep scanning */
       do
           ptr++;
       while ((*ptr != '=')           /* skip to the keyword/parameter */
              && (*ptr != ':'));      /* separator character */
       while (isspace(*++ptr));       /* skip spaces */
       if (*ptr == '(')               /* if start of list */
           while (isspace(*++ptr));   /* skip spaces */
       index++;                       /* next keywords structure */
       keywords[index].used = TRUE;   /* this keyword index is used */
       for (;;) {                     /* parse all /keyword list elements */
           if (*ptr == 'e')           /* "end" or "exclude" */
               ptr++;                 /* so we match on 'n' or 'x' */
           switch (*ptr) {
               case 't':                        /* text */
                   ptr = get_text(&keywords[index].text, ptr);
                   break;
               case 'n':                        /* end */
                   get_text(&keywords[index].end, ptr);
                   break;
               case 'x':                        /* exclude */
                   keywords[index].exclude = TRUE; /* has no parameters */
                   break;
           }
           while (*ptr
                  && (*ptr != ' ')              /* skip to end of */
                  && (*ptr != ',')              /* keyword */
                  && (*ptr != '/')              /* switch */
                  && (*ptr != ')'))             /* or parameter */
               ptr++;
           while (*ptr &&
                  ((*ptr <= ' ') ||             /* skip spaces, junk */
                   (*ptr == ',')))              /* list seperators */
               ptr++;
           if (*ptr == '\0')
               return;                          /* end of the line */
           if ((*ptr == ')') || (*ptr == '/'))
               break;                           /* end of the list */
       }                                        /* scan for more list elements */
   }
}


void parse_switch(char *name, switch_str sw[], int def_sw, char *def_value)
{
   int status, ind;
   short leng;
   static char value[100];
   $DESCRIPTOR(switch_dsc, value);

   for (ind = 0; sw[ind].name; ind++)  /* find end of sw[] */
       if (strcmp(sw[ind].name, name) == 0)  /* or a pre-existing switch */
           break;

   if (sw[ind].name == NULL) {         /* register name first time */
       sw[ind].name = calloc(strlen(name) + 1, sizeof(char));
       strcpy(sw[ind].name, name);
       sw[ind].state = def_sw;         /* and set its default state */
   }

   if (def_sw)    /* if default on, remember if it's turned off */
       sw[ind].state &= (cli$present(descr(name)) & 1);
   else           /* if default off, remember if it's turned on */
       sw[ind].state |= (cli$present(descr(name)) & 1);

   status = cli$get_value(descr(name), &switch_dsc, &leng);
   if (status & 1) {
       value[leng] = '\0';
       /* if this is the first value or a non-default value, save it */
       if ((sw[ind].value == NULL) || strcmp(def_value, value)) {
           if (*value == '"') {  /* remove quotes from quoted string */
             strncpy(value, value + 1, leng - 2);
             value[leng - 2] = '\0';
           }
           sw[ind].value = calloc(strlen(value) + 1, sizeof(char));
           strcpy(sw[ind].value, value);
       }
   }
   else    /* hack around CLI bug that doesn't do selector.end right */
       if (def_value) {  /* no value, but default exists */
           sw[ind].value = calloc(strlen(def_value) + 1, sizeof(char));
           strcpy(sw[ind].value, def_value);
           sw[ind].state = def_sw;
       }
}


void parse_commands(dsc *input, switch_str sw[])
{
   cli$dcl_parse(input, index_commands);

   parse_switch("candidates", sw, 0, 0);
   parse_switch("character", sw, 0, 0);
   parse_switch("dash", sw, 0, 0);
   parse_switch("default_topic", sw, 1, 0);
   parse_switch("equal", sw, 0, 0);
   parse_switch("ff", sw, 0, 0);
   parse_switch("field", sw, 0, 0);
   parse_switch("field.position", sw, 0, 0);
   parse_switch("field.size", sw, 0, 0);
   parse_switch("file", sw, 0, 0);
   parse_switch("helpfile.selector", sw, 0, 0);
   parse_switch("helpfile.title", sw, 0, 0);
   parse_switch("keyword", sw, 0, 0);
   parse_switch("line", sw, 0, 0);
   parse_switch("link", sw, 0, 0);
   parse_switch("link.sort", sw, 0, 0);
   parse_switch("max_topics", sw, 1, "6");
   parse_switch("minimum_word", sw, 1, "3");
   parse_switch("noise", sw, 0, 0);
   parse_switch("numbers", sw, 1, 0);
   parse_switch("output", sw, 0, 0);
   parse_switch("paragraph", sw, 0, 0);
   parse_switch("punctuation", sw, 0, 0);
   parse_switch("selector.text", sw, 0, 0);
   parse_switch("selector.end", sw, 1, " -->");
   parse_switch("selector.both", sw, 0, 0);
   parse_switch("selector.ignore", sw, 0, 0);
   parse_switch("sequential", sw, 0, 0);
   parse_switch("version", sw, 1, 0);
   parse_switch("whole", sw, 0, 0);
   parse_switch("word_length", sw, 1, "20");

   if (switch_present("keyword")) {
       keyword_index = -1;
       parse_keyword(input->dsc$a_pointer, keywords); /* fill keywords[] */
   }

   parse_topic(input->dsc$a_pointer, topics); /* parse the command line and fill topics */
}


int switch_present(char *name)
{
   int ind;

   for (ind = 0; switches[ind].name; ind++)
       if (strcmp(name, switches[ind].name) == 0)
           break;
   if (switches[ind].name == NULL) return (FALSE);
   return (switches[ind].state);
}


char *switch_value(char *name)
{
   int ind;

   for (ind = 0; switches[ind].name; ind++)
       if (strcmp(name, switches[ind].name) == 0)
           break;
   return (switches[ind].value);
}


void *my_realloc(void *mem, int size)
{
   if (mem == (void *) 0)
       return ((void *) malloc(size));
   else
       return((void *) realloc(mem, size));
}