#!/usr/bin/perl

# install in cgi-bin/picknewsgroups
# [email protected]
# v1.0; Fri Apr 21 10:34:19 MDT 1995

require 5.0;

##############################################
# BEGIN CONFIG SECTION
##############################################
$EGREP = 'egrep';                # prefer to use gnu egrep for speed
$SORT  = 'sort -t. -u +0 -1 ';   # maybe +1 -2 if you live in a sad world

# run from cron now and then : nntplist newsgroups > $NGFILE
$NGFILE = '/usr/local/lib/news/newsgroups';

# I'd rather you chose yourself. :-(
$HOST = 'mox.perl.com';
### $HOST = 'localhost';
### ($HOST) = gethostbyname(chop($host=`hostname`), $host);
### chop($HOST=`hostname`);

##############################################
# END CONFIG SECTION
##############################################

if ($HOST) { $HOST = "//$HOST"; }

%Groups = ();

die "No $NGFILE: $!" unless -f $NGFILE && -r _;

if ($ENV{HTTP_USER_AGENT} =~ m#Mozilla/1.1#) {
   $file_icon = "<IMG BORDER=0 SRC=internal-news-newsgroup>";
   $dir_icon = "<IMG BORDER=0 SRC=internal-news-newsgroups>";
}

if  (@ARGV) {
   $Target = shift;
} else {
   get_request();

   if (!($Target = $rqpairs{'newsgroup'})) {
       ($Target) = each %rqpairs;  # top level
   }
}


die "Cannot fork: $!" unless defined ($pid = open(INPUT, "-|"));

unless ($pid) {
   if ($Target) {
       exec $EGREP, "^\Q$Target", $NGFILE;
   } else {
       exec "$SORT $NGFILE 2>/dev/null";
   }
   die "exec failed: $!";
}

###############################
# Here's the kinda data structure we're going to be building ...
###############################
#
# %Groups = (
#    alt => {
#       DESC => "useless drivel",
#       NEXT => {
#           activism => {
#               DESC => "Activities for activists",
#               NEXT => {
#                   d =>  {
#                     DESC => "A place to discuss issues in alt.activism",
#                   },
#                   "death-penalty" => {
#                     DESC => "For people opposed to capital punishment",
#                   },
#               },
#           },
#           adoption => {
#               DESC => "For those involved with or contemplating adoption",
#           }
#       },
#    },
#    comp => {
#       DESC => "computer stuff",
#       NEXT => {
#           lang => {
#               DESC => "programming languages",
#               NEXT => {
#                   C           => "it's better than a boot to the head",
#                   perl        => "welcome to Larry's World",
#                   python      => "here be snakes, but not poisonous ones",
#                   tcl         => "but for tk, none would bother",
#                   scheme      => "job security tips in the ivory tower",
#               }
#           }
#       }
#    }
# );

while (<INPUT>) {
   chomp;
   # now for the darned tabs in the newsgroups file
   1 while s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
   ($ng, $desc) = unpack("A24 A*", $_);
   if (length($ng) == 24) {
       $desc =~ s/^(\S+)\s*//;
       $ng .= $1;
   }
   @parts = split /\./, $ng;
   $last = pop @parts;
   $gp = \%Groups;
   for $hier (@parts) {
       if (!ref $gp->{$hier}{NEXT}) {
           $gp->{$hier}{NEXT} = {};
       }
       $gp = $gp->{$hier}{NEXT};
   }
   $gp->{$last}{DESC} = $desc;

}

close(INPUT) || warn "Trouble processing input";

if (!$Target) { read_top_level_descs(); }

#hierprint(\%Groups);

$gp = \%Groups;

# this copies the DESC down a level into a blank key for groups
# like comp.lang.c that have their own raison d'etre
# apart from holding comp.lang.c.moderated, etc.
if ($Target) {
   $what = "Newsgroups under $Target";
   for $hier (split(/\./, $Target)) {
       if ( $gp->{$hier}{DESC} ) {
           $gp->{$hier}{NEXT}{""}{DESC} = $gp->{$hier}{DESC};
       }
       $gp = $gp->{$hier}{NEXT};
   }
       promote($gp);
} else {
   $what = "Top Level News Hierarchies";
}


html_header($what);

if ($Target) {
   print "<H2>$what</H2>\n";
   print "<HR>\n";
}

if (ref $gp) {
   $Target =~ s/\.$//;
   topprint($gp, $Target);
} else {
   print "Couldn't find anything interesting to print.\n"
}
print "<HR>\n";
print "</Body>\n";

html_trailer();

exit;

######################################


sub topprint {
   my ($gp, $hier) = @_;

   if (!$Target) {
       print "<h2>USENET (Big 7) Hierarchies</h2>\n";
       for $group (sort keys %Big7) {
           $fullname = $group;
           $desc = $Groups{$group}{DESC};
           print <<xxxYYYxxx;
<DL>
   <DT> $dir_icon <A HREF="http:$HOST/cgi-bin/picknewsgroups?$fullname">$fullname.*</a>
   <DD> $desc
</DL>
xxxYYYxxx
           delete $Groups{$group};
       }
       print "<hr><h2>Non-USENET Hierarchies</h2>\n";
   }

   for $name (sort keys %$gp) {
       $desc = $gp->{$name}{DESC};
       $kids = $gp->{$name}{NEXT};

       if ($desc eq '?') { $desc = ""; }
       ($fullname = $hier . ($hier && '.' ) . $name) =~ s/\.$//;
                   # the pushed down DESCS are null     ^^^^^^^^
       if (0 and !$Target) {
           print <<xxxYYYxxx;
               <A HREF="http:$HOST/cgi-bin/picknewsgroups?$fullname">$fullname</a>
xxxYYYxxx
           next;
       }

       if ($kids) {
           print <<xxxYYYxxx;
<DL>
   <DT> $dir_icon <A HREF="http:$HOST/cgi-bin/picknewsgroups?$fullname">$fullname.*</a>
   <DD> $desc
</DL>
xxxYYYxxx

           next;
       }

       print <<xxxYYYxxx if $Target;  # skip control and junk
<DL>
   <DT> $file_icon <A HREF="news:$fullname">$fullname</a>
   <DD> $desc
</DL>
xxxYYYxxx
   }
}

# this isn't used, but if you read in the whole tree,
# this will dump it all
sub hierprint {
   my ($gp,$hier) = @_;
   for $name (sort keys %$gp) {
       $desc = $gp->{$name}{DESC};
       $kids = $gp->{$name}{NEXT};
       $fullname = $hier . ($hier && '.' ) . $name;
       if ($kids) {
           $width = 2 * $indent - 40;
           iprint (sprintf("%${width}s %s\n", $fullname, $desc && "<<<$desc>>>"));
           indent();
               hierprint($kids, "$fullname");
           undent();
       } else {
           $width = 2 * $indent - 40;
           iprint (sprintf("%${width}s %s\n", $name, $desc));
       }
   }
}

sub indent { $indent++ }
sub undent { --$indent }
sub iprint {
   print "  " x $indent;
   print @_;
}

# promote alt.barney.die.die.die all the way up
# (actually, there's a bug and i only get the first couple levels)
sub promote {
   my $gp = shift;
   for $name (keys %$gp) {
       if ($gp->{$name}{NEXT}) {
           $children = keys %{$gp->{$name}{NEXT}};
           if ($children == 1 && !$gp->{$name}{DESC}) {
               ($n, $p) = each %{$gp->{$name}{NEXT}};
               delete $gp->{$name};
               $name = "$name.$n";
               %{$gp->{$name}} = %$p;
           } else {
               promote($gp->{$name}{NEXT});
           }
       }
   }
}

sub read_top_level_descs {
   for (qw(comp sci rec misc soc talk news)) { $Big7{$_} ++ }
   local($/) = '';
   while (<DATA>) {
       chomp;
       s/^(\S+)\s*//;
       $Groups{$1}{DESC} = $_;
   }
}

#####################################################################
# The CGI_HANDLERS deal with basic CGI POST or GET method request
# elements such as those delivered by an HTTPD form, i.e. a url
# encoded line of "=" separated key=value pairs separated by &'s

# Routines:
# get_request:  reads the request and returns both the raw and
#               processed version.
# url_decode:   URL decodes a string or array of strings
# html_header:  Transmits a HTML header back to the caller
# html_trailer: Transmits a HTML trailer back to the caller

# Author:
#       James Tappin: [email protected]
#       School of Physics & Space Research University of Birmingham
#       Feb 1993.

# Copyright & Disclaimer.
#       This set of routines may be freely distributed, modified and
#       used, provided this copyright & disclaimer remains intact.
#       This package is used at your own risk, if it does what you
#       want, good; if it doesn't, modify it or use something else--but
#       don't blame me. Support level = negligable (i.e. mail bugs but
#       not requests for extensions)

sub get_request {

   # Subroutine get_request reads the POST or GET form request from STDIN
   # into the variable  $request, and then splits it into its
   # name=value pairs in the associative array %rqpairs.
   # The number of bytes is given in the environment variable
   # CONTENT_LENGTH which is automatically set by the request generator.

   # Encoded HEX values and spaces are decoded in the values at this
   # stage.

   # $request will contain the RAW request. N.B. spaces and other
   # special characters are not handler in the name field.

   if ($ENV{'REQUEST_METHOD'} eq "POST") {
       read(STDIN, $request, $ENV{'CONTENT_LENGTH'});
   } elsif ($ENV{'REQUEST_METHOD'} eq "GET" ) {
       $request = $ENV{'QUERY_STRING'};
   }

   %rqpairs = &url_decode(split(/[&=]/, $request));
}

sub url_decode {

#       Decode a URL encoded string or array of strings
#               + -> space
#               %xx -> character xx

   foreach (@_) {
       tr/+/ /;
       s/%(..)/pack("c",hex($1))/ge;
   }
   @_;
}

sub html_header {

   # Subroutine html_header sends to Standard Output the necessary
   # material to form an HHTML header for the document to be
   # returned, the single argument is the TITLE field.

   local($title) = @_;

   print "Content-type: text/html\n\n";
   print "<html><head>\n";
   print "<title>$title</title>\n";
   print "</head>\n<body>\n";
}

sub html_trailer {

   # subroutine html_trailer sends the trailing material to the HTML
   # on STDOUT.

   local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst)
       = gmtime;

   local($mname) = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
                    "Aug", "Sep", "Oct", "Nov", "Dec")[$mon];
   local($dname) = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri",
                    "Sat")[$wday];



   print "<p>\nGenerated by: <var>$0</var><br>\n";
   printf "Date: %02d:%02d:%02d GMT on $dname $mday $mname $year.<p>\n",
       $hour, $min, $sec;
   print "</body></html>\n";
}

__END__


comp    Topics of interest to both computer professionals and
       hobbyists, including topics in computer science, software
       source, and information on hardware and software systems.

sci     Discussions marked by special and usually practical knowledge,
       relating to research in or application of the established
       sciences.

misc    Groups addressing themes not easily classified under any of the
       other headings or which incorporate themes from multiple
       categories.

soc     Groups primarily addressing social issues and socializing.

talk    Groups largely debate-oriented and tending to feature long
       discussions without resolution and without appreciable amounts
       of generally useful information.

news    Groups concerned with the news network and software themselves.

rec     Groups oriented towards the arts, hobbies and recreational
       activities.

alt     Largest collection of groups outside the big-7 hierarchy.
       Many Usenet sites do not receive some or all of these groups.

bionet  Groups for topics interesting to biologists
       called bionet" originating from net.bio.net and currently carried at
       over a third of the sites participating in the Arbitron readership
       survey.

bit     Groups for redistributions of the more popular BitNet LISTSERV
       mailing lists.

biz     Groups carried and propagated
       by sites interested in the world of business products around them -- in
       particular, computer products and services.  This includes product
       announcements, announcements of fixes and enhancements, product
       reviews, and postings of demo software.


clari   Groups gatewayed from commercial news services and other ``official'' sources.


eye     EYE magazine, based in Toronto, Ontario, Canada, is a free newspaper
       issued once a week.  The scope of articles is greater than just
       Toronto, so the hierarchy is available on a worldwide basis.

gnu     gnUSENET (gnUSENET is Not USENET) is a set of newsgroups
       bi-directionally gated with the Internet mailing lists of the GNU
       Project of the Free Software Foundation.

hepnet  HEPnet is a collections of networks interconnecting high-energy and
       nuclear physics research sites.

ieee    The IEEE newsgroups concern the IEEE -- the Institute of Electrical
       and Electronics Engineers.

info    Collection of mailing lists gatewayed into
       news at the University of Illinois.  The lists are selected based on
       local interests but have proven popular at a number of sites.

k12     K12Net is a collection of conferences devoted to K-12 educational
       curriculum, language exchanges with native speakers, and
       classroom-to-classroom projects designed by teachers.

relcom  Hierarchy of Russian-language newsgroups distributed
       mostly on the territory of the former Soviet Union (non-CIS countries
       included).

u3b     Groups dealing with AT&T 3B{2,5,15,20,4000} computers -- everything
       except for the UNIX PC/3B1.

vmsnet  Hierarchy for topics of interest to VAX/VMS users (but not
       necessarily VMS-specific).

boulder Hierarchy for Boulder, Colorado.

austin  Hierarchy for Austin, Texas.

co      Hierarchy for Colorado, USA.

dfw     Hierarchy for Dallas and Fort Worth, Texas.

cu      Hierarchy of the University of Colorado at Boulder.

cu-den  Hierarchy of the University of Colorado at Denver.

csu     Hierarchy of the University of Colorado state system.

de      Hierarchy for German-language groups.

ny      Hierarchy for New York (state), USA.

nj      Hierarchy for New Jersey, USA.

sun     Sun internal newsgroups.

convex  Convex internal newsgroups.

tx      Hierarchy for Texas, USA.

ucb     Hierarchy of the University of California at Berkeley.

ucsd    Hierarchy of the University of California at San Diego.

ba      Hierarchy for the Bay Area in northern California.

ncar    Hierarchy for the National Center for Atmospheric Research.

bu      Hierarchy for Boston University.

csn     Hierarchy for Colorado SuperNet.

us      Hierarchy for the United States.

uk      Hierarchy for the United Kingdom.

courts  Current court cases.

humanities      Hierarchy for the arts and humanities.

to      Administrative groups with log info on news transfers.