#/usr/local/bin/perl
# gopherclone - clone gophers

# usage:
# gopherclone [www style gopher reference]
# gopherclone gopher://gopher.msen.com:70/cicnet

# original NNTP client suggested by eci386!clewis
# socket code mailed to me by [email protected] (Carl M. Fongheiser)
# adaptation for gopher by [email protected] (Edward Vielmetti)
# modification to indexer by [email protected] (Bob Alberti)

# Configuration information -- change to reflect your site.

$_ = $ARGV[0] ? $ARGV[0] : 'gopher://gopher.micro.umn.edu:70/';

#If an argument exists, use it, otherwise use default

($service, $host, $port, $path) = (/^(gopher:\/\/)([^:]+):(\d+)(.*)/);

#If debug = 0, gopherclone runs silent.  =1 is a verbose run.  Commented
#debug lines are annoyingly thorough

$DEBUG = 1;              #set this to 0 for silent operation

if ($host && $port && $path) {
                        $DEBUG && print "host=$host; port=$port; path=$path\n";
                        # Here's how to make your own socket.ph
                        # cp /usr/include/sys/socket.h socket.h
  require 'socket.ph';  # h2ph socket
  chop($hostname = `hostname`);        # get host name in variable

  ($N) = &tcpconnect($host, $hostname);# open connection
  if ($path eq "/") {
     $path = "";
  }
  &gopherlevel($host, $hostname, $path, N); # clone the gopher

  close(N);                            # close the connection.  NOTHING TO IT!
}
else {
  print "Command format:\n\n";
  print "   gopherclone service://host.name:port/path/\n\n";
  print "If a directory in the path includes multiple words separated by spaces,\n";
  print "(i.e. /path name/), surround the parameter string with single quotes:\n\n";
  print "   goppherclone 'service://host.name:port/path name/'\n\n";

}

sub gopherlevel {          # Build a level of gopher directory before recursion
  local($host, $hostname, $path, $N) = @_;
                          $DEBUG && print "sending path=$path\n";
  send(N,"$path\r\n",0);
                          $DEBUG && print STDERR "$path\r\n";
  local($dirnum, $docnum, $i, @doc, @dir); #avoid scoping errors
  @doc = 0;               #call me a fuddy-duddy but I like to Know
  @dir = 0;

  while(<N>)  {                 #While receiving data
     chop;chop;                 # trim data
     next if /^[\. ]*$/;        # quit if a period
     s/^(.)// && ( $type = $1); # otherwise Type is first character
     @G= split(/\t/);           # and split other fields on tabs

                                #$DEBUG &&  print "Type=$type\n";
                                #$DEBUG &&  print "Name=$G[0]\n";
                                #$DEBUG &&  print "Path=$G[1]\n";
                                #$DEBUG &&  print "Host=$G[2]\n";
                                #$DEBUG &&  print "Port=$G[3]\n";

     next if (($host ne $G[2]) || ($G[3] ne $port)); # if a link, skip

     if ($type == 1)  {         # Add directories to the list of directories
        $dirnum += 1;
        $dir[$dirnum] = $G[1];  # to be built after all information received
                     $DEBUG && print "$dirnum: $dir[$dirnum]\n";
     }

     if ($type == 0) {          # Add documents to the list of items
        $docnum += 1;           #$DEBUG && print "document\n";
        $doc[$docnum] = $G[1];  # to be fetched after all information received
                    # $DEBUG && print "$docnum: $doc[$docnum]\n";
     }
  }
  close(N);

                                #$DEBUG && print "\n";

  for ($i = 1; $i <= $docnum; $i++) {   #Documents first, they're easy
     @path = split('/',$doc[$i]);       #split along slashes
     $filename = $path[$#path];         #take last item as filename
     $filename = $filename ? $filename : "/";
     open(FILE, ">$filename") || die "Couldn't open new file $filename: $!\n";

     ($N) = &tcpconnect($host, $hostname);

     if ($N) {                    # If connection good
        send(N,"$doc[$i]\r\n", 0) || die "Send $d to $host barfed with: $!\n";
        $DEBUG && print "Receiving $filename\n";
        while (<N>) {
                                  #$DEBUG && print $_;
           next if /^[\.]*$/;     #loop til lone period
           print FILE $_;         # Put the text in the file
        }
        close(FILE);
     }
     else {
        die "Couldn't open tcp connection $N: $!\n";
     }
     close(N);
  }

  for ($i = 1; $i <= $dirnum; $i++) {     # Make directories
     @path = split('/',$dir[$i]);         # split off leading entries in path;
     $dirname = $path[$#path];            # take last item as name
                             $DEBUG && print "dirname: $dirname\n";
     $_ = $dirname;                       #Bah, this is ungraceful, but
     if (/^\S/) {                         #sometimes $dirname is blank.
        mkdir ($dirname, 0xfff) || die print "Mkdir $dirname: $!\n";
     }
     else {
          next;
     }
     chdir ($dirname)        || die print "Chdir $dirname: $!\n";

     ($N) = &tcpconnect($host, $hostname);

     if ($N) {
        &gopherlevel($host, $hostname, $dir[$i], N);
        sleep(2);     #arbitrary sleeps give sockets time to close
        chdir("..")          || die print "chdir up: $!\n";
     }
     else {
        die "Couldn't open tcp connection $N: $!\n";
     }
     close(N);
  }
}


sub tcpconnect {                    #Get TCP info in place
  local($host, $hostname) = @_;
  $sockaddr = 'S n a4 x8';

                           #$DEBUG && print "host: $host, me: $hostname\n";

  ($name,$aliases,$proto) = getprotobyname('tcp');
  ($name,$aliases,$port) = getservbyname($port, 'tcp')
       unless $port =~ /^\d+$/;
  ($name,$aliases,$type,$len,$thisaddr) = gethostbyname($hostname);
  ($name,$aliases,$type,$len,$thataddr) = gethostbyname($host);

  $this = pack($sockaddr, &AF_INET, 0, $thisaddr);
  $that = pack($sockaddr, &AF_INET, $port, $thataddr);

  sleep(2);

  socket(N, &PF_INET, &SOCK_STREAM, $proto) || die "socket: $!";
  bind(N, $this)                            || die "bind: $!";
  connect(N, $that)                         || die "connect: $!";

  return(N);
}