\raggedright
\documentclass{seminar}
\usepackage{fancybox}
%\usepackage{semhelv}
%\twoup
\slideframe{shadow}
\begin{document}
\begin{slide}


\centerline{\Large \bf PERL}
\bigskip
\centerline{\large \sl a language by Larry Wall}
\bigskip
\centerline{\large \bf Practical Extraction and Report Language}
\bigskip
\centerline{\large \sl or}
\bigskip
\centerline{\large \bf Pathologically Eclectic Rubbish Lister}
\bigskip
\centerline{\large \sl Tom Christiansen}
%\smallskip
%\centerline{\large \sl CONVEX Computer Corporation}
\newslide


\centerline{\bf Overview}
\medskip
\begin{itemize}
\item What is {\sl Perl}: features, where to get it, preview
\item Data Types: scalars and arrays
\item Operators
\item Flow Control
\item Regular Expressions
\item I/O: regular I/O, system functions, directory  access,
\hfil\break formatted I/O
\item Functions and Subroutines: built-in array and  string
functions
\item Esoterica: debugging, packages,  command line options
\item Examples
\end{itemize}
\newslide


\centerline{\bf What is Perl?}
\medskip
\begin{itemize}
\item An interpreted language that looks a lot like C with
built-in  {\sl sed},  {\sl awk},  and {\sl sh}, as well as bits of {\sl
csh}, Pascal, FORTRAN, and BASIC-PLUS thrown in.
\item Highly optimized for manipulating printable text,  but
also able to handle binary data.
\item Especially suitable for system management tasks due
to  interfaces to most common system calls.
\item Rich enough for most general programming tasks.
\item {\sl ``A shell for C programmers.''} [Larry Wall]
\end{itemize}
\newslide


%\centerline{\bf Where to get it?}
%\medskip
%\begin{itemize}
%\item Version 4.0 is at all gnu archive sites.
%\item Version 5.0 is in alpha test, details available on
%     comp.lang.perl.
%\end{itemize}
%\newslide


\centerline{\bf Where to find out more?}
\medskip
\begin{itemize}
\item {\it Learning perl} by Randal Schwartz (the llama book)
\item {\it Programming perl} by Larry Wall and Randal Schwartz (the
camel book)
\item The {\it Quick Reference Guide} by Johan Vromans
\item The man pages
\item comp.lang.perl
\end{itemize}
\newslide


\centerline{\bf Features}
\medskip
\begin{itemize}
\item Easy to learn because much derives from existing tools.
\item More rapid program development because it's an
interpreter.
\item Faster execution than shell script equivalents.
\item More powerful than {\sl sed}, {\sl awk}, or {\sl sh};
{\sl a2p} and {\sl s2p} translators supplied for your old scripts.
\item Portable across many different architectures.
\item Absence of arbitrary limits like string length.
\item Fits nicely into UNIX tool and filter philosophy.
\item It's free!
\end{itemize}
\newslide


\centerline{\bf Preview}
\medskip
\begin{itemize}
\item It's not for nothing  that  {\sl perl}  is  sometimes
called  the ``pathologically  eclectic rubbish lister.''  Before you
drown in a deluge of features, here's a  simple  example  to  whet
your  appetites  that demonstrates the principal features of the
language, all of which have been present  since  version 1.
\end{itemize}

\begin{verbatim}
   while (<>) {
       next if /^#/;
       ($x, $y, $z) = /(\S+)\s+(\d\d\d)\s+(foo|bar)/;
       $x =~ tr/a-z/A-Z/;
       $seen{$x}++;
       $z =~ s/foo/fear/ && $scared++;
       printf "%s %08x %-10s\n", $z, $y, $x
           if $seen{$x} > $y;
   }
\end{verbatim}
\newslide


\centerline{\bf Data Types}
\smallskip
\begin{itemize}
\item Basic data types are scalars, indexed arrays of
scalars, and associative arrays of scalars.
\item Scalars themselves are either string, numeric,  or boolean,
depending  on  context.   Values  of 0 (zero) and \verb+""+ (null
string) are false; all else is true.
\item Type of variable determined by leading special
character.
\begin{tabbing}
foobar\= foobar\= \kill
\> {\bf \$}  \> scalar \\
\> {\bf @}   \> indexed array (lists) \\
\> {\bf \%}  \> associative array \\
\> {\bf \&}  \> function
\end{tabbing}
\item All data types have their own separate namespaces, as
do labels, functions, and file and directory handles.
\end{itemize}
\newslide


\centerline{\bf Data Types (scalars)}
\medskip
\begin{itemize}
\item Use a \$ to indicate a scalar value
\begin{verbatim}
    $foo = 3.14159;
    $foo = 'red';
    $foo = "was $foo before";
    $host = `hostname`;           # note backticks
    ($foo, $bar, $glarch) = ('red', 'blue', 'green');
    ($foo, $bar) = ($bar, $foo);  # exchange
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Special Scalar Variables}
\medskip
\begin{itemize}
\item Special scalars are named with punctuation (except
{\bf \$0}).  Examples are
\begin{tabbing}
foob\= foob\=  \kill
\> {\bf \$0}    \> name of the currently executing script \\
\> {\bf \$\_}   \> default for pattern operators and implicit I/O \\
\> {\bf \$\$}   \> the current pid \\
\> {\bf \$!}    \> the current system error message from {\sl errno} \\
\> {\bf \$?}    \> status of last `backtick`, pipe, or system \\
\> {\bf \$$|$}  \> whether output is buffered \\
\> {\bf \$.}    \> the current line number of last input \\
\> {\bf \$[}    \> array base, 0 by default; {\sl awk} uses 1 \\
\> {\bf \$$<$}  \> the real uid of the process \\
%\> {\bf \$(}    \> the real gid of the process \\
\> {\bf \$$>$}  \> the effective uid of the process \\
%\> {\bf \$)}    \> the effective gid of the process \\
\end{tabbing}
\end{itemize}
\newslide


\centerline{\bf Data types (arrays)}
\medskip
\begin{itemize}
\item Indexed arrays (lists); \$ for one scalar element, @ for all
\begin{verbatim}
   $foo[$i+2] = 3;         # set one element to 3
   @foo = ( 1, 3, 5 );     # init whole array
   @foo = ( ) ;            # initialize empty array
   @foo = @bar;            # copy whole @array
   @foo = @bar[$i..$i+5];  # copy slice of @array
\end{verbatim}
\item \$\#ARRAY is index of highest subscript, so the script's
name is {\bf \$0} and its arguments run from \$ARGV[0]  through
\$ARGV[\$\#ARGV], inclusive.
\end{itemize}
\newslide


\centerline{\bf Data types (arrays)}
\medskip
\begin{itemize}
\item Associative (hashed) arrays; \$ for one scalar element,
\% for all
\begin{verbatim}
# 23 more green frogs
   $frogs{'green'} += 23;
# multi-dim array
   $location{$x, $y, $z} = 'troll';
# copy whole %array
   %foo = %bar;
   @frogs{'green', 'blue', 'yellow'} = (3, 6, 9);
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Special Array Variables}
\medskip
\begin{tabbing}
foobar\= foobarbiggnuma\= \kill
\> {\bf @ARGV}  \> command line arguments \\
\> {\bf @INC}   \> search path for files called with {\bf do} \\
\> {\bf @\_}    \> default for {\bf split} and subroutine parameters \\
\> {\bf \%ENV}  \> the current environment; e.g. \$ENV\{'HOME'\} \\
\> {\bf \%SIG}  \> used to set signal handlers \\
\end{tabbing}
\begin{verbatim}
   sub trapped {
       print STDERR "Interrupted\007\n";
       exit 1;
   }
   $SIG{'INT'} = 'trapped';
\end{verbatim}
\newslide


\centerline{\bf Operators}
\medskip
\noindent
{\sl Perl} uses all of C's operators except for type casting and
{\bf `\&'} and {\bf `*'} as address operators, plus these
\begin{itemize}
\item exponentiation:  {\bf **, **=}
\item range operator: {\bf ..}
\begin{verbatim}
   $inheader = 1 if /^From / .. /^$/;
   if (1..10) { do foo(); }
   for $i (60..75) { do foo($i); }
   @new = @old[30..50];
\end{verbatim}
\item string concatenation: {\bf ., .=}
\begin{verbatim}
   $x = $y . &frob(@list) . $z;
   $x .= "\n";
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Operators (continued)}
\medskip
\begin{itemize}
\item string repetition: {\bf x, x=}
\begin{verbatim}
   $bar = '-' x 72; # row of 72 dashes
\end{verbatim}
\item string tests: {\bf eq}, {\bf ne}, {\bf lt}, {\bf gt}, {\bf le},
{\bf ge}
\begin{verbatim}
   if ($x eq 'foo') { }
   if ($x ge 'red' ) { }
\end{verbatim}
\item file test operators like augmented {\sl /bin/test} tests
work  on strings or filehandles
\end{itemize}
\begin{verbatim}
   if (-e $file) { }    # file exists
   if (-z $file) { }    # zero length
   if (-O LOG) { }      # LOG owned by real uid
   die "$file not a text file" unless -T $file;
\end{verbatim}
\newslide


\centerline{\bf Flow Control}
\medskip
\begin{itemize}
\item Unlike C, blocks always require enclosing braces {\bf \{\}}
\item {\bf unless} and {\bf until} are just {\bf if} and {\bf while}
negated
\begin{verbatim}
   if (EXPR) BLOCK else BLOCK
   if (EXPR) BLOCK elsif (EXPR) BLOCK else BLOCK
   while (EXPR) BLOCK
   do BLOCK while EXPR
   for (EXPR; EXPR; EXPR) BLOCK
   foreach $VAR (LIST) BLOCK
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Flow Control (continued)}
\medskip
\begin{itemize}
\item For readability, {\bf if}, {\bf unless}, {\bf while}, and {\bf
until} may be used as trailing statement modifiers as in BASIC-PLUS
\begin{verbatim}
   return -1 unless $x > 0;
\end{verbatim}
\item Use {\bf next} and {\bf last} rather than C's {\bf continue}
and {\bf break}
\item {\bf redo} restarts the current iteration, ignoring the loop
test
\item Blocks (and {\bf next}, {\bf last}, and {\bf redo}) take optional
labels  for clearer loop control, avoiding the use of {\bf goto} to
exit nested loops.
\item No {\bf switch} statement, but it's easy to roll your own
\end{itemize}
\newslide


\centerline{\bf Flow Control (continued)}
\medskip
\begin{itemize}
\item {\bf do} takes 3 forms
\begin{itemize}
\item execute a block
\begin{verbatim}
     do { $x += $a[$i++] } until $i > $j;
\end{verbatim}
\item execute a subroutine
\begin{verbatim}
     do foo($x, $y);
\end{verbatim}
\item execute a file in current context
\begin{verbatim}
     do 'subroutines.pl';
\end{verbatim}
\end{itemize}
\end{itemize}
\newslide


\centerline{\bf Regular Expressions}
\medskip
\begin{itemize}
\item Understands {\sl egrep} regexps, plus
\begin{tabbing}
foob\= foobargnubig\= \kill
\> {\bf $\backslash$w, $\backslash$W} \> alphanumerics plus \_ (and
negation) \\
\> {\bf $\backslash$d, $\backslash$D} \> digits (and negation) \\
\> {\bf $\backslash$s, $\backslash$S} \> white space (and negation) \\
\> {\bf $\backslash$b, $\backslash$B} \> word boundaries (and negation)
\end{tabbing}
\item C-style escapes recognized, like {\bf $\backslash$t,
$\backslash$n, $\backslash$034}
\end{itemize}
\newslide


\centerline{\bf Regular Expressions (continued)}
\medskip
\begin{itemize}
\item Don't  escape  these  characters  for  their  special
meaning:  {\bf ( ) $|$ \{ \} +}
\item Character classes may contain metas, e.g. {\bf [$\backslash$w.\$]}
\item Special variables: {\bf \$\&} means all text matched, {\bf \$`} is
text before match, {\bf \$'} is text after match.
\item Use {\bf $\backslash$1 .. $\backslash$9} within regexps;
{\bf \$1 .. \$9} outside
\begin{verbatim}
   if (/^this (red|blue|green) (bat|ball) is \1/)
       { ($color, $object) = ($1, $2); }
   ($color, $object) =
       /^this (red|blue|green) (bat|ball) is \1/;
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Regular Expressions (continued)}
\medskip
\begin{itemize}
\item Substitute and translation operators are like {\sl sed}'s
{\bf s} and {\bf y}.
\begin{verbatim}
   s/alpha/beta/;
   s/(.)\1/$1/g;
   y/A-Z/a-z/;
\end{verbatim}
\item Use =\~{ } and !\~{ } to match against variables
\begin{verbatim}
   if ($foo !~ /^\w+$/) { exit 1; }
   $foo =~ s/\btexas\b/TX/i;
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf I/O}
\medskip
\begin{itemize}
\item Filehandles have their own distinct namespaces, but are
typically  all upper case for clarity.  Pre-defined filehandles are
STDIN, STDOUT, STDERR.
\item Mentioning a filehandle in angle brackets reads next
line in scalar  context, all lines in an array context; newlines are
left intact.
\begin{verbatim}
   $line = <TEMP>;
   @lines = <TEMP>;
\end{verbatim}
\item $<>$ means all files supplied on command  line  (or
STDIN  if none). When used this way, \$ARGV is the current filename.
\end{itemize}
\newslide


\centerline{\bf I/O (continued)}
\medskip
\begin{itemize}
\item When used in a {\bf while} construct, input lines are
automatically assigned to the {\bf \$\_} variable.
\item Usually iterate over file a line at a time, assigning
to  {\bf \$\_} each time and using that as the default operand.
\begin{verbatim}
   while ( <> ) {
       next if /^#/;       # skip comments
       s/left/right/g;     # global substitute
       print;              # print $_
   }
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf I/O (continued)}
\medskip
\begin{itemize}
\item If not using the pseudo-file $<>$, open a filehandle:
\begin{verbatim}
   open (PWD,      "/etc/passwd");
   open (TMP,      ">/tmp/foobar.$$");
   open (LOG,      ">>logfile");
   open (TOPIPE,   "| lpr");
   open (FROMPIPE, "/usr/etc/netstat -a |");
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf I/O (continued)}
\medskip
\begin{itemize}
\item May also use {\bf getc} for character I/O and {\bf read} for raw I/O
\item Access to {\bf eof}, {\bf seek}, {\bf close}, {\bf flock}, {\bf
ioctl}, {\bf fcntl}, and {\bf select} calls for use with filehandles.
\item Access to {\bf mkdir}, {\bf rmdir}, {\bf chmod}, {\bf chown},
{\bf link}, {\bf symlink} (if supported), {\bf stat}, {\bf rename},
{\bf unlink} calls for use with filenames.
\item Pass {\bf printf} a filehandle as its first argument unless
printing to STDOUT
\begin{verbatim}
   printf LOG "%-8s %s: weird bits: %08x\n",
       $program, &ctime, $bits;
\end{verbatim}
\item Associative arrays may be bound to {\bf dbm} files with {\bf
tie()}
\end{itemize}
\newslide


\centerline{\bf System Functions}
\medskip
\noindent
A plethora of functions  from  the  C  library  are  provided  as
built-ins, including most system calls.  These include
\begin{itemize}
\item {\bf chdir}, {\bf chroot}, {\bf exec}, {\bf exit}, {\bf fork},
{\bf getlogin}, {\bf getpgrp}, {\bf getppid}, {\bf kill},  {\bf
setpgrp},  {\bf setpriority}, {\bf sleep}, {\bf syscall}, {\bf system},
{\bf times}, {\bf umask}, {\bf wait}.
\item If your system has Berkeley-style networking, {\bf bind}, {\bf
connect}, {\bf send},  {\bf getsockname},  {\bf getsockopt},  {\bf
getpeername}, {\bf recv}, {\bf listen}, {\bf socket}, {\bf
socketpair}.
\item {\bf getpw*}, {\bf getgr*}, {\bf gethost*}, {\bf getnet*}, {\bf
getserv*}, and {\bf getproto*}.
\item {\bf pack} and {\bf unpack} can be used for manipulating binary
data.
\end{itemize}
\newslide


\centerline{\bf Directory Access}
\medskip
\noindent
Three methods of accessing directories are provided.
\begin{itemize}
\item You may open a pipe from {\sl /bin/ls} like this:
\begin{verbatim}
   open(FILES,"/bin/ls *.c |");
   while ($file = <FILES>) { chop($file); ... }
\end{verbatim}
\item The directory-reading routines are provided as built-ins and
operate  on directory handles.  Supported routines are {\bf opendir},
{\bf readdir}, {\bf closedir}, {\bf seekdir}, {\bf telldir}, and {\bf
rewinddir}.
\end{itemize}
\newslide


\centerline{\bf Directory Access (continued)}
\medskip
\begin{itemize}
\item The easiest way is to use {\sl perl}'s file globbing
notation.   A string  enclosed  in  angle  brackets containing shell
metacharacters evaluates to a list of matching filenames.
\begin{verbatim}
   foreach $x ( <*.[ch]> ) { rename($x, "$x.old"); }
   chmod 0644, <*.c>;
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Subroutines}
\medskip
\begin{itemize}
\item Subroutines called either with {\bf `do'} operator  or  with {\bf
`\&'}.  Any  of  the  three  principal  data  types may be passed as
parameters or used as a return value.
\end{itemize}
\begin{verbatim}
   do foo(1.43);
   do foo(@list);
   $x = &foo('red', 3, @others);
   @list = &foo(@olist);
   %foo = &foo($foo, @foo);
\end{verbatim}
\newslide


\centerline{\bf Subroutines (continued)}
\medskip
\begin{itemize}
\item Parameters are received by the subroutine in the
special array {\bf @\_}.  If desired, these can be copied to local
variables.  This is especially useful for recursive subroutines.
\vspace{-1 mm}
\begin{verbatim}
   $result = &simple($alpha, $beta, @tutti);
   sub simple {
       local($x, $y, @rest) = @_;
       local($sum, %seen);
       return $sum;
   }
\end{verbatim}
\item Subroutines may also be called indirectly
\vspace{-1 mm}
\begin{verbatim}
   $foo = 'some_routine';
   do $foo(@list);
   ($x, $y, $z) = do $foo(%maps);
\end{verbatim}
\end{itemize}
\newslide


\centerline{\bf Formatted I/O}
\smallskip
\begin{itemize}
\item Besides {\bf printf}, formatted I/O can be done  with
{\bf format} and {\bf write} statements.
\vspace{-.5 mm}
\item Automatic pagination and printing of headers.
\vspace{-.5 mm}
\item Picture description facilitates lining up multi-line
output
\vspace{-.5 mm}
\item Fields in picture may be left or right-justified or
centered
\vspace{-.5 mm}
\item Multi-line text-block filling is  provided,  something like
having a {\bf \%s} format string with a built-in pipe to {\bf fmt}
\vspace{-.5 mm}
\item These special scalar variables are useful:
\vspace{-.5 mm}
\begin{tabbing}
foob\= foobar\= \kill
\> {\bf \$\%}\>  for current page number \\
\> {\bf \$=} \> for current page length (default 60) \\
\> {\bf \$$-$} \> for lines left on page \\
\end{tabbing}
\end{itemize}
\newslide


\centerline{\bf Formatted I/O (example)}

\begin{verbatim}
# a report from a bug report form; from perl man page
format top =
                     Bug Reports
@<<<<<<<<<<<<<<<<<<<<     @|||      @>>>>>>>>>>>>>>>>>>>>
$system,                  $%,       $date
---------------------------------------------------------