#!/usr/bin/perl
##
##  sgmlstripper - Strip SGML markup from input.
##
##  by Robert J Seymour <[email protected]>
##     Copyright 1995, 1996, Robert Seymour and Springer-Verlag.
##     All rights reserved.  This program may be distributed and/or
##     modified in electronic form under the same terms as Perl
##     itself.
##
##  CPAN menu:
#
# File Name: sgmlstripper
# File Size in BYTES: 1469
# Sender/Author/Poster: Robert J. Seymour <[email protected]>
# Subject: sgmlstripper - Strip SGML markup from input.
#
# sgmlstripper removes SGML markup tags from input (taken through
# specified files or STDIN).  sgmlstripper uses a
# character-by-character read mode which, though not as fast as a
# regexp, is guaranteed to strip tags which fall across line or
# paragraph boundaries and preserves whitespace so that line numbers
# will be the same (the latter is useful for search engines which
# don't want to index markup, but want line numbers to be preserved).


use strict;
use warnings;

##  strip_markup($in, $out)
##
##  Copy the filehandle $in to the filehandle $out one character at a
##  time, discarding everything from a "<" up to and including the
##  next ">".  Newlines found inside a tag are still written so that
##  every line of output keeps the line number it had in the input,
##  even when a tag spans several lines.  A tag left open at end of
##  input is simply dropped.  Each call starts outside of any tag.
##
##  Limitation: a tag ends at the first ">" seen, so a ">" inside a
##  quoted attribute value or an SGML comment ends the tag early.
sub strip_markup {
    my ($in, $out) = @_;
    my $in_tag = 0;

    # Test definedness rather than truth: getc returns the string "0"
    # for a literal zero character, which must not be taken for EOF.
    while (defined(my $char = getc($in))) {
        if ($in_tag) {
            if ($char eq '>') {
                $in_tag = 0;
            }
            elsif ($char eq "\n") {
                # Keep line breaks so line numbers stay aligned.
                print {$out} $char;
            }
        }
        elsif ($char eq '<') {
            $in_tag = 1;
        }
        else {
            print {$out} $char;
        }
    }

    return;
}

##  main(@files)
##
##  Strip markup from each named file in turn, writing to STDOUT.
##  STDIN is read when no files are given, or for a file named "-".
##  A file that cannot be opened is reported on STDERR and skipped.
##  Returns 0 if every file was processed and 1 otherwise.
sub main {
    my @files = @_;
    @files = ('-') unless @files;
    my $failures = 0;

    FILE:
    for my $file (@files) {
        if ($file eq '-') {
            strip_markup(\*STDIN, \*STDOUT);
            next FILE;
        }

        # Three-argument open: the name is never parsed for a mode or
        # pipe, so names such as ">x" or "cmd |" are plain file names.
        my $input;
        if (!open($input, '<', $file)) {
            warn "$0: cannot open '$file': $!\n";
            $failures++;
            next FILE;
        }

        strip_markup($input, \*STDOUT);
        close($input);
    }

    return $failures ? 1 : 0;
}

##  Run only when executed as a script, not when loaded with require.
##  On success we fall off the end of the file (exit status 0).
unless (caller) {
    my $status = main(@ARGV);
    exit $status if $status;
}