#!/usr/pkg/bin/perl
#
use strict;
use warnings;
use HTML::Strip;

# Positional arguments: input path first, then output path. Both are optional.
my $infile = shift;
my $outfile = shift;

# Substitute the sentinels only when an argument is really absent (undef or
# empty). A plain truthiness test would also discard a file named "0".
# "-" makes _slurp_file read standard input; "STDOUT" makes _burp_file
# print to standard output.
$infile = "-" unless defined $infile && length $infile;
$outfile = "STDOUT" unless defined $outfile && length $outfile;

# emit_spaces => 0 is passed through to HTML::Strip unchanged.
my $hs = HTML::Strip->new(emit_spaces => 0);

# Read the whole input, strip the markup in a single pass, write the result.
my $text_ref = _slurp_file ($infile);
my $clean_text = $hs->parse($$text_ref);
_burp_file ($outfile, \$clean_text);

# Read an entire input source into memory.
#
# Arguments:
#   $infile - path of the file to read, or "-" to read standard input.
# Returns:
#   A reference to a scalar holding everything that was read.
# Dies when the file cannot be opened or closed.
sub _slurp_file {
 my $infile = shift;

 my $fh;
 if ($infile eq "-") {
   # Three-argument open does not give "-" its special meaning, so the
   # standard-input case is handled explicitly.
   $fh = \*STDIN;
 } else {
   # Three-argument open treats the name as a literal path: it can never be
   # interpreted as an open mode or as a command to pipe from.
   open( $fh, '<', $infile ) or die "Unable to open $infile in _slurp_file: $!\n";
 }

 # With the input record separator undefined, one read returns everything.
 my $text = do { local( $/ ) ; <$fh> } ;

 # Only close handles this sub opened; STDIN belongs to the caller.
 if ($infile ne "-") {
   close( $fh ) or die "Unable to close $infile in _slurp_file: $!\n";
 }

 return \$text;
}

# Write a block of text to an output destination.
#
# Arguments:
#   $outfile  - path of the file to create/overwrite, or the sentinel string
#               "STDOUT" to print to standard output.
#   $text_ref - reference to the scalar holding the text to write.
# Returns:
#   A true value on success.
# Dies when the destination cannot be opened, written or closed.
sub _burp_file {
 my $outfile = shift;
 my $text_ref = shift;

 if ($outfile eq "STDOUT") {
   print {*STDOUT} ${$text_ref}
     or die "Unable to write to standard output in _burp_file: $!\n" ;
 } else {
   # Three-argument open keeps the mode separate from the name, so characters
   # in the name (a leading ">", surrounding whitespace) stay part of the path.
   open( my $fh, '>', $outfile ) or die "Unable to open $outfile in _burp_file: $!\n" ;
   print {$fh} ${$text_ref}
     or die "Unable to write to $outfile in _burp_file: $!\n" ;
   # Buffered write failures (e.g. a full disk) may only surface at close.
   close( $fh ) or die "Unable to close $outfile in _burp_file: $!\n" ;
 }

 return 1;
}

=head1 SYNOPSIS

unhtml is a Perl script that strips HTML tags from text.

=head1 VERSION

This documentation describes version 1.3 of unhtml.

=head1 DESCRIPTION

Uses HTML::Strip to do the real work; this is a wrapper around that
module that allows you to specify command line arguments - standard
input/output is assumed if no args are given. If only one arg is
given, it is assumed to be the input pathname.

=head1 USAGE

Examples (the following have equivalent results):

=over 4

=item unhtml < foo.html > foo.txt

=item unhtml foo.html > foo.txt

=item unhtml foo.html foo.txt

=back

=head1 REQUIRED ARGUMENTS

None. Acts as a STDIN/STDOUT pipe with no arguments.

=head1 OPTIONS

None.

=head1 DEPENDENCIES

Requires HTML::Strip (perl -MCPAN -e 'install HTML::Strip' as root on
any Unix-based OS will work).

=head1 LICENSE

Copyright (c) 2010 [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see
<http://www.gnu.org/licenses/>.

=cut