Article 6614 of comp.lang.perl:
Xref: feenix.metronet.com comp.lang.perl:6614
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!agate!ames!purdue!not-for-mail
From: [email protected] (Gene Spafford)
Newsgroups: comp.lang.perl
Subject: Re: Perl .newsrc cleanup script?
Date: 10 Oct 1993 10:47:30 -0500
Organization: Department of Computer Sciences, Purdue University
Lines: 175
Message-ID: <[email protected]>
References: <[email protected]>
NNTP-Posting-Host: uther.cs.purdue.edu
In-reply-to: [email protected]'s message of 7 Oct 1993 14:20:18 GMT

The following doesn't do exactly what Steve requested, and it is
*realy* ugly and it has localisms for our news and my preference for
newsgroup order, but....it works.

This takes as argument the .newsrc file and the news/lib/active file.
It updates all the range lists in newsgroup lines, and sorts them
according to a prespecified order (see the pattern list).  Then it
writes out a new copy with ! lines at the end.

Happy hacking.

#!/usr/local/perl/perl -s

@Patterns = ( 'announce', 'general', '^[^.]+$', '^serc\.',
       '^rec\.humor\.funny', 'comp\.risks', '^purdue\.',
       '^in\.', '^ieee\.', '^gnu\.', '^news\.', '^comp\.', '^sci\.',
       '^misc\.', '^rec\.', '^soc\.', '^alt\.', '^talk\.');


die "usage: $0 newsrc-file [active-file]\n" unless ($Fname = shift);
open(NRC, "<$Fname") || die "Cannot open $Fname: $!";
(rename ($Fname, "$Fname.old") || die "Cannot rename $Fname: $!")
       unless $debug;
$, = $\ = "\n";


@slurp = (<NRC>);
close NRC;
chop @slurp;
warn (scalar(@slurp), " newsgroup lines found in $Fname\n") if $debug;

if ($active = shift) {
   open(ACTIVE, "<$active") || die "Cannot open active file $active: $!";

   while (<ACTIVE>) {
       split;
       $Low{@_[0]} = @_[2]+0;
       $High{@_[0]} = @_[1]+0;
   }
   close ACTIVE;

   $index = 0; until ($index > $#slurp) {
       ($group) = $slurp[$index] =~ m/^(\S+)[:!]/;
       next if defined $Low{$group};
       warn "Deleting bogus group: $group\n";
       splice (@slurp, $index, 1) && redo;
   } continue {++$index;}

   for ($_ = 0; $_ <= $#slurp; ++$_) {
       ($group, $sep, $numbs) =
               $slurp[$_] =~ m/^(\S+)([:!])\s*(\d+.*)$/;
       next unless $numbs =~ m/\S/;
       warn "Group $group has read $numbs\n" if $debug;
       $numbs = &CheckRead($group, $numbs);
       warn "New range is $numbs\n\n" if $debug;
       $slurp[$_] = "$group$sep $numbs";
   }
}

foreach $_ (sort {$a cmp $b;} @slurp) {
   push (@On, $_) if m/:/;
   push (@Off, $_) unless m/:/;
   warn "Don't know what to do about: $_\n" unless m/[:!]/;
}

open(NEW, ">$Fname") || die "Cannot open $Fname: $!" unless $debug;
open(NEW, "|cat") if $debug;

foreach $pattern (@Patterns) {
eval <<"EOF" ;
   \$index = $[;
   until (\$index > \$#On) {
       next unless \$On[\$index] =~ m/$pattern/o;
       print NEW splice(\@On, \$index, 1, ());
       redo;
      } continue {++\$index;}
EOF
warn $@ if $@;
}

print NEW @On, @Off;
close NEW;
exit;


sub CheckRead {
   local($group, $numbers) = @_;
   local($low, $high, $result) = ($Low{$group}, $High{$group}, '');
   local(@bool, $start, $flag, $next, $first, $index);

   warn "active file has $low to $high for $group\n"
       if $debug;

# Simplest case -- all articles expired (empty group)
   return "1-$high" if $high < $low;

# Next simplest case -- I'm already caught up
   $numbers =~ m/^\s+((\d+)-(\d+))$/ && $2 == 1 && $3 == $high && return $1;

# We construct a boolean array indicating what has been seen
   foreach $index (split(/,/, $numbers)) {
       if ($index =~ m/(\d+)-(\d+)/) {
           next if $2 < $low;
           $first = $1 < $low ? $low : $1;
           foreach $_ ($first .. $2) {$bool[$_] = 1;}
       } else {
           $bool[$index]++ unless $index < $low;
       }
   }
   warn "max article read is $#bool\n" if $debug;

# If the most recently-read article has expired, the case is simple
   if ($#bool < $low) {
       $low--;
       return "1-$low";
   }
# If the group has been here awhile, we can reduce the array
   elsif ($low > 1) {
       $start = $low-1;
       @bool = @bool[$start .. $high];
       $flag = 1;
       $bool[0] = 0;
   }

# Now step through for each set of values
   while (($next = &NextVal(1)) != -1) {
       $start += $next;
       $next = &NextVal(0);
       if ($next < 0) {
           $result .= ",$start-$high" unless $flag;
           last;
       }
       elsif ($next == 0) {
           if ($flag) {
               $result = $result . ",1-" .
                   (($start == $low) ? "$start" : $low-1 . ",$start");
               $flag = 0;
           } else {
               $result .= ",$start";
           }
           $start += 2;
       }
       else {
           $next += $start;
           $start = 1 if $flag;
           $flag = 0;
           $result .= ",${start}-$next";
           $start = $next + 2;
       }
   }

   return "1-$high" unless $result =~ m/\S/;
   substr($result, 1);  # chop the leading ,
}

sub NextVal {
   local($val) = @_;
   local($count, $tmp) = 0;
   local($arrsize) = $#bool;

   while ($count <= $arrsize) {
       $tmp = shift @bool;
       return $count if ($tmp && $val);
       return $count unless ($tmp || $val);
       $count++;
   }

   -1;
}

--
Gene Spafford, COAST Project Director
Software Engineering Research Center & Dept. of Computer Sciences
Purdue University, W. Lafayette IN 47907-1398
Internet:  [email protected]   phone:  (317) 494-7825