Article 6614 of comp.lang.perl:
Xref: feenix.metronet.com comp.lang.perl:6614
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!agate!ames!purdue!not-for-mail
From:
[email protected] (Gene Spafford)
Newsgroups: comp.lang.perl
Subject: Re: Perl .newsrc cleanup script?
Date: 10 Oct 1993 10:47:30 -0500
Organization: Department of Computer Sciences, Purdue University
Lines: 175
Message-ID: <
[email protected]>
References: <
[email protected]>
NNTP-Posting-Host: uther.cs.purdue.edu
In-reply-to:
[email protected]'s message of 7 Oct 1993 14:20:18 GMT
The following doesn't do exactly what Steve requested, and it is
*realy* ugly and it has localisms for our news and my preference for
newsgroup order, but....it works.
This takes as argument the .newsrc file and the news/lib/active file.
It updates all the range lists in newsgroup lines, and sorts them
according to a prespecified order (see the pattern list). Then it
writes out a new copy with ! lines at the end.
Happy hacking.
#!/usr/local/perl/perl -s
@Patterns = ( 'announce', 'general', '^[^.]+$', '^serc\.',
'^rec\.humor\.funny', 'comp\.risks', '^purdue\.',
'^in\.', '^ieee\.', '^gnu\.', '^news\.', '^comp\.', '^sci\.',
'^misc\.', '^rec\.', '^soc\.', '^alt\.', '^talk\.');
die "usage: $0 newsrc-file [active-file]\n" unless ($Fname = shift);
open(NRC, "<$Fname") || die "Cannot open $Fname: $!";
(rename ($Fname, "$Fname.old") || die "Cannot rename $Fname: $!")
unless $debug;
$, = $\ = "\n";
@slurp = (<NRC>);
close NRC;
chop @slurp;
warn (scalar(@slurp), " newsgroup lines found in $Fname\n") if $debug;
if ($active = shift) {
open(ACTIVE, "<$active") || die "Cannot open active file $active: $!";
while (<ACTIVE>) {
split;
$Low{@_[0]} = @_[2]+0;
$High{@_[0]} = @_[1]+0;
}
close ACTIVE;
$index = 0; until ($index > $#slurp) {
($group) = $slurp[$index] =~ m/^(\S+)[:!]/;
next if defined $Low{$group};
warn "Deleting bogus group: $group\n";
splice (@slurp, $index, 1) && redo;
} continue {++$index;}
for ($_ = 0; $_ <= $#slurp; ++$_) {
($group, $sep, $numbs) =
$slurp[$_] =~ m/^(\S+)([:!])\s*(\d+.*)$/;
next unless $numbs =~ m/\S/;
warn "Group $group has read $numbs\n" if $debug;
$numbs = &CheckRead($group, $numbs);
warn "New range is $numbs\n\n" if $debug;
$slurp[$_] = "$group$sep $numbs";
}
}
foreach $_ (sort {$a cmp $b;} @slurp) {
push (@On, $_) if m/:/;
push (@Off, $_) unless m/:/;
warn "Don't know what to do about: $_\n" unless m/[:!]/;
}
open(NEW, ">$Fname") || die "Cannot open $Fname: $!" unless $debug;
open(NEW, "|cat") if $debug;
foreach $pattern (@Patterns) {
eval <<"EOF" ;
\$index = $[;
until (\$index > \$#On) {
next unless \$On[\$index] =~ m/$pattern/o;
print NEW splice(\@On, \$index, 1, ());
redo;
} continue {++\$index;}
EOF
warn $@ if $@;
}
print NEW @On, @Off;
close NEW;
exit;
sub CheckRead {
local($group, $numbers) = @_;
local($low, $high, $result) = ($Low{$group}, $High{$group}, '');
local(@bool, $start, $flag, $next, $first, $index);
warn "active file has $low to $high for $group\n"
if $debug;
# Simplest case -- all articles expired (empty group)
return "1-$high" if $high < $low;
# Next simplest case -- I'm already caught up
$numbers =~ m/^\s+((\d+)-(\d+))$/ && $2 == 1 && $3 == $high && return $1;
# We construct a boolean array indicating what has been seen
foreach $index (split(/,/, $numbers)) {
if ($index =~ m/(\d+)-(\d+)/) {
next if $2 < $low;
$first = $1 < $low ? $low : $1;
foreach $_ ($first .. $2) {$bool[$_] = 1;}
} else {
$bool[$index]++ unless $index < $low;
}
}
warn "max article read is $#bool\n" if $debug;
# If the most recently-read article has expired, the case is simple
if ($#bool < $low) {
$low--;
return "1-$low";
}
# If the group has been here awhile, we can reduce the array
elsif ($low > 1) {
$start = $low-1;
@bool = @bool[$start .. $high];
$flag = 1;
$bool[0] = 0;
}
# Now step through for each set of values
while (($next = &NextVal(1)) != -1) {
$start += $next;
$next = &NextVal(0);
if ($next < 0) {
$result .= ",$start-$high" unless $flag;
last;
}
elsif ($next == 0) {
if ($flag) {
$result = $result . ",1-" .
(($start == $low) ? "$start" : $low-1 . ",$start");
$flag = 0;
} else {
$result .= ",$start";
}
$start += 2;
}
else {
$next += $start;
$start = 1 if $flag;
$flag = 0;
$result .= ",${start}-$next";
$start = $next + 2;
}
}
return "1-$high" unless $result =~ m/\S/;
substr($result, 1); # chop the leading ,
}
sub NextVal {
local($val) = @_;
local($count, $tmp) = 0;
local($arrsize) = $#bool;
while ($count <= $arrsize) {
$tmp = shift @bool;
return $count if ($tmp && $val);
return $count unless ($tmp || $val);
$count++;
}
-1;
}
--
Gene Spafford, COAST Project Director
Software Engineering Research Center & Dept. of Computer Sciences
Purdue University, W. Lafayette IN 47907-1398
Internet:
[email protected] phone: (317) 494-7825