#! /usr/bin/perl
#
# reads a tin filter file with regexp filters on STDIN and turns all case
# insensitive regexp into case sensitive ones whenever possible, as case
# sensitive regexp are (a bit) faster.
#
# 2000-04-27 <[email protected]>
#
# NOTE: the case= line must come before any line with a regexp pattern,
# (that is the order tin saves the filter file, if you created the
# filter by hand and never let tin rewrite the file, you might want to
# check that first)
#
# NOTE: don't use opt-case.pl on wildmat filters, transform them into regexp
# filter via w2r.pl first
# version number
# $VERSION = "0.2.3";
# perl 5 is needed for lookahead assertions and perl < 5.004 is known to be
# buggy
require 5.004;
use strict;
use warnings;
# Prefix put in front of every rewritten regexp of the current rule: "(?i)"
# while we are inside a rule that was declared case insensitive (case=1),
# empty otherwise.
my $prefix = "";

# Copy the filter file from STDIN (or the files named on the command line)
# to STDOUT, line by line.  Every case=1 line is rewritten to case=0 and the
# case insensitivity is moved into the patterns themselves, but only for
# those patterns that actually contain a letter.
while (defined(my $entry = <>)) {
    chomp $entry;

    if ($entry =~ m/^[#\s]/o) {
        # comments (and lines starting with whitespace) are passed through
        print "$entry\n";
    }
    elsif ($entry =~ m/^[^=]+=$/o) {
        # 'empty' patterns are nonsense, drop them from the output
        next;
    }
    elsif ($entry =~ m/^group=/o || $entry =~ m/^case=0/) {
        # a new scope or a case sensitive rule: no modifier is needed
        $prefix = "";
        print "$entry\n";
    }
    elsif ($entry =~ m/^case=1/o) {
        # case insensitive rule: remember that patterns need the modifier
        # and declare the rule case sensitive instead
        $prefix = "(?i)";
        print "case=0\n";
    }
    elsif (my ($key, $pattern) = $entry =~ m/^(subj|from|msgid(?:|_last|_only)|refs_only|xref|path)=(.*[^\W\d_].*)$/o) {
        # The regexp line contains a letter, so it may need the
        # (?i)-modifier.  [^\W\d_] is a charset independent way to look for
        # any upper/lowercase letter.  Lines whose only 'letters' belong to
        # \s, \S, \d or \D get the modifier as well; that misses a few
        # possible optimizations but does no harm.
        if (length $prefix) {
            print "# rule rewritten, it might be possible that it can be further optimized\n",
                  "# check lines with (?i) if they really need to be case insensitive and if\n",
                  "# not remove leading (?i) manually\n";
        }
        print "$key=$prefix$pattern\n";
    }
    else {
        # every other line is copied unchanged
        print "$entry\n";
    }
}
__END__
=head1 NAME

opt-case.pl - Optimize case insensitive regexp filters for tin

=head1 SYNOPSIS

B<opt-case.pl> E<lt> I<input> [E<gt> I<output>]

=head1 DESCRIPTION

B<opt-case.pl> reads a L<tin(1)> filter-file (L<tin(5)>) with regexp
filters on STDIN and turns all case insensitive regexp into case
sensitive ones whenever possible, as case sensitive regexp are (a
bit) faster.

=head1 NOTES

The case= line must come before any line with a regexp pattern, (that
is the order L<tin(1)> saves the filter file, if you created the
filter by hand and never let L<tin(1)> rewrite the file, you might
want to check that first).

Don't use B<opt-case.pl> on wildmat filters, transform them into
regexp filter via L<w2r.pl(1)> first.

=head1 AUTHOR

Urs Janssen E<lt>[email protected]E<gt>

=head1 SEE ALSO

L<tin(1)>, L<tin(5)>, L<w2r.pl(1)>

=cut