news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf Tue Dec 15 10:08:50 CST 1992
Article: 29 of comp.lang.perl
Xref: feenix.metronet.com comp.lang.perl:29
Newsgroups: comp.lang.perl
Path: feenix.metronet.com!news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf
From: Dov Grobgeld <
[email protected]>
#Subject: Re: Statistic routines ?
Message-ID: <
[email protected]>
Sender:
[email protected] (Mail System)
Reply-To:
[email protected]
Organization: The Internet
Date: Mon, 14 Dec 1992 07:56:02 GMT
Lines: 270
For all it's worth. Here are a couple of routines that I have written
that I find very useful in my data manipulations.
<<File: histbin>>
#/usr/local/bin/perl
#############################################################################
# histbin
#
# version 0.1
#
# Bin data for a histogram.
#
# Dov Grobgeld
# Department of Chemical Physics
# The Weizmann Institute of Science
# Rehovot, Israel
# Email:
[email protected]
#
# 1992-06-16
#############################################################################
# Get options
while (($_ = $ARGV[0]) =~ /^-/) {
shift;
if (/-nBins$/i) {
$nBins= $ARGV[0];
shift;
}
elsif (/-intv$/i) {
$binMin= $ARGV[0]; shift;
$binMax= $ARGV[0]; shift;
}
elsif (/-n(orm)?/i) {
$normalize++;
}
elsif (/-h(elp)?$|-u(sage)?$/i) {
print<<EOH
histbin - Bins data according to user specifications
Syntax:
histbin [-nBins n] [-intv min max] [-norm] <file>
where <file> is a text file containing a column of values to bin.
Options:
-nBins n Number of bins. Default is n=100.
-intv min max Interval for binning. Default is min=0 and max=1.
-norm Normalize the histogram.
EOH
;
exit;
}
}
# Set default values
$binMin=0.0 unless defined $binMin;
$binMax=1.0 unless defined $binMax;
$nBins=100 unless defined $nBins;
# Construct index array...
for $i (0..$nBins-1) {
$xBins[$i]= $binMin + (1.0 * $i / $nBins) * ($binMax - $binMin);
}
# Clear the bin array
for $i (0..$nBins-1) {
$binCount[$i]=0;
}
# Read the data file
while (<>) {
next if (/^#/); # Skip comments
($x=$_) =~ s/^\s+//; $x=~ s/\n//;
next unless $x=~ /^\d+(\.\d*)?([de][\+\-]?\d+)?$/o;
if (++$numEntries % 100 == 0) {printf STDERR ("%5d\r",$numEntries); }
$binIdx= int(1.0 * ($x-$binMin) * $nBins / ($binMax - $binMin));
$binCount[$binIdx]++ unless $binIdx < 0;
if ($binIdx >= $nBins || $binIdx < 0) {
printf STDERR "Warning! Value $x outside of binning range\n";
}
}
# print the data
for $i (0..$nBins-1) {
if ($normalize) {$binCount[$i] = $binCount[$i]/$numEntries; }
printf ("%12.8f %15.8g\n",$xBins[$i], $binCount[$i]);
}
print STDERR "Done binning $numEntries values!\n";
<<File: histrebin>>
#/usr/local/bin/perl
##########################################################################
# histrebin
#
# version 0.1
#
# Reads a file of bin values and frequency count and rebins it by
# a user defined factor. Optionally normalizes the histogram.
#
# Dov Grobgeld
# Department of Chemical Physics
# The Weizmann Institute of Science
# Rehovot, Israel
# Email:
[email protected]
#
# 1992-06-16
##########################################################################
# Get options
while (($_ = $ARGV[0]) =~ /^-/) {
shift;
if (/-n(orm)?$/) {
$normalize++;
}
elsif (/-s(trip)?$/) {
$stripComment++;
}
elsif (/-h(elp)?$|-u(sage)$/) {
print <<EOHELP
histrebin - rebin a histogram
Syntax:
histrebin [-n] [-s] <file> <factor>
where <file> is the file to rebin by a a factor of <factor>. <factor>
will default to 1, if left out.
histrebin will sum the values of each <factor> bins and create one
new bin with the accumulated sum.
Options:
-n will cause the histogram to be normalized.
-s will strip comments.
Examples:
1. Rebin a histogram by a factor of 4. This would e.g. create 25
bins instead of 100.
histrebin binned100.dat 4 > binned25.dat
2. Sum all the 100 bins in the file binned100.dat
histrebin binned100.dat 100
3. Normalize the histogram in binned100.dat
histrebin -norm binned100.dat > binned100norm.dat
EOHELP
; exit;
}
else {
die "Unknown option $_!\n";
}
}
$infile=shift;
open (IN, $infile) || die "No such file $infile!\n";
($factor=shift) || ($factor=1);
if ($factor==0) { die "Invalid factor!\n"; }
# If normalize then go through the whole file and sum the histogram...
$totalsum=0.0;
if ($normalize) {
while (<IN>) {
next if (/^#/);
if (/^\s*(\S+)\s+(\S+)/) { $totalsum += $2; }
}
close(IN);
open(IN, $infile);
}
$bunchindex=0; # Number of bins summed into the bunch
$bunchsum=0; # Sum of all entries in the bin
$binlow=0; # Lower limit of the bin
while (<IN>) {
if (/^#/) {
print unless ($stripComment);
}
elsif (/^\s*(\S+)\s+(\S+)/) { # Assume naively that these are numbers...
$bunchindex++;
$binlow = 0.0 + $1 if ($bunchindex == 1); # Convert to floating point
$bunchsum += $2;
if ($bunchindex == $factor) {
$bunchsum= 1.0 * $bunchsum/$totalsum if $normalize;
printf("%8.5f %10.8g\n", $binlow, $bunchsum);
$bunchsum=0;
$bunchindex=0;
}
}
}
<<File: cutcol>>
#/usr/local/bin/perl
##############################################################################
# cutcol
#
# Version 0.1
#
# Cut columns out of a data file. Great as a prefilter for xgraph.
#
# Dov Grobgeld
# Email:
[email protected]
# 1992-06-16
###############################################################################
while (($_= $ARGV[0]) =~ /^-/) {
shift;
if (/-c$/) { # Other columns than default ones
$col1= $ARGV[0]; shift;
$col2= $ARGV[0]; shift;
}
elsif (/-s$/) {
$strip++;
}
elsif (/^-h(elp)?$|^-u(sage)?$/) {
print <<EOH
cutcol - Cuts 2 columns out of a file
Syntax:
cutcol [-c n1 n2] [-h] [-s]
Options:
-c n1 n2 Specify columns to cut out other than the default n1=1, n2=2
-h This help file
-s Strip comments
EOH
;
exit;
}
}
# Default values
$col1=1 unless defined $col1;
$col2=2 unless defined $col2;
$col1--; $col2--; # Normalize to zero
while(<>) {
if (/^\#/) {
print unless $strip;
next;
}
s/^\s+//; # Strip leading spaces
@columns= split;
print "$columns[$col1] $columns[$col2]\n";
}
--
___ ___
/ o \ o \
Dov Grobgeld ( o o ) o |
The Weizmann Institute of Science, Israel \ o /o o /
"Where the tree of wisdom carries oranges" | | | |
_| |_ _| |_