news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf Tue Dec 15 10:08:50 CST 1992
Article: 29 of comp.lang.perl
Xref: feenix.metronet.com comp.lang.perl:29
Newsgroups: comp.lang.perl
Path: feenix.metronet.com!news.utdallas.edu!wupost!uunet!caen!uvaarpa!mmdf
From: Dov Grobgeld <[email protected]>
#Subject: Re: Statistic routines ?
Message-ID: <[email protected]>
Sender: [email protected] (Mail System)
Reply-To: [email protected]
Organization: The Internet
Date: Mon, 14 Dec 1992 07:56:02 GMT
Lines: 270

For all it's worth. Here are a couple of routines that I have written
that I find very useful in my data manipulations.

<<File: histbin>>
#/usr/local/bin/perl
#############################################################################
#  histbin
#
#  version 0.1
#
#  Bin data for a histogram.
#
#  Dov Grobgeld
#  Department of Chemical Physics
#  The Weizmann Institute of Science
#  Rehovot, Israel
#  Email: [email protected]
#
#  1992-06-16
#############################################################################

# Get options
while (($_ = $ARGV[0]) =~ /^-/) {
   shift;
   if (/-nBins$/i) {
       $nBins= $ARGV[0];
       shift;
   }
   elsif (/-intv$/i) {
       $binMin= $ARGV[0]; shift;
       $binMax= $ARGV[0]; shift;
   }
   elsif (/-n(orm)?/i) {
       $normalize++;
   }
   elsif (/-h(elp)?$|-u(sage)?$/i) {
       print<<EOH
histbin -  Bins data according to user specifications

Syntax:

   histbin [-nBins n] [-intv min max] [-norm] <file>

   where <file> is a text file containing a column of values to bin.

Options:

   -nBins n        Number of bins. Default is n=100.
   -intv min max   Interval for binning. Default is min=0 and max=1.
   -norm           Normalize the histogram.
EOH
;
   exit;
   }
}


# Set default values
$binMin=0.0 unless defined $binMin;
$binMax=1.0 unless defined $binMax;
$nBins=100 unless defined $nBins;

# Construct index array...
for $i (0..$nBins-1) {
   $xBins[$i]= $binMin + (1.0 * $i / $nBins) * ($binMax - $binMin);
}

# Clear the bin array
for $i (0..$nBins-1) {
   $binCount[$i]=0;
}

# Read the data file
while (<>) {
  next if (/^#/);  # Skip comments
  ($x=$_) =~ s/^\s+//; $x=~ s/\n//;
  next unless $x=~ /^\d+(\.\d*)?([de][\+\-]?\d+)?$/o;
  if (++$numEntries % 100 == 0) {printf STDERR ("%5d\r",$numEntries); }
  $binIdx= int(1.0 * ($x-$binMin) * $nBins / ($binMax - $binMin));
  $binCount[$binIdx]++ unless $binIdx < 0;
  if ($binIdx >= $nBins || $binIdx < 0) {
      printf STDERR "Warning! Value $x outside of binning range\n";
  }
}

# print the data
for $i (0..$nBins-1) {
  if ($normalize) {$binCount[$i] = $binCount[$i]/$numEntries; }
  printf ("%12.8f %15.8g\n",$xBins[$i], $binCount[$i]);
}

print STDERR "Done binning $numEntries values!\n";


<<File: histrebin>>
#/usr/local/bin/perl
##########################################################################
#  histrebin
#
#  version 0.1
#
#  Reads a file of bin values and frequency count and rebins it by
#  a user defined factor. Optionally normalizes the histogram.
#
#  Dov Grobgeld
#  Department of Chemical Physics
#  The Weizmann Institute of Science
#  Rehovot, Israel
#  Email: [email protected]
#
#  1992-06-16
##########################################################################

# Get options
while (($_ = $ARGV[0]) =~ /^-/) {
   shift;
   if (/-n(orm)?$/) {
       $normalize++;
   }
   elsif (/-s(trip)?$/) {
       $stripComment++;
   }
   elsif (/-h(elp)?$|-u(sage)$/) {
   print <<EOHELP
histrebin - rebin a histogram

Syntax:

    histrebin [-n] [-s] <file> <factor>

    where <file> is the file to rebin by a a factor of <factor>. <factor>
    will default to 1, if left out.

    histrebin will sum the values of each <factor> bins and create one
    new bin with the accumulated sum.

Options:

 -n   will cause the histogram to be normalized.
 -s   will strip comments.

Examples:

   1. Rebin a histogram by a factor of 4. This would e.g. create 25
      bins instead of 100.

      histrebin binned100.dat 4 > binned25.dat

   2. Sum all the 100 bins in the file binned100.dat

      histrebin binned100.dat 100

   3. Normalize the histogram in binned100.dat

      histrebin -norm binned100.dat > binned100norm.dat

EOHELP
;   exit;
   }
   else {
       die "Unknown option $_!\n";
   }
}

$infile=shift;
open (IN, $infile) || die "No such file $infile!\n";
($factor=shift) || ($factor=1);
if ($factor==0) { die "Invalid factor!\n"; }

# If normalize then go through the whole file and sum the histogram...
$totalsum=0.0;
if ($normalize) {
   while (<IN>) {
       next if (/^#/);
       if (/^\s*(\S+)\s+(\S+)/) { $totalsum += $2; }
   }
   close(IN);
   open(IN, $infile);
}

$bunchindex=0;     # Number of bins summed into the bunch
$bunchsum=0;       # Sum of all entries in the bin
$binlow=0;         # Lower limit of the bin
while (<IN>) {
   if (/^#/) {
     print unless ($stripComment);
   }
   elsif (/^\s*(\S+)\s+(\S+)/) {   # Assume naively that these are numbers...
       $bunchindex++;
       $binlow = 0.0 + $1 if ($bunchindex == 1);  # Convert to floating point
       $bunchsum += $2;
       if ($bunchindex == $factor) {
           $bunchsum= 1.0 * $bunchsum/$totalsum if $normalize;
           printf("%8.5f %10.8g\n", $binlow, $bunchsum);
           $bunchsum=0;
           $bunchindex=0;
       }
   }
}

<<File: cutcol>>
#/usr/local/bin/perl
##############################################################################
#  cutcol
#
#  Version 0.1
#
#  Cut columns out of a data file. Great as a prefilter for xgraph.
#
#  Dov Grobgeld
#  Email: [email protected]
#  1992-06-16
###############################################################################
while (($_= $ARGV[0]) =~ /^-/) {
   shift;
   if (/-c$/) {    # Other columns than default ones
       $col1= $ARGV[0]; shift;
       $col2= $ARGV[0]; shift;
   }
   elsif (/-s$/) {
       $strip++;
   }
   elsif (/^-h(elp)?$|^-u(sage)?$/) {
       print <<EOH
cutcol  -  Cuts 2 columns out of a file

Syntax:

   cutcol [-c n1 n2] [-h] [-s]

Options:

   -c n1 n2    Specify columns to cut out other than the default n1=1, n2=2

   -h          This help file

   -s          Strip comments
EOH
;
       exit;
   }
}

# Default values

$col1=1 unless defined $col1;
$col2=2 unless defined $col2;

$col1--; $col2--; # Normalize to zero

while(<>) {
   if (/^\#/) {
     print unless $strip;
     next;
   }
   s/^\s+//; # Strip leading spaces
   @columns= split;
   print "$columns[$col1] $columns[$col2]\n";
}



--
                                                       ___   ___
                                                     /  o  \   o \
Dov Grobgeld                                         ( o  o  ) o   |
The Weizmann Institute of Science, Israel             \  o  /o  o /
"Where the tree of wisdom carries oranges"              | |   | |
                                                      _| |_ _| |_