Article 11097 of comp.lang.perl:
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!europa.eng.gtefsd.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail
From:
[email protected] (Mike Stok)
Newsgroups: comp.lang.perl
Subject: soundex.pl
Date: 2 Mar 1994 09:47:16 -0500
Organization: Meiko Scientific, Inc., MA
Lines: 255
Message-ID: <
[email protected]>
[ Archivists note: this item has been replaced with the
latest version, at the authors request. ]
#!/bin/sh
# This is a shell archive (produced by shar 3.49)
# To extract the files from this archive, save it to a file, remove
# everything above the "!/bin/sh" line above, and type "sh file_name".
#
# made 03/24/1994 00:35 UTC by
[email protected]
# Source directory /tmp_mnt/develop/sw/misc/mike/soundex
#
# existing files will NOT be overwritten unless -c is specified
#
# This shar contains:
# length mode name
# ------ ---------- ------------------------------------------
# 1677 -r--r--r-- soundex.pl
# 2408 -r-xr-xr-x soundex.t
#
# ============= soundex.pl ==============
if test -f 'soundex.pl' -a X"$1" != X"-c"; then
echo 'x - skipping soundex.pl (File already exists)'
else
echo 'x - extracting soundex.pl (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' &&
package soundex;
X
;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $
;#
;# Implementation of soundex algorithm as described by Knuth in volume
;# 3 of The Art of Computer Programming, with ideas stolen from Ian
;# Phillips <
[email protected]>.
;#
;# Mike Stok <
[email protected]>, 2 March 1994.
;#
;# Knuth's test cases are:
;#
;# Euler, Ellery -> E460
;# Gauss, Ghosh -> G200
;# Hilbert, Heilbronn -> H416
;# Knuth, Kant -> K530
;# Lloyd, Ladd -> L300
;# Lukasiewicz, Lissajous -> L222
;#
;# $Log: soundex.pl,v $
;# Revision 1.2 1994/03/24 00:30:27 mike
;# Subtle bug (any excuse :-) spotted by Rich Pinder <
[email protected]>
;# in the way I handles leasing characters which were different but had
;# the same soundex code. This showed up comparing it with Oracle's
;# soundex output.
;#
;# Revision 1.1 1994/03/02 13:01:30 mike
;# Initial revision
;#
;#
;##############################################################################
X
;# $soundex'noCode is used to indicate a string doesn't have a soundex
;# code, I like undef other people may want to set it to 'Z000'.
X
$noCode = undef;
X
;# main'soundex
;#
;# usage:
;#
;# @codes = &main'soundex (@wordList);
;# $code = &main'soundex ($word);
;#
;# This strenuously avoids $[
X
sub main'soundex
{
X local (@s, $f, $fc, $_) = @_;
X
X foreach (@s)
X {
X tr/a-z/A-Z/;
X tr/A-Z//cd;
X
X if ($_ eq '')
X {
X $_ = $noCode;
X }
X else
X {
X ($f) = /^(.)/;
X tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/;
X ($fc) = /^(.)/;
X s/^$fc+//;
X tr///cs;
X tr/0//d;
X $_ = $f . $_ . '000';
X s/^(.{4}).*/$1/;
X }
X }
X
X wantarray ? @s : shift @s;
}
X
1;
SHAR_EOF
chmod 0444 soundex.pl ||
echo 'restore of soundex.pl failed'
Wc_c="`wc -c < 'soundex.pl'`"
test 1677 -eq "$Wc_c" ||
echo 'soundex.pl: original size 1677, current size' "$Wc_c"
fi
# ============= soundex.t ==============
if test -f 'soundex.t' -a X"$1" != X"-c"; then
echo 'x - skipping soundex.t (File already exists)'
else
echo 'x - extracting soundex.t (Text)'
sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' &&
#!./perl
;#
;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $
;#
;# test module for soundex.pl
;#
;# $Log: soundex.t,v $
;# Revision 1.2 1994/03/24 00:30:27 mike
;# Subtle bug (any excuse :-) spotted by Rich Pinder <
[email protected]>
;# in the way I handles leasing characters which were different but had
;# the same soundex code. This showed up comparing it with Oracle's
;# soundex output.
;#
;# Revision 1.1 1994/03/02 13:03:02 mike
;# Initial revision
;#
;#
X
require '../lib/soundex.pl';
X
$test = 0;
print "1..13\n";
X
while (<DATA>)
{
X chop;
X next if /^\s*;?#/;
X next if /^\s*$/;
X
X ++$test;
X $bad = 0;
X
X if (/^eval\s+/)
X {
X ($try = $_) =~ s/^eval\s+//;
X
X eval ($try);
X if ($@)
X {
X $bad++;
X print "not ok $test\n";
X print "# eval '$try' returned $@";
X }
X }
X elsif (/^\(/)
X {
X ($in, $out) = split (':');
X
X $try = "\@expect = $out; \@got = &soundex $in;";
X eval ($try);
X
X if (@expect != @got)
X {
X $bad++;
X print "not ok $test\n";
X print "# expected ", scalar @expect, " results, got ", scalar @got, "\n";
X print "# expected (", join (', ', @expect),
X ") got (", join (', ', @got), ")\n";
X }
X else
X {
X while (@got)
X {
X $expect = shift @expect;
X $got = shift @got;
X
X if ($expect ne $got)
X {
X $bad++;
X print "not ok $test\n";
X print "# expected $expect, got $got\n";
X }
X }
X }
X }
X else
X {
X ($in, $out) = split (':');
X
X $try = "\$expect = $out; \$got = &soundex ($in);";
X eval ($try);
X
X if ($expect ne $got)
X {
X $bad++;
X print "not ok $test\n";
X print "# expected $expect, got $got\n";
X }
X }
X
X print "ok $test\n" unless $bad;
}
X
__END__
#
# 1..6
#
# Knuth's test cases, scalar in, scalar out
#
'Euler':'E460'
'Gauss':'G200'
'Hilbert':'H416'
'Knuth':'K530'
'Lloyd':'L300'
'Lukasiewicz':'L222'
#
# 7..8
#
# check default bad code
#
'2 + 2 = 4':undef
undef:undef
#
# 9
#
# check array in, array out
#
('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222')
#
# 10
#
# check array with explicit undef
#
('Mike', undef, 'Stok'):('M200', undef, 'S320')
#
# 11..12
#
# check setting $soundex'noCode
#
eval $soundex'noCode = 'Z000';
('Mike', undef, 'Stok'):('M200', 'Z000', 'S320')
#
# 13
#
# a subtle difference between me & oracle, spotted by Rich Pinder
# <
[email protected]>
#
CZARKOWSKA:C622
SHAR_EOF
chmod 0555 soundex.t ||
echo 'restore of soundex.t failed'
Wc_c="`wc -c < 'soundex.t'`"
test 2408 -eq "$Wc_c" ||
echo 'soundex.t: original size 2408, current size' "$Wc_c"
fi
exit 0
--
The "usual disclaimers" apply. | Meiko
Mike Stok | 130C Baker Ave. Ext
[email protected] | Concord, MA 01742
Meiko tel: (508) 371 0088 |
Article 11044 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!pipex!pipex!not-for-mail
From:
[email protected] (Ian Phillipps)
Newsgroups: comp.lang.perl
Subject: Soundex Function for Perl ??
Supersedes: <
[email protected]>
Date: 27 Feb 1994 21:24:14 -0000
Organization: PIPEX Ltd, Cambridge, UK.
Lines: 38
Message-ID: <
[email protected]>
References: <
[email protected]>
NNTP-Posting-Host: tank.pipex.net
In article <
[email protected]>, Rich Pinder <
[email protected]> wrote:
>There is a 'standard' function to assist in name matching algorighims
>called 'Soundex'.
>
>I believe its in the public domain, and I also am sure the specs of the
>function are available.....but.....
Since it's post-your-soundex time, here's mine. No disclaimers. If it
doesn't work at your site, I'll come out free of charge and fix it. Just
send me the air tickets :-)
Ian
----
[This article supersedes an earlier posting of mine - which failed on
'Lloyd'. Here's a replacement. Thanks to Mike Stok...]
# This implements the "Soundex" algorithm.
# Each argument is treated as a single word, and is reduced to its
# Soundex four character (Z999) code. The resulting code or array of
# codes is passed back as a the result.
#
[email protected] 11 Sep 92
sub soundex
{
local( @res ) = @_;
local($i, $t, $_);
for ( @res )
{
tr/a-zA-Z//cd;
tr/a-zA-Z/A-ZA-Z/s;
($i,$t) = /(.)(.*)/;
$t =~ tr/BFPVCGJKQSXZDTLMNRAEHIOUWY/111122222222334556/sd;
$_ = substr(($i||'Z').$t.'000', 0, 4 );
}
wantarray ? @res : $res[0];
}
1;
Article 10948 of comp.lang.perl:
Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!europa.eng.gtefsd.com!news.uoregon.edu!gaia.ucs.orst.edu!ruby.oce.orst.edu!tardis.co.uk!bill
From:
[email protected] (William Hails)
Newsgroups: comp.lang.perl
Subject: Re: Soundex Function for Perl ??
Date: Thu, 24 Feb 94 11:24:04 GMT
Organization: University Computing Services - Oregon State University
Lines: 57
Message-ID: <
[email protected]>
NNTP-Posting-Host: ruby.oce.orst.edu
Originator:
[email protected]
on 23 Feb 1994 11:12:33 -0800
[email protected] (Rich Pinder) wrote:
rpinder> There is a 'standard' function to assist in name matching algorighims
rpinder> called 'Soundex'.
rpinder> I believe its in the public domain, and I also am sure the specs of the
rpinder> function are available.....but.....
rpinder> I'd be interested in knowing if there is any such function already \
compiled
rpinder> that would be accessible from Perl.
Heres one I knocked up a while back, usual disclaimers etc.
8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---
# soundex subroutine
sub soundex {
package soundex;
local ($name) = @_;
local($buf, $c, $pc, $idx) = ("Z000");
$name =~ tr/a-z/A-Z/;
for ($pc = '0'; $name && $idx < 4; substr($name, 0, 1) = '') {
if (($c) = ($name =~ /^([A-Z])/)) {
if ($idx == 0 || ($map{$c} ne '0' && $map{$c} ne $pc)) {
substr($buf, $idx, 1) = $idx ? $map{$c} : $c;
++$idx;
}
$pc = $map{$c};
}
}
$buf;
}
package soundex;
%map = (
A, 0, B, 1, C, 2, D, 3, E, 0, F, 1, G, 2, H, 0, I, 0,
J, 2, K, 2, L, 4, M, 5, N, 5, O, 1, P, 0, Q, 2, R, 6,
S, 2, T, 3, U, 0, V, 1, W, 0, X, 2, Y, 0, Z, 2
);
1;
8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---
--
Cheers
Bill
Bill Hails <
[email protected]>
Tel (UK) 0483 300 200