#!/usr/bin/perl -w
use strict;
use Compress::Zlib ();
# Copyright (C) 2004 Nicholas Clark
# Distribute with same terms as perl5 - GPL or Artistic licence.
# Decompression code written while reading Compress::LZW by
# Sean O'Rourke & Matt Howard, so strongly influenced by it.
# Using chunks of 4 and 1 bits is not as efficient as 8 bits.
# TODO Wacky cominations such as 9 and 10 bits, might turn out to be more
# effective than bytes
my $bits = 8;
sub init_output {
[""];
}
# Find the number of bits needed for this number
sub bits_needed {
(unpack "b32", pack "V", $_[0]) =~ /(.*1)/ ? length $1 : die $_[0];
}
open C, ">C";
open D, ">D";
sub output {
my ($state, $code, $max_code) = @_;
die "Code empty" unless length $code;
die "Code $code too big" if $code > 65535;
my $b = bits_needed($max_code);
my $crossover = 1 << ($b - 1);
if ($code < $crossover && $code > ($max_code - $crossover)) {
print C "$code $b $crossover $max_code save\n";
--$b;
} else {
print C "$code $b $crossover $max_code\n";
}
my $new = unpack "b$b", pack "V", $code;
$state->[1] .= $new;
my $l = length $state->[1];
if ($l > 8) {
# Rip multiples of 8 bits off the front
$state->[0] .= pack "b*", substr $state->[1], 0, $l & ~7, "";
}
}
sub final {
my $state = shift;
return $state->[0] .= pack "b*", $state->[1];
}
sub init_input {
["", $_[0]];
}
sub getbits {
my ($state, $want) = @_;
my $l = length $state->[0];
if ($l < $want) {
return if !length $state->[1]; # EOF
my $add = 1 + (($want - $l) >> 3);
# warn "Getting $add $want $l";
# Put some more bits on the bit accumulator
$state->[0] .= unpack "b*", substr $state->[1], 0, $add, "";
}
return substr $state->[0], 0, $want, "";
}
sub input {
my ($state, $max_code) = @_;
my $b = bits_needed($max_code) - 1;
my $bits = getbits ($state, $b);
return unless defined $bits;
# warn "$state->[0] ", length $state->[0];
my $code = unpack "V", pack "b32", $bits;
# warn "$code '$state->[0]'";
my $crossover = 1 << $b;
if ($code <= ($max_code - $crossover)) {
$code += $crossover if getbits ($state, 1);
++$b; print D "$code $b $crossover $max_code\n";
} else {
++$b; print D "$code $b $crossover $max_code save\n";
}
$code;
}
sub compress {
my $input;
my $output = init_output;
my $next_code = 1<<$bits;
# TODO - map these to a frequency table with least likely bytes (or nibbles or
# bits) first.
my %codes;
if ($bits == 8) {
$input = shift;
$codes{chr ($next_code - $_)} = $_ - 1 foreach 1..$next_code;
} elsif ($bits == 4) {
$input = unpack "h*", shift;
$codes{sprintf "%x", ($next_code - $_)} = $_ - 1 foreach 1..$next_code;
} elsif ($bits == 1) {
$input = unpack "b*", shift;
@codes{1, 0} = (0, 1);
} else {
die "Can't do bits $bits";
}
my $accumulator;
my $pos = -length $input;
while ($pos) {
$accumulator .= substr($input, $pos++, 1);
if (exists $codes{$accumulator}) {
# good
next;
}
# Oops. One too many.
$codes{$accumulator} = $next_code;
# Back up
--$pos; chop $accumulator;
output($output, $codes{$accumulator}, $next_code);
$next_code++;
# Yes, we go round the first bit of the loop again once too many.
$accumulator = '';
}
output($output, $codes{$accumulator}, $next_code);
return final($output);
}
sub uncompress {
my $input = init_input @_;
my $output;
my %codes;
my $next_code = 1<<$bits;
# TODO - map these to a frequency table with least likely bytes (or nibbles or
# bits) first.
if ($bits == 8) {
$codes{$_ - 1} = chr ($next_code - $_) foreach 1..$next_code;
} elsif ($bits == 4) {
$codes{$_ - 1} = sprintf "%x", ($next_code - $_) foreach 1..$next_code;
} elsif ($bits == 1) {
@codes{1, 0} = (0, 1);
} else {
die "Can't do bits $bits";
}
my $prev = $output = $codes{input ($input, $next_code)};
while (1) {
# Not quire sure why we need the +1
my $code = input ($input, 1 + $next_code);
last unless defined $code;
if (exists $codes{$code}) {
my $this = $codes{$code};
$codes{$next_code++} = $prev . substr ($this, 0, 1);
$prev = $this;
} else {
die "$next_code $code" unless $next_code == $code;
$prev .= substr ($prev, 0, 1);
$codes{$next_code++} = $prev;
}
$output .= $prev;
}
if ($bits == 8) {
return $output;
} elsif ($bits == 4) {
pack "h*", $output;
} elsif ($bits == 1) {
pack "b*", $output;
}
}
sub test {
my ($name, $input) = @_;
my $compress = compress $input;
my $def = Compress::Zlib::compress($input);
printf "We manage %d, Zlib managed %d\n", length $compress, length $def;
#print "$compress\n";
my $output = uncompress $compress;
printf "Output Was %d now %d\n", length $input, length $output;
print $output eq $input ? "Pass\n" : "Fail\n";
}
# Test code - compress ourselves.
seek DATA, 0, 0 or die $!;
undef $/;
test ("self", <DATA>);
open FH, $^X or die;
test ($^X,<FH>);