#!/usr/bin/perl -w
use strict;
use Compress::Zlib ();

# Copyright (C) 2004 Nicholas Clark
# Distribute with same terms as perl5 - GPL or Artistic licence.
# Decompression code written while reading Compress::LZW by
# Sean O'Rourke & Matt Howard, so strongly influenced by it.

# Using chunks of 4 and 1 bits is not as efficient as 8 bits.
# TODO Wacky cominations such as 9 and 10 bits, might turn out to be more
# effective than bytes
my $bits = 8;

sub init_output {
 [""];
}

# Find the number of bits needed for this number
sub bits_needed {
 (unpack "b32", pack "V", $_[0]) =~ /(.*1)/ ? length $1 : die $_[0];
}

open C, ">C";
open D, ">D";
sub output {
 my ($state, $code, $max_code) = @_;
 die "Code empty" unless length $code;
 die "Code $code too big" if $code > 65535;
 my $b = bits_needed($max_code);
 my $crossover = 1 << ($b - 1);
 if ($code < $crossover && $code > ($max_code - $crossover)) {
   print C "$code $b $crossover $max_code save\n";
   --$b;
 } else {
   print C "$code $b $crossover $max_code\n";
 }

 my $new = unpack "b$b", pack "V", $code;
 $state->[1] .= $new;
 my $l = length $state->[1];
 if ($l > 8) {
   # Rip multiples of 8 bits off the front
   $state->[0] .= pack "b*", substr $state->[1], 0, $l & ~7, "";
 }
}
sub final {
 my $state = shift;
 return $state->[0] .= pack "b*", $state->[1];
}

sub init_input {
 ["", $_[0]];
}

sub getbits {
 my ($state, $want) = @_;
 my $l = length $state->[0];
 if ($l < $want) {
   return if !length $state->[1]; # EOF
   my $add = 1 + (($want - $l) >> 3);
   # warn "Getting $add $want $l";
   # Put some more bits on the bit accumulator
   $state->[0] .= unpack "b*", substr $state->[1], 0, $add, "";
 }
 return substr $state->[0], 0, $want, "";
}
sub input {
 my ($state, $max_code) = @_;
 my $b = bits_needed($max_code) - 1;
 my $bits = getbits ($state, $b);
 return unless defined $bits;
 # warn "$state->[0] ", length $state->[0];
 my $code = unpack "V", pack "b32", $bits;
 # warn "$code '$state->[0]'";

 my $crossover = 1 << $b;

 if ($code <= ($max_code - $crossover)) {
   $code += $crossover if getbits ($state, 1);
   ++$b; print D "$code $b $crossover $max_code\n";
 } else {
   ++$b; print D "$code $b $crossover $max_code save\n";
 }
 $code;
}

sub compress {
 my $input;
 my $output = init_output;

 my $next_code = 1<<$bits;
 # TODO - map these to a frequency table with least likely bytes (or nibbles or
 # bits) first.
 my %codes;
 if ($bits == 8) {
   $input = shift;
   $codes{chr ($next_code - $_)} = $_ - 1 foreach 1..$next_code;
 } elsif ($bits == 4) {
   $input = unpack "h*", shift;
   $codes{sprintf "%x", ($next_code - $_)} = $_ - 1 foreach 1..$next_code;
 } elsif ($bits == 1) {
   $input = unpack "b*", shift;
   @codes{1, 0} = (0, 1);
 } else {
   die "Can't do bits $bits";
 }

 my $accumulator;
 my $pos = -length $input;
 while ($pos) {
   $accumulator .= substr($input, $pos++, 1);
   if (exists $codes{$accumulator}) {
     # good
     next;
   }
   # Oops. One too many.
   $codes{$accumulator} = $next_code;
   # Back up
   --$pos; chop $accumulator;
   output($output, $codes{$accumulator}, $next_code);
   $next_code++;
   # Yes, we go round the first bit of the loop again once too many.
   $accumulator = '';
 }
 output($output, $codes{$accumulator}, $next_code);
 return final($output);
}

sub uncompress {
 my $input = init_input @_;
 my $output;

 my %codes;
 my $next_code = 1<<$bits;
 # TODO - map these to a frequency table with least likely bytes (or nibbles or
 # bits) first.
 if ($bits == 8) {
   $codes{$_ - 1} = chr ($next_code - $_) foreach 1..$next_code;
 } elsif ($bits == 4) {
   $codes{$_ - 1} = sprintf "%x", ($next_code - $_) foreach 1..$next_code;
 } elsif ($bits == 1) {
   @codes{1, 0} = (0, 1);
 } else {
   die "Can't do bits $bits";
 }

 my $prev = $output = $codes{input ($input, $next_code)};
 while (1) {
   # Not quire sure why we need the +1
   my $code = input ($input, 1 + $next_code);
   last unless defined $code;
   if (exists $codes{$code}) {
     my $this = $codes{$code};
     $codes{$next_code++} = $prev . substr ($this, 0, 1);
     $prev = $this;
   } else {
     die "$next_code $code" unless $next_code == $code;
     $prev .= substr ($prev, 0, 1);
     $codes{$next_code++} = $prev;
   }
   $output .= $prev;
 }
 if ($bits == 8) {
   return $output;
 } elsif ($bits == 4) {
   pack "h*", $output;
 } elsif ($bits == 1) {
   pack "b*", $output;
 }
}

sub test {
 my ($name, $input) = @_;
 my $compress = compress $input;
 my $def = Compress::Zlib::compress($input);
 printf "We manage %d, Zlib managed %d\n", length $compress, length $def;
 #print "$compress\n";
 my $output = uncompress $compress;
 printf "Output Was %d now %d\n", length $input, length $output;
 print $output eq $input ? "Pass\n" : "Fail\n";
}

# Test code - compress ourselves.
seek DATA, 0, 0 or die $!;
undef $/;
test ("self", <DATA>);
open FH, $^X or die;
test ($^X,<FH>);

__END__