#!/usr/bin/perl
# Copyright 2003-2007 Vlado Keselj http://www.cs.dal.ca/~vlado

# help()
#
# Prints the usage text to the currently selected output handle (STDOUT by
# default).  The here-document terminator is the literal string "#EOT", so
# every line of the text, including the terminator, starts with '#'; the
# leading "# " is therefore part of what gets printed.  The version number is
# interpolated from the package variable $main::VERSION, which is qualified
# explicitly so the text does not depend on where "use strict" is placed.
sub help { print <<"#EOT" }
# Find equal files in a directory tree, version $main::VERSION
#
# Relies on diff.
#
# Usage: find-equal-files [switches] [directories]
#  -n  find equal files even if they have different names
#  -i  report equal files as found, beside the final report
#  -h  Print help and exit.
#  -r pathp Remove redundant files whose path starts with pathp
#      Removes only if at least one equal file is outside pathp
#  -t  Test mode, used with -r, but only report what would be
#      removed instead of removing it.
#  -v  Print version of the program and exit.
#EOT

use strict;
# $VERSION is the program version; %Tab is the table filled in by
# find_equal_files and read by the report and removal passes.
use vars qw( $VERSION %Tab );
# The RCS "Revision" keyword is matched in list context, so sprintf receives
# the major and minor numbers as separate arguments.
$VERSION = sprintf "%d.%d", q$Revision: 1.7 $ =~ /(\d+)/g;

use Getopt::Std;
use vars qw($opt_v $opt_h $opt_n $opt_i $opt_r $opt_t);

# getopts reports an unknown or malformed switch itself and returns false.
# Carrying on regardless is dangerous here: a mistyped -t next to -r would
# silently turn a dry run into a real deletion.  Show the usage and stop.
unless (getopts("vhnitr:")) { help(); exit 1; }

if ($opt_v) { print "$VERSION\n"; exit; }
elsif ($opt_h || !@ARGV) { help(); exit; }

$| = 1;                         # unbuffered STDOUT
find_equal_files(@ARGV);

# Summary pass: walk every group recorded in %Tab and list those that
# collected at least one duplicate, first file on the "equal files" line and
# each further copy on its own "and" line.
print "FINAL REPORT:\n";
for my $groups (values %Tab) {
    for my $group (@$groups) {
        my @copies = @{ $group->{otherfiles} };
        next if !@copies;
        print "equal files: $group->{file0}\n";
        print "        and: $_\n" for @copies;
    }
}

# The removal pass runs only when -r was given a non-empty prefix.
remove_files() if length($opt_r) > 0;

# remove_files()
#
# Removal pass over the groups of equal files stored in the global %Tab.
# Takes no arguments; it is driven by two globals:
#   $opt_r  path prefix; files whose path starts with it are candidates
#   $opt_t  when true, only report what would be removed
#
# For each group with more than one member, the members are split into those
# that start with $opt_r and those that do not.  A group is acted on only when
# both sets are non-empty, so at least one copy outside the prefix always
# survives.  Progress is printed to STDOUT; a failed unlink is reported with
# warn (including the system error) instead of passing unnoticed.
sub remove_files {
    print "REMOVING FILES with prefix \"$opt_r\"";
    print " (TEST MODE)" if $opt_t;
    print ":\n";

    foreach my $key (keys %Tab) {
        foreach my $group (@{ $Tab{$key} }) {
            my @files = ($group->{file0}, @{ $group->{otherfiles} });
            next unless @files > 1;

            # Partition the group in one pass: index(...) == 0 means the
            # path begins with the prefix.
            my (@doomed, @kept);
            foreach my $file (@files) {
                if (index($file, $opt_r) == 0) { push @doomed, $file }
                else                           { push @kept,   $file }
            }
            next unless @doomed && @kept;

            print "Keep: @kept\n";
            foreach my $file (@doomed) {
                if ($opt_t) {
                    print " TO RM   $file\n";
                    next;
                }
                print "   Removing $file\n";
                unlink($file) or warn "Could not remove $file: $!\n";
            }
        }
    }
}

# _same_content($file_a, $file_b)
#
# Private helper: runs diff(1) on the two paths and returns true when it
# exits with status 0 (no differences).  diff's own output is irrelevant, so
# STDOUT and STDERR are pointed at /dev/null for the duration of the call and
# put back afterwards, also when a redirect fails.  Dies if the streams
# cannot be saved or redirected.
sub _same_content {
    my ($file_a, $file_b) = @_;

    open(my $saved_out, '>&', \*STDOUT) or die "Can't save stdout: $!";
    open(my $saved_err, '>&', \*STDERR) or die "Can't save stderr: $!";
    my $restore = sub {
        open(STDERR, '>&', $saved_err);
        open(STDOUT, '>&', $saved_out);
    };

    # Restore before dying so the message has somewhere to go.
    open(STDOUT, '>', '/dev/null')
        or do { $restore->(); die "Can't redirect stdout to /dev/null" };
    open(STDERR, '>', '/dev/null')
        or do { $restore->(); die "Can't redirect stderr to /dev/null" };

    # List form bypasses the shell; "--" stops diff from reading a path
    # that begins with '-' as an option.
    my $status = system('diff', '--', $file_a, $file_b);

    $restore->();
    return $status == 0;
}

# find_equal_files(@paths)
#
# Walks each given path and records every file in the global %Tab, grouping
# files whose contents are equal.
#
#   * Symbolic links and non-existent paths are skipped.
#   * A directory is entered recursively (all entries except "." and "..");
#     the call dies if the directory cannot be opened.
#   * Anything else is treated as a file.  Its key is "<basename> <size>", or
#     just the size when the global $opt_n is set.  %Tab maps each key to a
#     list of groups { file0 => first file seen, otherfiles => [copies] }.
#     The file is compared against file0 of each group under its key and is
#     appended to the first group it equals; otherwise it starts a new group.
#   * With the global $opt_i set, each match is printed as soon as found.
sub find_equal_files {
    while (@_) {
        my $path = shift;

        next if -l $path || !-e $path; # symbolic link or does not exist: ignore it

        if (-d $path) {
            # Read the whole listing and close the handle before descending,
            # so deep trees do not keep one open handle per level.
            opendir(my $dh, $path) || die "can't opendir $path: $!";
            my @entries = grep { $_ ne '.' && $_ ne '..' } readdir($dh);
            closedir($dh);

            foreach my $entry (@entries) {
                find_equal_files("$path/$entry");
            }
            next;
        }

        my $size     = (stat $path)[7];
        my $basename = $path =~ m{/([^/]+)$} ? $1 : $path;
        my $key      = $opt_n ? $size : "$basename $size";

        # Only files sharing a key can be equal, so diff is run against the
        # representative of each existing group under that key.
        my $groups = $Tab{$key} ||= [];
        my $match;
        foreach my $group (@$groups) {
            next unless _same_content($group->{file0}, $path);
            $match = $group;
            last;
        }

        if ($match) {
            push @{ $match->{otherfiles} }, $path;
            print "equal files:$match->{file0}\n        and:$path\n" if $opt_i;
        }
        else {
            push @$groups, { file0 => $path, otherfiles => [] };
        }
    }
}