#!/usr/bin/perl
# Copyright 2003-2007 Vlado Keselj
# http://www.cs.dal.ca/~vlado
# Print the usage text, with the program version interpolated, to STDOUT.
# Everything from the next line up to the "#EOT" line is the here-document
# body (program output), not Perl comments.
sub help { print <<"#EOT" }
# Find equal files in a directory tree, version $VERSION
#
# Relies on diff.
#
# Usage: find-equal-files [switches] [directories]
# -n find equal files even if they have different names
# -i report equal files as found, beside the final report
# -h Print help and exit.
# -r pathp Remove redundant files that have path prefix pathp
# Removes only if there is at least one equal file without it
# -t Test mode, used with -r, but only report what would be
# removed instead of removing it.
# -v Print version of the program and exit.
#EOT
use strict;
use vars qw( $VERSION %Tab );
# Version string built from the numbers in the RCS Revision keyword.
$VERSION = sprintf "%d.%d", q$Revision: 1.7 $ =~ /(\d+)/g;
use Getopt::Std;
use vars qw($opt_v $opt_h $opt_n $opt_i $opt_r $opt_t);

# Main program: parse the switches, scan the given paths, print the report
# and, when -r was given, remove (or list) the redundant copies.
#
# getopts returns false for an unknown switch or a missing -r argument;
# show the usage text and stop instead of scanning with half-parsed options.
unless (getopts("vhnitr:")) { help(); exit 1; }

if    ($opt_v)           { print "$VERSION\n"; exit; }
elsif ($opt_h || !@ARGV) { help(); exit; }

# Flush STDOUT after every print.
# NOTE(review): presumably needed because find_equal_files temporarily
# re-opens STDOUT while running diff -- confirm before removing.
$| = 1;

find_equal_files(@ARGV);

# %Tab maps a bucket key to a list of groups; each group holds the first
# file seen (file0) and the later files found equal to it (otherfiles).
# Keys are sorted so that the report order is the same on every run.
print "FINAL REPORT:\n";
foreach my $key (sort keys %Tab) {
    foreach my $group (@{ $Tab{$key} }) {
        my @others = @{ $group->{otherfiles} };
        next unless @others;          # a file with no duplicates is not reported
        print "equal files: $group->{file0}\n";
        foreach my $other (@others) { print " and: $other\n" }
    }
}

# -r takes a path prefix; an absent or empty prefix means "do not remove".
remove_files() if defined($opt_r) && length($opt_r) > 0;
# Remove redundant copies whose path starts with the -r prefix ($opt_r).
#
# Reads the global %Tab (bucket key => list of groups, each group being
# { file0 => first file, otherfiles => [files equal to it] }).  For every
# group, the files are split into those whose path begins with $opt_r and
# those that do not.  Files with the prefix are deleted only when at least
# one equal file without the prefix remains, so the last copy is never lost.
# With -t ($opt_t) nothing is deleted; the candidates are only listed.
#
# Takes no arguments and returns nothing useful.  A failed unlink is
# reported on STDERR and processing continues with the next file.
sub remove_files {
    print "REMOVING FILES with prefix \"$opt_r\"";
    print " (TEST MODE)" if $opt_t;
    print ":\n";

    # Sorted keys give the same processing order on every run.
    foreach my $key (sort keys %Tab) {
        foreach my $group (@{ $Tab{$key} }) {
            my @files = ($group->{file0}, @{ $group->{otherfiles} });
            next unless @files > 1;

            # Single pass: prefix match decides which side a file lands on.
            my (@doomed, @kept);
            foreach my $file (@files) {
                if (index($file, $opt_r) == 0) { push @doomed, $file }
                else                           { push @kept,   $file }
            }

            # Nothing to remove, or removal would delete every copy.
            next unless @doomed && @kept;

            print "Keep: @kept\n";
            foreach my $file (@doomed) {
                if ($opt_t) {
                    print " TO RM $file\n";
                    next;
                }
                print " Removing $file\n";
                unlink($file) or warn "Could not remove $file: $!\n";
            }
        }
    }
}
# Scan the given paths and record every regular file in the global %Tab,
# grouping files whose contents diff(1) reports as identical.
#
# Arguments: a list of file or directory paths.  Directories are entered
# recursively.  Symbolic links and paths that do not exist are ignored.
#
# Effect on %Tab: bucket key => list of groups, where a group is
# { file0 => first file seen, otherfiles => [later files equal to it] }.
# Dies if a directory cannot be opened or if diff cannot be started.
sub find_equal_files {
    while (@_) {
        my $path = shift;
        next if -l $path || !-e $path;   # symbolic link or does not exist: ignore it

        unless (-d $path) {              # a file
            _record_file($path);
            next;
        }

        # Read the whole listing and close the handle before descending, so
        # the number of open directory handles does not grow with depth.
        opendir(my $dh, $path) || die "can't opendir $path: $!";
        my @entries = grep { $_ ne '.' && $_ ne '..' } readdir($dh);
        closedir($dh);

        foreach my $entry (@entries) {
            find_equal_files("$path/$entry");
        }
    }
}

# Put one regular file into %Tab: append it to the first group in its bucket
# whose file0 has identical contents, or start a new group for it.
sub _record_file {
    my ($file) = @_;

    my $size = (stat $file)[7];
    unless (defined $size) {             # vanished or unreadable since the -e test
        warn "can't stat $file: $!\n";
        return;
    }

    my $basename = $file;
    if ($file =~ /\/([^\/]+)$/) { $basename = $1 }

    # Only files in the same bucket are ever compared: same size with -n,
    # same name and size otherwise.
    my $key    = $opt_n ? $size : "$basename $size";
    my $bucket = $Tab{$key} ||= [];

    foreach my $group (@$bucket) {
        next unless _same_content($group->{file0}, $file);
        push @{ $group->{otherfiles} }, $file;
        if ($opt_i) { print "equal files:$group->{file0}\n and:$file\n" }
        return;
    }
    push @$bucket, { file0 => $file, otherfiles => [] };
}

# Return true when diff exits with status 0 for the two files.  diff runs
# with STDOUT and STDERR pointed at /dev/null; both are restored afterwards.
sub _same_content {
    my ($first, $second) = @_;

    local (*SAVEOUT, *SAVEERR);          # duplicates used to restore the streams
    open(SAVEOUT, ">&STDOUT");
    open(SAVEERR, ">&STDERR");
    open(STDOUT, ">/dev/null") ||
        die "Can't redirect stdout to /dev/null: $!";
    unless (open(STDERR, ">/dev/null")) {
        # The failed open left STDERR closed; put both streams back first so
        # that the message below is actually visible.
        my $reason = $!;
        open(STDERR, ">&SAVEERR");
        open(STDOUT, ">&SAVEOUT");
        die "Can't redirect stderr to /dev/null: $reason";
    }

    my $status     = system('diff', $first, $second);
    my $exec_error = $!;                 # capture before the opens below reset it

    close(STDERR); open(STDERR, ">&SAVEERR");
    close(STDOUT); open(STDOUT, ">&SAVEOUT");

    # -1 means diff could not be started at all; treating that as "files
    # differ" would make every scan silently report no equal files.
    die "Can't run diff: $exec_error\n" if $status == -1;

    return $status == 0;
}