#!/usr/bin/perl -w

=head1 NAME

fdupes-ln.pl - Find duplicates and hardlink them

=head1 SYNOPSIS

  fdupes-ln.pl dir [dir dir...]

=head1 DESCRIPTION

Small script which uses fdupes from L<http://premium.caribe.net/~adrian2/fdupes.html>
to find files with identical content and replace the duplicates with hard links
to a single copy.

=head1 AUTHOR

Dobrica Pavlinusic <dpavlin@rot13.org>

L<http:E<sol>E<sol>www.rot13.orgE<sol>~dpavlinE<sol>>

=cut

use strict;

# Set to a true value to log every replacement (as an "ln -f" line) on STDERR.
my $debug = 0;

# Split one line of "fdupes -1" output into the file names it lists.
#
# Names on a line are separated by spaces; a space or backslash that is part
# of a name is expected to be preceded by a backslash.  Returns the list of
# unescaped names (an empty list for a blank line).
sub _split_fdupes_line {
	my ($line) = @_;
	chomp $line;

	# a name is a run of backslash-escaped characters and plain characters
	# other than space and backslash
	my @names = $line =~ /((?:\\.|[^\\ ])+)/g;
	s/\\(.)/$1/g foreach @names;

	return @names;
}

# Replace $path with a hard link to $master.
#
# The link is first created under a temporary name next to $path and then
# renamed over it, so $path is never missing even if linking fails (e.g. on
# a cross-device link).  Dies with a message on failure.
sub _replace_with_link {
	my ($master, $path) = @_;

	my $tmp = "$path.fdupes-ln." . $$;

	link $master, $tmp or die "can't link $master -> $path: $!\n";

	unless (rename $tmp, $path) {
		my $err = $!;
		unlink $tmp;	# best effort, don't leave the temporary link behind
		die "can't rename $tmp -> $path: $err\n";
	}

	return;
}

foreach my $dir (@ARGV) {
	next unless -d $dir;

	warn "finding duplicates in $dir\n";

	# list form bypasses the shell, so $dir can't be mangled or injected;
	# "--" stops a directory name starting with "-" being read as an option
	open(my $fdupes, '-|', 'fdupes', '-1', '-r', '--', $dir)
		|| die "can't run fdupes -r $dir: $!\n";

	while (my $line = <$fdupes>) {
		# presumably skips a non-file status line of some fdupes versions
		next if ($line =~ /md5sum/);

		# stat every name once; files which vanished in the meantime are ignored
		my %stat_of;
		my @dupes;
		foreach my $name (_split_fdupes_line($line)) {
			my @st = stat($name);
			unless (@st) {
				warn "can't stat $name: $!\n";
				next;
			}
			$stat_of{$name} = \@st;
			push @dupes, $name;
		}

		# nothing to link for a blank line or a single remaining file
		next if (@dupes < 2);

		# file with the most links becomes master, so fewest names are touched
		my ($master, @others) = sort { $stat_of{$b}->[3] <=> $stat_of{$a}->[3] } @dupes;

		warn "using $master [", $stat_of{$master}->[3], "] for ", scalar(@dupes) , " dupes\n";

		foreach my $path ( @others ) {
			# same device and inode: already a hard link to master
			next if ($stat_of{$path}->[0] == $stat_of{$master}->[0]
				&& $stat_of{$path}->[1] == $stat_of{$master}->[1]);

			warn "ln -f $master $path # ", (stat($master))[3], ":", (stat($path))[3], "\n" if ($debug);
			_replace_with_link($master, $path);
		}
	}

	# close on a pipe also reports a non-zero exit status of fdupes
	close($fdupes)
		|| warn "fdupes -r $dir: ", ($! ? "error closing pipe: $!" : "exit status " . ($? >> 8)), "\n";
}

