package Lingua::Spelling::Alternative;
require 5.001;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

use Data::Dumper;

use Exporter;
$VERSION = '0.10';
@ISA = ('Exporter');

#@EXPORT = qw();
@EXPORT_OK = qw(
	&alternatives
	);

my $debug=0;

=head1 NAME

Lingua::Spelling::Alternative - alternative spelling of a given word in a given language

=head1 SYNOPSIS

  use Lingua::Spelling::Alternative;

  my $en = new Lingua::Spelling::Alternative;
  $en->load_affix('/usr/lib/ispell/default.aff') or die $!;
  print join(" ",$en->alternatives("cars")),"\n";

=head1 DESCRIPTION

This module is designed to return all forms of a given word
(for example when you want to see all possible forms of some word
entered in search engine) which can be generated using affix file (from
ispell) or findaffix output file (also part of ispell package) or using
OpenOffice.org affix files.

=head1 PUBLIC METHODS

=head2 new

The new() constructor (without parameters) create container for new language.

 my $en = new Lingua::Spelling::Alternative(
 	debug => 1,
	min_length => 3,
 );

Options:

=over 4

=item debug

Turns debugging which will be spilled on C<STDOUT>.

=item min_length

Minimum word length (by default B<3>) which will be considered for
C<alternatives>.

=back

=cut

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);
	$debug = 1 if ($self->{DEBUG} || $self->{debug});
	$self->clear;
	$self->{'min_length'} ||= 3;
	$self ? return $self : return undef;
}

=head2 clear

This method will clear internal affix and is called every time you use
C<load_affix>, C<load_findaffix> or C<load_oooaffix> on same object.

=cut

sub clear {
	my $self = shift;

	@{$self->{affix}} = ();

	return 1;
}

=head2 load_affix

Loads ispell's affix file for later usage. It will create internal
structures needed for other methods.

 $en->load_affix('/etc/dictionaries-common/default.aff');

=cut

sub load_affix {
	my $self = shift;
	my $filename = shift;

	my ($prefix,$suffix,$combine)=('','',0);

	print STDERR "reading affix file $filename\n" if ($debug);

	open (A,$filename) || die "Can't open affix file $filename: $!";
	while(<A>) {
		chomp;
		next if (/^#|^[\s\t\n\r]*$/);

		if (/^suffixes/i) {
			($prefix,$suffix) = ('','$');
			next;
		}

		next if (! $suffix && ! $prefix);

		if (/^flag[\s\t]+(\*?)(.):/i) {
			$combine = $1;
			print STDERR "pattern $2",($combine && " combine with prefix"),"\n" if ($debug);
			next;
		}

		my ($reg,$sub,$add);

		if (/^[\s\t]+([^>#\s\t]+)[\s\t]*>[\s\t]*-?([^\,\s\t]+)?,?([^\s\t]+)?/) {
			($reg,$sub,$add) = ($1,$2,$3);
			print STDERR "rule: $_\n" if ($debug);
			if (! $add) {
				$add = $sub;
				$sub = '';
			}

		} else {
			print STDERR "skip: $_\n";
			next;
		}

		printf STDERR "adding: /$reg/ -> s/$sub/$add/i\n" if ($debug);

		push @{$self->{'affix'}}, [ qr/${prefix}${reg}${suffix}/i, qr/^(.+)${sub}${suffix}/i, $add ];
		push @{$self->{'affix'}}, [ qr/${prefix}${add}${suffix}/i, qr/^(.+)${add}${suffix}/i, $sub ];

	}

	print STDERR Dumper($self->{'affix'}) if ($debug); # XXX

	return 1;
}

=head2 load_findaffix

This function loads output of findaffix program from ispell package.
This is better idea (if you are creating affix file for particular language
yourself or you can get your hands on one) because affix file from ispell
is limited to 26 entries (because each entry is denoted by single character).

 $en->load_findaffix('findaffix.out');

=cut

sub load_findaffix {
	my $self = shift;
	my $filename = shift;

	print STDERR "reading findaffix output $filename\n" if ($debug);

	open (A,$filename) || die "Can't open findaffix output $filename: $!";
	while(<A>) {
		chomp;
		my ($sub,$add,undef,undef) = split(m;/;,$_,4);
		if ($sub && $add) {
			push @{$self->{'affix'}}, [ qr/./, qr/^(.+)${sub}$/i, $add ];
		}
	}
	return 1;
}

=head2 alternatives

Return array of all alternative spellings of particular
word(s). It will also return spellings which are not lexically correct if
there is rule like that in affix file.

 print $en->alternatives(qw(cat dog));
 print $en->alternatives('demo');

=cut

sub alternatives {
	my $self = shift;
	my @out;
	foreach my $word (@_) {
		# save original word
		push @out,$word;
		# skip short words
		next if (length($word) < $self->{'min_length'});

		foreach my $a (@{$self->{'affix'}}) {
			next if ($word !~ $a->[0]);
			if ($word =~ $a->[1]) {
				push @out,lc($1.$a->[2]);
				print STDERR $word," -> ",$1.$a->[2], " [",$a->[0]," ... ",$a->[1],"]\n" if ($debug);
			}
		}
	}
	return @out;
}

=head2 minimal

This function returns minimal of all alternatives of a given word(s). It's
a poor man's version of normalize (because we don't know grammatic of
particular language, just some spelling rules).

 print $en->minimal('informations');

Special case is when there is only one argument, and result is expecte in
scalar context. In this case it will return just minimal length alternative
of this word (new in version 0.10).

=cut

sub minimal {
	my $self = shift;
	my @out;
	my $argc = 0;	# argument count
	foreach my $word (@_) {
		$argc++;
		my @alt = $self->alternatives($word);
		my $minimal = shift @alt;
		foreach (@alt) {
			$minimal=$_ if (length($_) < length($minimal));
		}
		push @out, $minimal;
	}
	return @out if wantarray;
	warn "called in scalar context with more than one word" if ($argc > 1);
	return shift @out;
}

###############################################################################
1;
__END__

=head1 EXAMPLES

Please see the test program in distribution which exercises some
aspects of Alternative.pm.

=head1 BUGS

There are no known bugs. If you find any, please report it in CPAN's
request tracker at: http://rt.cpan.org/

=head1 CONTACT AND COPYRIGHT

Copyright 2002-2005 Dobrica Pavlinusic (dpavlin@rot13.org). All
rights reserved.  This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

=cut
