package Lingua::ES::Hyphenate;

use strict;
use warnings;

require Exporter;

# Inherit import() from Exporter the classic way, through @ISA.
our @ISA = ('Exporter');

# Public API.  Both functions are exported by default, may also be requested
# by name, and are grouped under the ':all' tag.
our @EXPORT      = ( 'hyphenate', 'syllable_cnt' );
our @EXPORT_OK   = @EXPORT;
our %EXPORT_TAGS = ( all => [@EXPORT] );

our $VERSION = '.01';

=head1 NAME

Lingua::ES::Hyphenate - Separates Spanish words into syllables

=head1 SYNOPSIS

 use Lingua::ES::Hyphenate;

 @syllables = hyphenate('oportunidad'); # @syllables now holds ('o','por','tu','ni','dad')

 # or

 $word = Lingua::ES::Hyphenate->new('oportunidad');
 @syllables = $word->hyphenate;

=head1 DESCRIPTION

       Separates Spanish words into syllables.

=head1 SPANISH SYLLABLE STRUCTURE

       <From Wikipedia>
       The Spanish syllable structure can be summarized as follows: C1 C2 S1 V S2 C3 C4
       Spanish syllable structure allows a maximum of two consonants in its onset,
       a nucleus of a vowel followed by and/or preceded by a semivowel,
       and a maximum of two consonants in its coda.
       The following restrictions apply:
       Onset
               First consonant (C1): Can be any consonant.
               Second consonant (C2): If and only if the first consonant is a plosive
               /p, t, k, b, d, g/ or a voiceless labiodental fricative /f/,
               then the second consonant can be a liquid /l, r/.
               Although they occur, the onsets /tl/ and /dl/ are not native to Spanish.
       Nucleus
               Semivowel (S1)
               Vowel (V)
               Semivowel (S2)
       Coda
               First consonant (C3): Can be any consonant.
               Second consonant (C4): Must be /s/.

=head1 SEE ALSO

       http://en.wikipedia.org/wiki/Spanish_phonology#Phonotactics

=cut

# File-scoped count of the syllables found in the most recently parsed word.
# Written by hyphenate() and syllable_cnt(); returned by syllable_cnt() when
# it is called without a word.
my $cnt;

# Matches one character that is allowed in a word; hyphenate() rejects input
# that is not made up entirely of these characters.
# NOTE(review): the non-ASCII letters in this class show up as replacement
# characters in this copy of the file, so the source encoding looks damaged --
# confirm against the original.  Per the original author's note, both cases of
# one accented letter are listed explicitly because /i did not fold them.
my $letters = qr/[A�BCDE�FGHI�JKLMN��OPQRSTU�VWXYZ]/i;
# Matches one consonant, where the digraphs RR, LL, CH and QU count as a single
# consonant.  The atomic group (?>...) prevents backtracking into a digraph;
# per the original author's note, the two-letter consonants do not work
# without it.
my $anyCons = qr/(?>RR|LL|CH|QU|[BCDFGHJKLMN�PQRSTVWXYZ])/i;


# Building blocks for the syllable onset (up to two consonants).
# $preR / $preL list the first consonants after which R / L may appear as the
# second consonant of an onset.
my $preR = qr/[PKCBGFTD]/i; # These may precede R in an onset
my $preL = qr/[PKCBGF]/i;       # These may precede L in an onset
# $C2 matches the second onset consonant (L or R).  It consumes only that one
# letter; the first consonant has already been consumed, so each alternative
# uses a lookbehind to check which consonant came immediately before.
# The first alternative is anchored with ^, so it only applies when the
# preceding consonant is the first character of the string.
my $C2 = qr/
       (?<=^$preR)L                    # At the beginning of a word, a TL or DL (loan words)
               |                                       # or
       (?<=$preR)R                             # PR KR CR BR GR FR TR DR
               |                                       # OR
       (?<=$preL)L                             # PL KL CL BL GL FL
/ix;#
# A full onset: one consonant (or digraph) followed by an optional $C2.
my $onset = qr/$anyCons$C2?/i;# C2 is optional

# Building blocks for the syllable nucleus and coda.
# $semiVowel: U or I, which may sit on either side of the main vowel.
my $semiVowel = qr/[UI]/i;
# $vowel: letters that can be the main vowel of a nucleus.
# NOTE(review): the accented letters in the next two classes show up as
# replacement characters in this copy of the file -- confirm the intended
# characters against the original.
my $vowel = qr/[A�E�O���]/i;
# $allVows: every vowel or semivowel; $syllable uses it to refuse a coda that
# is directly followed by a vowel.
my $allVows = qr/[UIA�E�O���]/i;
# $nucleus: optional semivowel + vowel + optional semivowel, or failing that a
# lone semivowel acting as the nucleus.
my $nucleus = qr/(?:$semiVowel?$vowel$semiVowel?)|$semiVowel/i;

# $coda: one consonant (or digraph) optionally followed by S.
my $coda = qr/${anyCons}S?/i;# separate $C4 variable seemed worthless.

# Matches one whole syllable: optional onset, mandatory nucleus, optional coda.
# The coda is only accepted when the text after it could not turn the coda's
# last consonant into the onset of the next syllable; otherwise the optional
# group is skipped and those consonants are left for the next match.
# hyphenate() and syllable_cnt() apply this pattern repeatedly with s///g.
# NOTE(review): Perl interpolates variables even inside /x comments, so the
# "$coda" mentioned in a comment below is expanded into the pattern text.  It
# stays on that comment line and looks harmless, but confirm before editing
# those comments.
my $syllable = qr/
       $onset? # onsets are optional
       $nucleus # nuclei are not optional
       (?: $coda
               # We must make sure that the letters after the coda cannot be an
               # onset to another syllable; if they are, we forget the coda and
               # parse the next consonants as the onset of the next syllable.
               (?(?<=$preL)                    # IF the matched $coda was a pre L consonant
                       (?!L)                           # don't match a following L
               )
               (?(?<=$preR)                    # IF the matched $coda was a pre R consonant
                       (?!R)                           # don't match a following R
               )
               (?!$allVows)                    # don't match a following vowel or semivowel
       )? # coda is optional
/ix;# ignore case

=head1 CONSTRUCTOR

       Not necessary, since functions are exported.

       my $hyphenater = Lingua::ES::Hyphenate->new('charlar');

=cut

# Constructor: wraps a word in an object.
# The object is a blessed reference to a private copy of the word; the
# method forms of hyphenate() and syllable_cnt() dereference it to get the
# word back.
sub new {
 my $class  = shift;
 my $stored = shift;
 return bless \$stored, $class;
}

=head1 hyphenate

       Returns array of syllables from input word.

       my $hyphenater = Lingua::ES::Hyphenate->new('charlar');
       @syllables = $hyphenater->hyphenate();

       # or

       @syllables = hyphenate('tomarlo');

=cut

# Split a Spanish word into its syllables.
#
# Call forms:
#   hyphenate($word)   - plain function; the word is passed as a string
#   $obj->hyphenate    - method on an object built by new(); the word stored
#                        in the object is used
#
# Returns the list of syllables.  Returns an empty list when no word is given,
# when the object holds no word, or when the word contains anything other
# than the characters accepted by $letters.
#
# Side effect: for an accepted word, the number of syllables found is stored
# in the file-scoped $cnt so that a later syllable_cnt() call without a word
# can report it.  A rejected word leaves $cnt untouched.
sub hyphenate {
       return () unless $_[0];

       my $arg = shift;

       # Objects are blessed scalar references that hold the word itself.
       my $word = ref($arg) eq 'Lingua::ES::Hyphenate' ? $$arg : $arg;

       # An object may have been built without a word; bail out quietly
       # rather than matching against undef, which would emit a warning.
       return () unless defined $word;

       # \A ... \z instead of ^ ... $: a bare $ also matches just before a
       # trailing newline, which used to let "hola\n" through and produced a
       # stray "\n" element in the result.
       return () unless $word =~ /\A$letters+\z/;

       # Mark the end of every syllable with '=' and split on that marker.
       # '=' cannot occur in the word because only letters were accepted
       # above.  A capture group is used instead of $&, which on perls older
       # than 5.20 slows down every regex match in the program.
       # s///g yields an empty string when nothing matched; store 0 instead.
       $cnt = ( $word =~ s/($syllable)/$1=/g ) || 0;

       return split '=', $word;
}

=head1 syllable_cnt

       Returns number of syllables in string argument.
       If no argument is provided, returns the number of
       syllables in the last word parsed.

       my $cnt = syllable_cnt('tomarlo');

       # or

       my $hyphenater = Lingua::ES::Hyphenate->new('charlar');
       my $cnt = $hyphenater->syllable_cnt(); # counts the object's own word, 'charlar'

       # or

       my @syllables = hyphenate('majaderías');
       $cnt = syllable_cnt();
       # same as
       $cnt = @syllables;

=cut

# Count the syllables in a word.
#
# Call forms:
#   syllable_cnt($word)  - count the syllables of $word
#   $obj->syllable_cnt   - count the syllables of the word held by an object
#                          built by new(); any further arguments are ignored
#   syllable_cnt()       - return the count remembered from the last word
#                          that was counted
#
# In function form, an empty or undefined word, or more than one argument, is
# treated like the argument-less call.  Unlike hyphenate(), the word is not
# checked against $letters before counting.
#
# Returns the count as a number (0 when no syllable is found).  The
# argument-less form returns whatever is stored in $cnt, which is undef if
# nothing has been counted yet.
#
# Side effect: every actual count is stored in the file-scoped $cnt.
sub syllable_cnt {
       my $is_object = ref( $_[0] ) eq 'Lingua::ES::Hyphenate';

       my $word;
       if ($is_object) {
               my $self = shift;
               $word = $$self;    # objects are blessed references to the word
       }
       elsif ( @_ == 1 ) {
               $word = shift;
       }

       # Treat a missing word as empty so the tests below never see undef
       # (the original warned here for undef input).
       $word = '' unless defined $word;

       # Function form with nothing to count: report the remembered value.
       return $cnt if !$is_object && $word eq '';

       # $word is a private copy, so deleting the syllables from it is safe.
       # s///g returns the number of substitutions, or an empty string when
       # nothing matched; normalise the latter to a real 0.
       $cnt = ( $word =~ s/$syllable//g ) || 0;
       return $cnt;
}

1;
=head1 AUTHOR

Nathan Glenn, <[email protected]>

=head1 COPYRIGHT AND LICENSE

Copyright 2010 by Nathan Glenn

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 NEEDS WORK

       Atlanta splits as 'A-tlan-ta'. Is that correct? 'tl' and 'dl' are not
       native sounds, and Atlanta is a loan word, so maybe it's okay.
       'At-lan-ta' seems more natural to me.

=cut