"Dr.Ruud" schreef:
> Slight revision, that fails on the last line:
A revision that assumes more about the input:
#!/usr/bin/perl
use warnings ;
use strict ;
# Start-of-buffer anchor: the regex fragment that matches only at the very
# beginning of the string.
sub SOB {
    return '\A';
}
# End-of-buffer anchor: the regex fragment that matches only at the very
# end of the string.
sub EOB {
    return '\z';
}
# Builds an alternation: joins all given regex fragments with '|'.
# No grouping is added; wrap the result yourself if it is part of a
# larger pattern.
sub OR {
    my @alternatives = @_;
    return join( '|', @alternatives );
}
# Separator fragment: one or more horizontal blanks (POSIX [:blank:]).
sub sp {
    return '[[:blank:]]+';
}
# Wraps the given fragment(s) in a capturing group.
# Several arguments are interpolated as a list, i.e. joined with $".
sub capture {
    my $inner = "@_";
    return '(' . $inner . ')';
}
# Wraps the given fragment(s) in a non-capturing group that may match
# zero or one time.
# Several arguments are interpolated as a list, i.e. joined with $".
sub optional {
    my $inner = "@_";
    return '(?:' . $inner . ')?';
}
# Wraps the given fragment(s) in a non-capturing group that may repeat
# zero or more times.
# Several arguments are interpolated as a list, i.e. joined with $".
sub optimany {
    my $inner = "@_";
    return '(?:' . $inner . ')*';
}
# Appends '?' to the given fragment, turning a trailing quantifier into
# its non-greedy form. The caller must pass a fragment that already ends
# in a quantifier.
sub ungreedy {
    my $quantified = "@_";
    return $quantified . '?';
}
# Wraps the given fragment(s) in a zero-width positive lookahead, so the
# pattern must match at this position without consuming any input.
# Several arguments are interpolated as a list, i.e. joined with $".
# The body had been replaced by a mail-archive address-obfuscation
# placeholder; this restores the lookahead construct, which does not
# add a capture group.
sub ahead { "(?=@_)" }
# Fragment for a number: one or more digits.
sub REnumber {
    return '\d+';
}
# Fragment for a word: one or more word characters.
sub REword {
    return '\w+';
}
# Fragment matching one two-letter code out of a fixed set (language
# codes, going by the sub's name), written as a non-capturing alternation.
# The literal spans several lines, so the pattern it ends up in must be
# compiled with /x for the embedded newlines to be ignored.
sub RElang {
    my $codes = '
(?:
a[ly]|b[gs]|cs|d[ae]|e[nst]|
f[ir]|gr|h[eruy]|it|ja|kk|lv|nl|
p[blt]|r[ou]|s[klqrv]|t[hr]|uk|zh)
';
    return $codes;
}
# Fragment for a blank-separated list of words: one word, then a
# non-greedy run of "blanks + word" repetitions, then whatever ahead()
# builds around "blanks or end of string".
# The non-greedy repetition takes as few words as the rest of the
# pattern allows.
sub REwordlist {
    my $first_word = REword();
    my $more_words = ungreedy( optimany( sp() . REword() ) );
    my $boundary   = ahead( OR( sp(), EOB() ) );
    return $first_word . $more_words . $boundary;
}
# Fragment for a comma-separated list of RElang codes: one code followed
# by zero or more ",code" repetitions. No blanks are allowed around the
# commas.
sub RElanglist {
    my $code = RElang();
    return $code . optimany( ',' . $code );
}
# Assemble the complete line pattern from the fragment builders. In
# order, capture() wraps: an optional leading number, the word list, an
# optional language list, and an optional number directly followed by
# "cd". The whole line must match (anchored at both ends).
my $re = SOB()
    . optional( capture( REnumber() ) . sp() )
    . capture( REwordlist() )
    . optional( sp() . capture( RElanglist() ) )
    . optional( sp() . capture( REnumber() ) . 'cd' )
    . EOB();

# Show the generated pattern source for inspection.
print "re/$re/\n\n\n";

# /x is needed because the assembled pattern contains layout whitespace.
my $qr = qr/ $re /x;

# Read every line that follows __DATA__, echo it, normalise it, and
# print the first four capture groups if the pattern matches.
while ( my $line = <DATA> ) {
    print "\n";
    print $line;

    $line =~ s/\A[[:blank:]]+//;    # drop leading blanks
    $line =~ s/\s+\z//;             # drop trailing whitespace; chomps as well
    # Remove blanks around commas so a list like "en, pt" becomes "en,pt".
    $line =~ s/[[:blank:]]+,[[:blank:]]*|,[[:blank:]]+/,/g;

    if ( $line =~ $qr ) {
        # Optional groups that did not take part in the match are undef;
        # silence only that warning while printing them as empty.
        no warnings 'uninitialized';
        print "($1) ($2) ($3) ($4)\n";
    }
}
__DATA__
word
word word
word word word
1 word
1 word word word
1 word en,pt,sk
1 word en 1cd
1 word word en 1cd
--
Affijn, Ruud
"Gewoon is een tijger."
--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>