Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_portuguese.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_portuguese.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_portuguese.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_portuguese.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include sep_approx_french
Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_spanish.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_spanish.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_spanish.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_approx_spanish.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include sep_approx_french Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_any.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_any.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_any.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_any.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"E" "" "" "e" \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_approx_common.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_approx_common.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_approx_common.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_approx_common.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Sephardic + +"h" "" "$" "" + +// VOICED - UNVOICED CONSONANTS +"b" "" "[fktSs]" "p" +"b" "" "p" "" +"b" "" "$" "p" +"p" "" "[vgdZz]" "b" +"p" "" "b" "" + +"v" "" "[pktSs]" "f" +"v" "" "f" "" +"v" "" "$" "f" +"f" "" "[vbgdZz]" "v" +"f" "" "v" "" + +"g" "" "[pftSs]" "k" +"g" "" "k" "" +"g" "" "$" "k" +"k" "" "[vbdZz]" "g" +"k" "" "g" "" + +"d" "" "[pfkSs]" "t" +"d" "" "t" "" +"d" "" "$" "t" +"t" "" "[vbgZz]" "d" +"t" "" "d" "" + +"s" "" "dZ" "" +"s" "" "tS" "" + +"z" "" "[pfkSt]" "s" +"z" "" "[sSzZ]" "" +"s" "" "[sSzZ]" "" +"Z" "" "[sSzZ]" "" +"S" "" "[sSzZ]" "" + +// SIMPLIFICATION OF CONSONANT CLUSTERS +"nm" "" "" "m" + +// DOUBLE --> SINGLE +"ji" "^" "" "i" + +"a" "" "a" "" +"b" "" "b" "" +"d" "" "d" "" +"e" "" "e" "" +"f" "" "f" "" +"g" "" "g" "" +"i" "" "i" "" +"k" "" "k" "" +"l" "" "l" "" +"m" "" "m" "" +"n" "" "n" "" +"o" "" "o" "" +"p" "" "p" "" +"r" "" "r" "" +"t" "" "t" "" +"u" "" "u" "" +"v" "" "v" "" +"z" "" "z" "" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_common.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_common.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_common.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_common.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include sep_exact_approx_common + +"h" "" "" "" +//"C" "" "" "k" // c that can actually be ç + +// VOICED - UNVOICED CONSONANTS +"s" "[^t]" "[bgZd]" "z" +"Z" "" "[pfkst]" "S" +"Z" "" "$" "S" +"S" "" "[bgzd]" "Z" +"z" "" "$" "s" + +//special character to deal correctly in Hebrew match +"B" "" "" "b" +"V" "" "" "v" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_french.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_french.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_french.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_french.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Sephardic \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_hebrew.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_hebrew.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_hebrew.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_hebrew.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// empty \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_italian.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_italian.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_italian.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_italian.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// empty \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_portuguese.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_portuguese.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_portuguese.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_portuguese.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// empty \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_spanish.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_spanish.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_spanish.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_exact_spanish.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// empty \ No newline at end of file Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_hebrew_common.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_hebrew_common.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_hebrew_common.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_hebrew_common.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include sep_exact_approx_common + +"E" "" "" "" // final French "e": only in Sephardic + +"ts" "" "" "C" // for not confusion Gutes [=guts] and Guts [=guc] +"tS" "" "" "C" // same reason +"S" "" "" "s" +"p" "" "" "f" +"b" "^" "" "b" +"b" "" "" "(b|v)" + +"ja" "" "" "i" +"je" "" "" "i" +"aj" "" "" "i" +"j" "" "" "i" + +"a" "^" "" "1" +"e" "^" "" "1" +"a" "" "$" "1" +"e" "" "$" "1" + +"a" "" "" "" +"e" "" "" "" + +"oj" "^" "" "(u|vi)" +"uj" "^" "" "(u|vi)" + +"oj" "" "" "u" +"uj" "" "" "u" + +"ou" "^" "" "(u|v|1)" +"o" "^" "" "(u|v|1)" +"u" "^" "" "(u|v|1)" + +"o" "" "$" "(u|1)" +"u" "" "$" "(u|1)" + +"ou" "" "" "u" +"o" "" "" "u" + +"VV" "" "" "u" // alef/ayin + vov from ruleshebrew +"L" "^" "" "1" // alef/ayin from ruleshebrew +"L" "" "$" "1" // alef/ayin from ruleshebrew +"L" "" "" " " // alef/ayin from ruleshebrew +"WW" "^" "" "(vi|u)" // vav-yod from ruleshebrew +"WW" "" "" "u" // vav-yod from ruleshebrew +"W" "^" "" "(u|v)" // vav from ruleshebrew +"W" "" "" "u" // vav from ruleshebrew + +// "g" "" "" "(g|Z)" +// "z" "" "" "(z|Z)" +// "d" "" "" "(d|dZ)" + +"T" "" "" "t" // tet from ruleshebrew + +// "k" "" "" "(k|x)" +// "x" "" "" "(k|x)" +"K" "" "" "k" // kof and initial kaf from ruleshebrew +"X" "" "" "x" // khet and final kaf from ruleshebrew + +// special for Spanish initial B/V +"B" "" "" "v" +"V" "" "" "b" + +"H" "^" "" "(x|1)" +"H" "" "$" "(x|1)" +"H" "" "" "(x|)" +"h" "^" "" "1" +"h" "" "" "" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_languages.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_languages.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_languages.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_languages.txt Wed Jul 27 02:29:11 
2011 @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +any +french +hebrew +italian +portuguese +spanish Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_any.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_any.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_any.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_any.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// SEPHARDIC: INCORPORATES Portuguese + Italian + Spanish(+Catalan) + French + +// CONSONANTS +"ph" "" "" "f" // foreign +"sh" "" "" "S" // foreign +"kh" "" "" "x" // foreign + +"gli" "" "" "(gli|l[italian])" +"gni" "" "" "(gni|ni[italian+french])" +"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" +"gh" "" "" "g" // It + translit. from Arabic +"dh" "" "" "d" // translit. from Arabic +"bh" "" "" "b" // translit. from Arabic +"th" "" "" "t" // translit. from Arabic +"lh" "" "" "l" // Port +"nh" "" "" "nj" // Port + +"ig" "[aeiou]" "" "(ig|tS[spanish])" +"ix" "[aeiou]" "" "S" // Sp +"tx" "" "" "tS" // Sp +"tj" "" "$" "tS" // Sp +"tj" "" "" "dZ" // Sp +"tg" "" "" "(tg|dZ[spanish])" + +"gi" "" "[aeou]" "dZ" // italian +"g" "" "y" "Z" // french +"gg" "" "[ei]" "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])" +"g" "" "[ei]" "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])" + +"guy" "" "" "gi" +"gue" "" "$" "(k[french]|ge)" +"gu" "" "[ei]" "(g|gv)" // not It +"gu" "" "[ao]" "gv" // not It + +"ñ" "" "" "(n|nj)" +"ny" "" "" "nj" + +"sc" "" "[ei]" "(s|S[italian])" +"sç" "" "[aeiou]" "s" // not It +"ss" "" "" "s" +"ç" "" "" "s" // not It + +"ch" "" "[ei]" "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])" +"ch" "" "" "(S|tS[spanish]|dZ[spanish])" + +"ci" "" "[aeou]" "(tS[italian]|si)" +"cc" "" "[eiyéèê]" "(tS[italian]|ks[portuguese+french+spanish])" +"c" "" "[eiyéèê]" "(tS[italian]|s[portuguese+french+spanish])" +//"c" "" "[aou]" "(k|C[portuguese+spanish])" // "C" means that the actual letter could be "ç" (cedille 
omitted) + +"s" "^" "" "s" +"s" "[aáuiÃoóeéêy]" "[aáuiÃoóeéêy]" "(s[spanish]|z[portuguese+french+italian])" +"s" "" "[dglmnrv]" "(z|Z[portuguese])" + +"z" "" "$" "(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr +"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr +"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp +"z" "" "" "(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp + +"que" "" "$" "(k[french]|ke)" +"qu" "" "[eiu]" "k" +"qu" "" "[ao]" "(kv|k)" // k is It + +"ex" "" "[aáuiÃoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" +"ex" "" "[cs]" "(e[portuguese]|ek)" + +"m" "" "[cdglnrst]" "(m|n[portuguese])" +"m" "" "[bfpv]" "(m|n[portuguese+spanish])" +"m" "" "$" "(m|n[portuguese])" + +"b" "^" "" "(b|V[spanish])" +"v" "^" "" "(v|B[spanish])" + +// VOWELS +"eau" "" "" "o" // Fr + +"ouh" "" "[aioe]" "(v[french]|uh)" +"uh" "" "[aioe]" "(v|uh)" +"ou" "" "[aioe]" "v" // french +"uo" "" "" "(vo|o)" +"u" "" "[aie]" "v" + +"i" "[aáuoóeéê]" "" "j" +"i" "" "[aeou]" "j" +"y" "[aáuiÃoóeéê]" "" "j" +"y" "" "[aeiÃou]" "j" +"e" "" "$" "(e|E[french])" + +"ão" "" "" "(au|an)" // Port +"ãe" "" "" "(aj|an)" // Port +"ãi" "" "" "(aj|an)" // Port +"õe" "" "" "(oj|on)" // Port +"où" "" "" "u" // Fr +"ou" "" "" "(ou|u[french])" + +"â" "" "" "a" // Port & Fr +"à " "" "" "a" // Port +"á" "" "" "a" // Port & Sp +"ã" "" "" "(a|an)" // Port +"é" "" "" "e" +"ê" "" "" "e" // Port & Fr +"è" "" "" "e" // Sp & Fr & It +"Ã" "" "" "i" // Port & Sp +"î" "" "" "i" // Fr +"ô" "" "" "o" // Port & Fr +"ó" "" "" "o" // Port & Sp & It +"õ" "" "" "(o|on)" // Port +"ò" "" "" "o" // Sp & It +"ú" "" "" "u" // Port & Sp +"ü" "" "" "u" // Port & Sp + +// LATIN ALPHABET +"a" "" "" "a" +"b" "" "" "(b|v[spanish])" +"c" "" "" "k" +"d" "" "" "d" +"e" "" "" "e" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "i" +"j" "" "" "(x[spanish]|Z)" // not It +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" 
"" "" "o" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "(s|S[portuguese])" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "(v|b[spanish])" +"w" "" "" "v" // foreign +"x" "" "" "(ks|gz|S[portuguese+spanish])" // S/ks Port & Sp, gz Sp, It only ks +"y" "" "" "i" +"z" "" "" "z" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_french.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_french.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_french.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_french.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Sephardic + +// CONSONANTS +"kh" "" "" "x" // foreign +"ph" "" "" "f" + +"ç" "" "" "s" +"x" "" "" "ks" +"ch" "" "" "S" +"c" "" "[eiyéèê]" "s" +"c" "" "" "k" +"gn" "" "" "(n|gn)" +"g" "" "[eiy]" "Z" +"gue" "" "$" "k" +"gu" "" "[eiy]" "g" +//"aill" "" "e" "aj" // non Jewish +//"ll" "" "e" "(l|j)" // non Jewish +"que" "" "$" "k" +"qu" "" "" "k" +"q" "" "" "k" +"s" "[aeiouyéèê]" "[aeiouyéèê]" "z" +"h" "[bdgt]" "" "" // translit from Arabic +"h" "" "$" "" // foreign +"j" "" "" "Z" +"w" "" "" "v" +"ouh" "" "[aioe]" "(v|uh)" +"ou" "" "[aeio]" "v" +"uo" "" "" "(vo|o)" +"u" "" "[aeio]" "v" + +// VOWELS +"aue" "" "" "aue" +"eau" "" "" "o" +//"au" "" "" "(o|au)" // non Jewish +"ai" "" "" "aj" // [e] is non Jewish +"ay" "" "" "aj" // [e] is non Jewish +"é" "" "" "e" +"ê" "" "" "e" +"è" "" "" "e" +"à " "" "" "a" +"â" "" "" "a" +"où" "" "" "u" +"ou" "" "" "u" +"oi" "" "" "oj" // [ua] is non Jewish +"ei" "" "" "ej" // [e] is non Jewish, in Ashk should be aj +"ey" "" "" "ej" // [e] non Jewish, in Ashk should be aj +//"eu" "" "" "(e|o)" // non Jewish +"y" "[ou]" "" "j" +"e" "" "$" "(e|)" +"i" "" "[aou]" "j" +"y" "" "[aoeu]" "j" +"y" "" "" "i" + +// TRIVIAL +"a" "" "" "a" +"b" "" "" "b" +"d" "" "" "d" +"e" "" "" "e" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "i" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"z" "" "" "z" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_hebrew.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_hebrew.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_hebrew.txt (added) +++ 
commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_hebrew.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Sephardic + +"××" "" "" "i" +"×¢×" "" "" "i" +"×¢×" "" "" "VV" +"××" "" "" "VV" + +"×׳" "" "" "Z" +"×׳" "" "" "dZ" + +"×" "" "" "L" +"×" "" "" "b" +"×" "" "" "g" +"×" "" "" "d" + +"×" "^" "" "1" +"×" "" "$" "1" +"×" "" "" "" + +"××" "" "" "V" +"××" "" "" "WW" +"×" "" "" "W" +"×" "" "" "z" +"×" "" "" "X" +"×" "" "" "T" +"××" "" "" "i" +"×" "" "" "i" +"×" "" "" "X" +"×" "^" "" "K" +"×" "" "" "k" +"×" "" "" "l" +"×" "" "" "m" +"×" "" "" "m" +"×" "" "" "n" +"× " "" "" "n" +"ס" "" "" "s" +"×¢" "" "" "L" +"×£" "" "" "f" +"פ" "" "" "f" +"×¥" "" "" "C" +"צ" "" "" "C" +"×§" "" "" "K" +"ר" "" "" "r" +"ש" "" "" "s" +"ת" "" "" "T" // Special for Sephardim Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_italian.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_italian.txt?rev=1151311&view=auto ============================================================================== --- 
commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_italian.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_italian.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +"kh" "" "" "x" // foreign + +"gli" "" "" "(l|gli)" +"gn" "" "[aeou]" "(n|nj|gn)" +"gni" "" "" "(ni|gni)" + +"gi" "" "[aeou]" "dZ" +"gg" "" "[ei]" "dZ" +"g" "" "[ei]" "dZ" +"h" "[bdgt]" "" "g" // gh is It; others from Arabic translit + +"ci" "" "[aeou]" "tS" +"ch" "" "[ei]" "k" +"sc" "" "[ei]" "S" +"cc" "" "[ei]" "tS" +"c" "" "[ei]" "tS" +"s" "[aeiou]" "[aeiou]" "z" + +"i" "[aeou]" "" "j" +"i" "" "[aeou]" "j" +"y" "[aeou]" "" "j" // foreign +"y" "" "[aeou]" "j" // foreign + +"qu" "" "" "k" +"uo" "" "" "(vo|o)" +"u" "" "[aei]" "v" + +"è" "" "" "e" +"é" "" "" "e" +"ò" "" "" "o" +"ó" "" "" "o" + +// LATIN ALPHABET +"a" "" "" "a" +"b" "" "" "b" +"c" "" "" "k" +"d" "" "" "d" +"e" "" "" "e" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "i" +"j" "" "" "(Z|dZ|j)" // foreign +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"w" "" "" "v" // foreign +"x" "" "" "ks" // foreign +"y" "" "" "i" // foreign +"z" "" "" "(ts|dz)" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_portuguese.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_portuguese.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_portuguese.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_portuguese.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +"kh" "" "" "x" // foreign +"ch" "" "" "S" +"ss" "" "" "s" +"sc" "" "[ei]" "s" +"sç" "" "[aou]" "s" +"ç" "" "" "s" +"c" "" "[ei]" "s" +// "c" "" "[aou]" "(k|C)" + +"s" "^" "" "s" +"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "z" +"s" "" "[dglmnrv]" "(Z|S)" // Z is Brazil + +"z" "" "$" "(Z|s|S)" // s and S in Brazil +"z" "" "[bdgv]" "(Z|z)" // Z in Brazil +"z" "" "[ptckf]" "(s|S|z)" // s and S in Brazil + +"gu" "" "[eiu]" "g" +"gu" "" "[ao]" "gv" +"g" "" "[ei]" "Z" +"qu" "" "[eiu]" "k" +"qu" "" "[ao]" "kv" + +"uo" "" "" "(vo|o|u)" +"u" "" "[aei]" "v" + +"lh" "" "" "l" +"nh" "" "" "nj" +"h" "[bdgt]" "" "" // translit. 
from Arabic + +"ex" "" "[aáuiíoóeéêy]" "(ez|eS|eks)" // ez in Brazil +"ex" "" "[cs]" "e" + +"y" "[aáuiíoóeéê]" "" "j" +"y" "" "[aeiíou]" "j" +"m" "" "[bcdfglnprstv]" "(m|n)" // maybe to add a rule for m/n before a consonant that disappears [preceding vowel becomes nasalized] +"m" "" "$" "(m|n)" // maybe to add a rule for final m/n that disappears [preceding vowel becomes nasalized] + +"ão" "" "" "(au|an|on)" +"ãe" "" "" "(aj|an)" +"ãi" "" "" "(aj|an)" +"õe" "" "" "(oj|on)" +"i" "[aáuoóeéê]" "" "j" +"i" "" "[aeou]" "j" + +"â" "" "" "a" +"à" "" "" "a" +"á" "" "" "a" +"ã" "" "" "(a|an|on)" +"é" "" "" "e" +"ê" "" "" "e" +"í" "" "" "i" +"ô" "" "" "o" +"ó" "" "" "o" +"õ" "" "" "(o|on)" +"ú" "" "" "u" +"ü" "" "" "u" + +"aue" "" "" "aue" + +// LATIN ALPHABET +"a" "" "" "a" +"b" "" "" "b" +"c" "" "" "k" +"d" "" "" "d" +"e" "" "" "(e|i)" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "i" +"j" "" "" "Z" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "(o|u)" +"p" "" "" "p" +"q" "" "" "k" +"r" "" "" "r" +"s" "" "" "S" +"t" "" "" "t" +"u" "" "" "u" +"v" "" "" "v" +"w" "" "" "v" +"x" "" "" "(S|ks)" +"y" "" "" "i" +"z" "" "" "z" Added: commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_spanish.txt URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_spanish.txt?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_spanish.txt (added) +++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/sep_rules_spanish.txt Wed Jul 27 02:29:11 2011 @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//Sephardic + +// Includes both Spanish (Castilian) & Catalan + +// CONSONANTS +"ñ" "" "" "(n|nj)" +"ny" "" "" "nj" // Catalan +"ç" "" "" "s" // Catalan + +"ig" "[aeiou]" "" "(tS|ig)" // tS is Catalan +"ix" "[aeiou]" "" "S" // Catalan +"tx" "" "" "tS" // Catalan +"tj" "" "$" "tS" // Catalan +"tj" "" "" "dZ" // Catalan +"tg" "" "" "(tg|dZ)" // dZ is Catalan +"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina +"bh" "" "" "b" // translit. from Arabic +"h" "[dgt]" "" "" // translit. 
from Arabic + +"j" "" "" "(x|Z)" // Z is Catalan +"x" "" "" "(ks|gz|S)" // ks is Spanish, all are Catalan + +//"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic +"w" "" "" "v" // foreign words + +"v" "^" "" "(B|v)" +"b" "^" "" "(b|V)" +"v" "" "" "(b|v)" +"b" "" "" "(b|v)" +"m" "" "[bpvf]" "(m|n)" + +"c" "" "[ei]" "s" +// "c" "" "[aou]" "(k|C)" +"c" "" "" "k" + +"z" "" "" "(z|s)" // as "c" before "e" or "i", in Spain it is like unvoiced English "th" + +"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü" +"g" "" "[ei]" "(x|g|dZ)" // "g" only for foreign words; dZ is Catalan + +"qu" "" "" "k" +"q" "" "" "k" + +"uo" "" "" "(vo|o)" +"u" "" "[aei]" "v" + +// "y" "" "" "(i|j|S|Z)" // S or Z are peculiar to South America; only Ashkenazic +"y" "" "" "(i|j)" + +// VOWELS +"ü" "" "" "v" +"á" "" "" "a" +"é" "" "" "e" +"í" "" "" "i" +"ó" "" "" "o" +"ú" "" "" "u" +"à" "" "" "a" // Catalan +"è" "" "" "e" // Catalan +"ò" "" "" "o" // Catalan + +// TRIVIAL +"a" "" "" "a" +"d" "" "" "d" +"e" "" "" "e" +"f" "" "" "f" +"g" "" "" "g" +"h" "" "" "h" +"i" "" "" "i" +"k" "" "" "k" +"l" "" "" "l" +"m" "" "" "m" +"n" "" "" "n" +"o" "" "" "o" +"p" "" "" "p" +"r" "" "" "r" +"s" "" "" "s" +"t" "" "" "t" +"u" "" "" "u" Added: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (added) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Wed Jul 27 02:29:11 2011 @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.language.bm; + +import static org.junit.Assert.assertEquals; + +import java.util.Collections; + +import org.apache.commons.codec.StringEncoder; +import org.apache.commons.codec.StringEncoderAbstractTest; +import org.junit.Test; + +/** + * Tests BeiderMorseEncoder. 
+ * + * <p>Checks the encoder property setters and the exceptions expected for unknown languages, an unknown resource name and a negative match index. + * + * @author Apache Software Foundation + * @since 2.0 + */ +public class BeiderMorseEncoderTest extends StringEncoderAbstractTest { + /** Returns a new BeiderMorseEncoder; overrides the inherited createStringEncoder factory method. */ + @Override + protected StringEncoder createStringEncoder() { + return new BeiderMorseEncoder(); + } + + /** Expects IllegalStateException when Lang.loadFromResource is called with a made-up resource name. */ + @Test(expected = IllegalStateException.class) + public void invalidLangResourceShouldRaiseException() { + Lang.loadFromResource("thisIsAMadeUpResourceName", Languages.instance(NameType.GENERIC)); + } + + /** Expects IllegalArgumentException when Rule.instance is asked for the language name noSuchLanguage. */ + @Test(expected = IllegalArgumentException.class) + public void invalidLangShouldRaiseException() { + Rule.instance(NameType.GENERIC, RuleType.APPROX, "noSuchLanguage"); + } + + /** Expects IllegalArgumentException when Languages.instance is called with the name thereIsNoSuchLanguage. */ + @Test(expected = IllegalArgumentException.class) + public void invalidLanguageResourceShouldRaiseException() { + Languages.instance("thereIsNoSuchLanguage"); + } + + /** Expects IndexOutOfBoundsException when patternAndContextMatches is called with the index -1. */ + @Test(expected = IndexOutOfBoundsException.class) + public void negativeIndexForRuleMatchShouldRaiseException() { + Rule r = new Rule("a", "", "", "", Collections.<String> emptySet(), "bob"); + r.patternAndContextMatches("bob", -1); + } + + /** Sets concat to false and checks that isConcat reports false afterwards. */ + @Test + public void setConcat() { + BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); + bmpm.setConcat(false); + assertEquals("Should be able to set concat to false", false, bmpm.isConcat()); + } + + /** Sets the name type to ASHKENAZI and checks that getNameType returns it. */ + @Test + public void setNameTypeAsh() { + BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); + bmpm.setNameType(NameType.ASHKENAZI); + assertEquals("Name type should have been set to ash", NameType.ASHKENAZI, bmpm.getNameType()); + } + + /** Sets the rule type to EXACT and checks that getRuleType returns it. */ + @Test + public void setRuleTypeExact() { + BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); + bmpm.setRuleType(RuleType.EXACT); + assertEquals("Rule type should have been set to exact", RuleType.EXACT, bmpm.getRuleType()); + } + + /** Expects IllegalArgumentException when the rule type is set to RuleType.RULES. */ + @Test(expected = IllegalArgumentException.class) + public void setRuleTypeToRulesShouldRaiseException() { + BeiderMorseEncoder bmpm = new BeiderMorseEncoder(); + bmpm.setRuleType(RuleType.RULES); + } +} Added: 
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java (added) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/LanguageGuessingTest.java Wed Jul 27 02:29:11 2011 @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.language.bm; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Tests guessLanguages API. 
+ * + * @author Apache Software Foundation + * @since 2.0 + */ +@RunWith(Parameterized.class) +public class LanguageGuessingTest { + + private static String EXACT = "exact"; + private static String ONE_OF = "one of"; + + @Parameterized.Parameters + public static List<Object[]> data() { + return Arrays.asList(new Object[][] { + { "Renault", "french", EXACT }, + { "Mickiewicz", "polish", EXACT }, + { "Thompson", "english", ONE_OF }, // this also hits german and greeklatin + { "Nuñez", "spanish", EXACT }, { "Carvalho", "portuguese", EXACT }, { "Äapek", "czech", EXACT }, + { "Sjneijder", "dutch", EXACT }, { "Klausewitz", "german", EXACT }, { "Küçük", "turkish", EXACT }, + { "Giacometti", "italian", EXACT }, { "Nagy", "hungarian", EXACT }, { "CeauÅescu", "romanian", EXACT }, + { "Angelopoulos", "greeklatin", EXACT }, { "ÎγγελÏÏÎ¿Ï Î»Î¿Ï", "greek", EXACT }, { "ÐÑÑкин", "cyrillic", EXACT }, + { "×××", "hebrew", EXACT } }); + } + + private final String exactness; + + private final Lang lang = Lang.instance(NameType.GENERIC); + private final String language; + private final String name; + + public LanguageGuessingTest(String name, String language, String exactness) { + this.name = name; + this.language = language; + this.exactness = exactness; + } + + @Test + public void testLanguageGuessing() { + Set<String> guesses = this.lang.guessLanguages(this.name); + String guess = this.lang.guessLanguage(this.name); + + assertTrue("language predicted for name '" + this.name + "' is wrong: " + guesses + " should contain '" + this.language + "'", + guesses.contains(this.language)); + + if (this.exactness.equals(EXACT)) { + assertEquals("language predicted for name '" + this.name + "' is wrong", this.language, guess); + } else { + // System.out.println("warning: test case that maps to multiple languages: '" + + // name + "':" + language + " ~> " + guesses); + } + } +} Added: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticTest.java URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticTest.java?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticTest.java (added) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/PhoneticTest.java Wed Jul 27 02:29:11 2011 @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.language.bm; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.List; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Tests PhoneticEngine. 
+ * + * <p>Each parameter row is a name, the expected phonetic encoding, and the NameType, RuleType and concat flag used to build the PhoneticEngine. + * + * @author Apache Software Foundation + * @since 2.0 + */ +@RunWith(Parameterized.class) +public class PhoneticTest { + + /** Supplies the parameter rows; the column order matches the constructor arguments. */ + @Parameterized.Parameters + public static List<Object[]> data() { + return Arrays + .asList(new Object[] { "Renault", "rinolt|rino|rinDlt|rinalt|rinult|rinD|rina|rinu", NameType.GENERIC, RuleType.APPROX, + true }, + new Object[] { "Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, RuleType.APPROX, true }, + new Object[] { "Renault", "(rinDlt)", NameType.SEPHARDIC, RuleType.APPROX, true }, + new Object[] { "SntJohn-Smith", "(sntjonsmit)", NameType.GENERIC, RuleType.EXACT, true }, + new Object[] { "d'ortley", "ortlaj|ortlej|ortlaj|ortlej-dortlaj|dortlej|dortlaj|dortlej", NameType.GENERIC, + RuleType.EXACT, true }, + new Object[] { + "van helsing", + "helSink|helsink|helzink|xelSink|xelsink|xelzink|HelSink|Helsink|Helzink-vanhelSink|vanhelsink|vanhelzink|vanjelSink|vanjelsink|vanjelzink|fanhelSink|fanhelsink|fanhelzink|fanjelSink|fanjelsink|fanjelzink|banhelSink|banhelsink|banhelzink|banjelSink|banjelsink|banjelzink", + NameType.GENERIC, RuleType.EXACT, false }); + } + + private final boolean concat; + private final String name; + private final NameType nameType; + private final String phoneticExpected; + private final RuleType ruleType; + + /** Stores one parameter row for use by testPhonetic. */ + public PhoneticTest(String name, String phoneticExpected, NameType nameType, RuleType ruleType, boolean concat) { + this.name = name; + this.phoneticExpected = phoneticExpected; + this.nameType = nameType; + this.ruleType = ruleType; + this.concat = concat; + } + + /** Encodes the name with a PhoneticEngine built from the row settings and compares the result with the expected string; the JUnit timeout is 10000 milliseconds. */ + @Test(timeout = 10000L) + public void testPhonetic() { + PhoneticEngine engine = new PhoneticEngine(this.nameType, this.ruleType, this.concat); + + String phoneticActual = engine.encode(this.name); + + assertEquals("phoneme incorrect", this.phoneticExpected, phoneticActual); + } +} Added: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java URL: 
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java?rev=1151311&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java (added) +++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java Wed Jul 27 02:29:11 2011 @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.language.bm; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * Tests Rule. 
+ * + * <p>Each parameter row is a case description, a Rule, a set of language names, and the boolean that Rule.languageMatches is expected to return for that set. + * + * @author Apache Software Foundation + * @since 2.0 + */ +@RunWith(Parameterized.class) +public class RuleTest { + + /** Supplies two rows whose rules are built identically with the languages english and french and Rule.ALL: one is queried with both languages and expects true, the other is queried with english only and expects false. */ + @Parameterized.Parameters + public static List<Object[]> data() { + return Arrays.asList( + new Object[] { "matching language sets with ALL", + new Rule("e", "", "", "o", new HashSet<String>(Arrays.asList("english", "french")), Rule.ALL), + new HashSet<String>(Arrays.asList("english", "french")), true }, + new Object[] { "non-matching language sets with ALL", + new Rule("e", "", "", "o", new HashSet<String>(Arrays.asList("english", "french")), Rule.ALL), + new HashSet<String>(Arrays.asList("english")), false }); + } + + private final String caseName; + private final boolean expected; + private final Set<String> langs; + private final Rule rule; + + /** Stores one parameter row for use by the rule test method. */ + public RuleTest(String caseName, Rule rule, Set<String> langs, boolean expected) { + this.caseName = caseName; + this.rule = rule; + this.langs = langs; + this.expected = expected; + } + + /** Asserts that languageMatches returns the expected value for the row, using the case description as the failure message. */ + @Test + public void rule() { + assertEquals(this.caseName, this.expected, this.rule.languageMatches(this.langs)); + } + +}