This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f908b26 CODEC-315: Fix possible IndexOutOfBoundException (#223)
1f908b26 is described below

commit 1f908b266411c28862c3952f3a3002ed19837b1b
Author: Arthur Chan <game...@hotmail.com>
AuthorDate: Sat Nov 25 01:37:57 2023 +0000

    CODEC-315: Fix possible IndexOutOfBoundException (#223)
    
    Signed-off-by: Arthur Chan <arthur.c...@adalogics.com>
---
 .../commons/codec/language/bm/PhoneticEngine.java      |  4 ++--
 .../commons/codec/language/bm/PhoneticEngineTest.java  | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
index ef69a2a4..b98893cf 100644
--- a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
+++ b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
@@ -409,7 +409,7 @@ public class PhoneticEngine {
         switch (this.nameType) {
         case SEPHARDIC:
             words.forEach(aWord -> {
-                final String[] parts = aWord.split("'");
+                final String[] parts = aWord.split("'", -1);
                 words2.add(parts[parts.length - 1]);
             });
             words2.removeAll(NAME_PREFIXES.get(this.nameType));
@@ -431,7 +431,7 @@ public class PhoneticEngine {
         } else if (words2.size() == 1) {
             // not a multi-word name
             input = words.iterator().next();
-        } else {
+        } else if (!words2.isEmpty()) {
             // encode each word in a multi-word name separately (normally used for approx matches)
             final StringBuilder result = new StringBuilder();
             words2.forEach(word -> result.append("-").append(encode(word)));
diff --git a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
index 6725c492..1a7c2117 100644
--- a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
+++ b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
@@ -48,6 +48,15 @@ public class PhoneticEngineTest {
                 );
     }
 
+    public static Stream<Arguments> invalidData() {
+        return Stream.of(
+                        Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
+                        Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
+                        Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
+                        Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
+                );
+    }
+
     // TODO Identify if there is a need to an assertTimeout(Duration.ofMillis(10000L) in some point, since this method was marked as @Test(timeout = 10000L)
     @ParameterizedTest
     @MethodSource("data")
@@ -70,4 +79,13 @@ public class PhoneticEngineTest {
             }
         }
     }
+
+    @ParameterizedTest
+    @MethodSource("invalidData")
+    public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
+                                  final RuleType ruleType, final boolean concat, final int maxPhonemes) {
+        final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
+
+        assertEquals(engine.encode(input), phoneticExpected);
+    }
 }

Reply via email to