This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
     new 1f908b26 CODEC-315: Fix possible IndexOutOfBoundException (#223)
1f908b26 is described below

commit 1f908b266411c28862c3952f3a3002ed19837b1b
Author: Arthur Chan <game...@hotmail.com>
AuthorDate: Sat Nov 25 01:37:57 2023 +0000

    CODEC-315: Fix possible IndexOutOfBoundException (#223)

    Signed-off-by: Arthur Chan <arthur.c...@adalogics.com>
---
 .../commons/codec/language/bm/PhoneticEngine.java | 4 ++--
 .../commons/codec/language/bm/PhoneticEngineTest.java | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
index ef69a2a4..b98893cf 100644
--- a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
+++ b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
@@ -409,7 +409,7 @@ public class PhoneticEngine {
         switch (this.nameType) {
         case SEPHARDIC:
             words.forEach(aWord -> {
-                final String[] parts = aWord.split("'");
+                final String[] parts = aWord.split("'", -1);
                 words2.add(parts[parts.length - 1]);
             });
             words2.removeAll(NAME_PREFIXES.get(this.nameType));
@@ -431,7 +431,7 @@ public class PhoneticEngine {
         } else if (words2.size() == 1) {
             // not a multi-word name
             input = words.iterator().next();
-        } else {
+        } else if (!words2.isEmpty()) {
             // encode each word in a multi-word name separately (normally used for approx matches)
             final StringBuilder result = new StringBuilder();
             words2.forEach(word -> result.append("-").append(encode(word)));
diff --git a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
index 6725c492..1a7c2117 100644
--- a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
+++ b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
@@ -48,6 +48,15 @@ public class PhoneticEngineTest {
         );
     }
 
+    public static Stream<Arguments> invalidData() {
+        return Stream.of(
+                Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
+                Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
+                Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
+                Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
+        );
+    }
+
     // TODO Identify if there is a need to an assertTimeout(Duration.ofMillis(10000L) in some point, since this method was marked as @Test(timeout = 10000L)
     @ParameterizedTest
     @MethodSource("data")
@@ -70,4 +79,13 @@ public class PhoneticEngineTest {
             }
         }
     }
+
+    @ParameterizedTest
+    @MethodSource("invalidData")
+    public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
+                                  final RuleType ruleType, final boolean concat, final int maxPhonemes) {
+        final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
+
+        assertEquals(engine.encode(input), phoneticExpected);
+    }
 }