This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push: new 047d2472 CODEC-312: Fix possible StringIndexOutOfBoundException thrown by MatchRatingApproachEncoder.encode() method (#220) 047d2472 is described below commit 047d24724c4aaae158331cfef0de0a9d5df2686e Author: Arthur Chan <game...@hotmail.com> AuthorDate: Sat Nov 25 03:05:19 2023 +0000 CODEC-312: Fix possible StringIndexOutOfBoundException thrown by MatchRatingApproachEncoder.encode() method (#220) * CODEC-312: Fix possible StringIndexOutOfBoundException Signed-off-by: Arthur Chan <arthur.c...@adalogics.com> * CODEC-312: Add unit test Signed-off-by: Arthur Chan <arthur.c...@adalogics.com> * Remove unmaintained comments --------- Signed-off-by: Arthur Chan <arthur.c...@adalogics.com> Co-authored-by: Gary Gregory <garydgreg...@users.noreply.github.com> --- .../codec/language/MatchRatingApproachEncoder.java | 10 ++++++++ .../language/MatchRatingApproachEncoderTest.java | 28 +++++++++++----------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java b/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java index 86f08437..d871cc48 100644 --- a/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java +++ b/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java @@ -126,10 +126,20 @@ public class MatchRatingApproachEncoder implements StringEncoder { // Preprocessing name = cleanName(name); + // Bulletproof if name becomes empty after cleanName(name) + if (SPACE.equals(name) || name.isEmpty()) { + return EMPTY; + } + // BEGIN: Actual encoding part of the algorithm... // 1. Delete all vowels unless the vowel begins the word name = removeVowels(name); + // Bulletproof if name becomes empty after removeVowels(name) + if (SPACE.equals(name) || name.isEmpty()) { + return EMPTY; + } + // 2. 
Remove second consonant from any double consonant name = removeDoubleConsonants(name); diff --git a/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java b/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java index bf7508e8..7f10ecd8 100644 --- a/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java +++ b/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java @@ -35,8 +35,6 @@ import org.junit.jupiter.api.Test; */ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<MatchRatingApproachEncoder> { - // ********** BEGIN REGION - TEST SUPPORT METHODS - @Override protected MatchRatingApproachEncoder createStringEncoder() { return new MatchRatingApproachEncoder(); @@ -248,10 +246,6 @@ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<Ma assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin")); } - // ***** END REGION - TEST SUPPORT METHODS - - // ***** BEGIN REGION - TEST GET MRA ENCODING - @Test public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l")); @@ -297,10 +291,6 @@ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<Ma assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia")); } - // ***** END REGION - TEST GET MRA ENCODING - - // ***** BEGIN REGION - TEST GET MRA COMPARISONS - @Test public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() { assertFalse(getStringEncoder().isEncodeEquals(null, " ")); @@ -433,8 +423,6 @@ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<Ma assertFalse(this.getStringEncoder().isEncodeEquals("", "test")); } - // **** BEGIN YIDDISH/SLAVIC SECTION **** - @Test public final void testIsEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() { 
assertFalse(this.getStringEncoder().isEncodeEquals(null, "test")); @@ -470,8 +458,6 @@ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<Ma assertTrue(this.getStringEncoder().isVowel("I")); } - // **** END YIDDISH/SLAVIC SECTION **** - @Test public final void testIsVowel_SmallD_ReturnsFalse() { assertFalse(this.getStringEncoder().isVowel("d")); @@ -519,4 +505,18 @@ public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<Ma // ***** END REGION - TEST GET MRA COMPARISONS + @Test + public final void testPunctuationOnly() { + assertEquals(this.getStringEncoder().encode(".,-"), ""); + } + + @Test + public final void testVowelOnly() { + assertEquals(this.getStringEncoder().encode("aeiouAEIOU"), "A"); + } + + @Test + public final void testVowelAndPunctuationOnly() { + assertEquals(this.getStringEncoder().encode("uoiea.,-AEIOU"), "U"); + } }