Repository: commons-lang Updated Branches: refs/heads/master dd5a0e6e1 -> ec8bf5281
LANG-1120: StringUtils.stripAccents should remove accents from "Ł" and "ł" (closes #105). Project: http://git-wip-us.apache.org/repos/asf/commons-lang/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-lang/commit/a0b798c2 Tree: http://git-wip-us.apache.org/repos/asf/commons-lang/tree/a0b798c2 Diff: http://git-wip-us.apache.org/repos/asf/commons-lang/diff/a0b798c2 Branch: refs/heads/master Commit: a0b798c27a0312df2a110073bd7888993e56d05c Parents: dd5a0e6 Author: kaching88 <wa...@o2.pl> Authored: Tue Jul 14 01:54:35 2015 +0200 Committer: pascalschumacher <pascalschumac...@gmx.net> Committed: Tue May 10 21:18:10 2016 +0200 ---------------------------------------------------------------------- .../java/org/apache/commons/lang3/StringUtils.java | 15 ++++++++++++++- .../commons/lang3/StringUtilsTrimEmptyTest.java | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-lang/blob/a0b798c2/src/main/java/org/apache/commons/lang3/StringUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java index f4ec00b..31a572a 100644 --- a/src/main/java/org/apache/commons/lang3/StringUtils.java +++ b/src/main/java/org/apache/commons/lang3/StringUtils.java @@ -753,11 +753,24 @@ public class StringUtils { return null; } final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$ - final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD); + final StringBuilder decomposed = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD)); + convertRemainingAccentCharacters(decomposed); // Note that this doesn't correctly remove ligatures... 
return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY); } + private static void convertRemainingAccentCharacters(StringBuilder decomposed) { + for (int i = 0; i < decomposed.length(); i++) { + if (decomposed.charAt(i) == '\u0141') { + decomposed.deleteCharAt(i); + decomposed.insert(i, 'L'); + } else if (decomposed.charAt(i) == '\u0142') { + decomposed.deleteCharAt(i); + decomposed.insert(i, 'l'); + } + } + } + // Equals //----------------------------------------------------------------------- /** http://git-wip-us.apache.org/repos/asf/commons-lang/blob/a0b798c2/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java index 70895b9..f55b28f 100644 --- a/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java +++ b/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java @@ -273,5 +273,7 @@ public class StringUtilsTrimEmptyTest { assertEquals( "Failed empty String", "", StringUtils.stripAccents("") ); assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") ); assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") ); + assertEquals("ALOSZZCN aloszzcn", StringUtils.stripAccents("\u0104\u0141\u00D3\u015A\u017B\u0179\u0106\u0143 " + + "\u0105\u0142\u00F3\u015B\u017C\u017A\u0107\u0144")); } }