Author: ggregory Date: Sun Mar 14 00:54:03 2010 New Revision: 922703 URL: http://svn.apache.org/viewvc?rev=922703&view=rev Log: https://issues.apache.org/jira/browse/LANG-607 StringUtils.containsAny methods incorrectly match Unicode 2.0+ supplementary characters.
Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java?rev=922703&r1=922702&r2=922703&view=diff ============================================================================== --- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java (original) +++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java Sun Mar 14 00:54:03 2010 @@ -1437,20 +1437,32 @@ public class StringUtils { * <code>false</code> if no match or null input * @since 2.4 */ - public static boolean containsAny(CharSequence cs, char[] searchChars) { - if (cs == null || cs.length() == 0 || searchChars == null || searchChars.length == 0) { - return false; - } - for (int i = 0; i < cs.length(); i++) { - char ch = cs.charAt(i); - for (int j = 0; j < searchChars.length; j++) { - if (searchChars[j] == ch) { - return true; - } - } - } - return false; - } + public static boolean containsAny(CharSequence cs, char[] searchChars) { + if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) { + return false; + } + int csLength = cs.length(); + int searchLength = searchChars.length; + int csLastIndex = csLength - 1; + int searchLastIndex = searchLength - 1; + for (int i = 0; i < csLength; i++) { + char ch = cs.charAt(i); + for (int j = 0; j < searchLength; j++) { + if (searchChars[j] == ch) { + if (i < csLastIndex && j < searchLastIndex && ch >= Character.MIN_HIGH_SURROGATE && ch <= Character.MAX_HIGH_SURROGATE) { + // ch is a supplementary character + if (searchChars[j + 1] == cs.charAt(i + 1)) { + return true; + } + } else { + // ch is in the Basic Multilingual Plane + return true; + } + } + } + } + return false; + } /** * <p> 
Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java?rev=922703&r1=922702&r2=922703&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java (original) +++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java Sun Mar 14 00:54:03 2010 @@ -33,7 +33,19 @@ public class StringUtilsEqualsIndexOfTes private static final String BAR = "bar"; private static final String FOOBAR = "foobar"; private static final String[] FOOBAR_SUB_ARRAY = new String[] {"ob", "ba"}; - + + /** + * Supplementary character U+20000 + * See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/ + */ + private static final String CharU20000 = "\uD840\uDC00"; + + /** + * Supplementary character U+20001 + * See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/ + */ + private static final String CharU20001 = "\uD840\uDC01"; + public StringUtilsEqualsIndexOfTest(String name) { super(name); } @@ -643,4 +655,45 @@ public class StringUtilsEqualsIndexOfTes assertEquals(true, StringUtils.containsNone(str3, chars3)); } + /** + * See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/ + */ + public void testContainsStringWithSupplementaryChars() { + assertEquals(true, StringUtils.contains(CharU20000 + CharU20001, CharU20000)); + assertEquals(true, StringUtils.contains(CharU20000 + CharU20001, CharU20001)); + assertEquals(true, StringUtils.contains(CharU20000, CharU20000)); + assertEquals(false, StringUtils.contains(CharU20000, CharU20001)); + } + + /** + * See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/ + */ + public void testContainsAnyStringWithSupplementaryChars() { + 
assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20000)); + assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20001)); + assertEquals(true, StringUtils.containsAny(CharU20000, CharU20000)); + // Sanity check: + assertEquals(-1, CharU20000.indexOf(CharU20001)); + assertEquals(0, CharU20000.indexOf(CharU20001.charAt(0))); + assertEquals(-1, CharU20000.indexOf(CharU20001.charAt(1))); + // Test: + assertEquals(false, StringUtils.containsAny(CharU20000, CharU20001)); + assertEquals(false, StringUtils.containsAny(CharU20001, CharU20000)); + } + + /** + * See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/ + */ + public void testContainsAnyCharArrayWithSupplementaryChars() { + assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20000.toCharArray())); + assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20001.toCharArray())); + assertEquals(true, StringUtils.containsAny(CharU20000, CharU20000.toCharArray())); + // Sanity check: + assertEquals(-1, CharU20000.indexOf(CharU20001)); + assertEquals(0, CharU20000.indexOf(CharU20001.charAt(0))); + assertEquals(-1, CharU20000.indexOf(CharU20001.charAt(1))); + // Test: + assertEquals(false, StringUtils.containsAny(CharU20000, CharU20001.toCharArray())); + assertEquals(false, StringUtils.containsAny(CharU20001, CharU20000.toCharArray())); + } }