Repository: commons-lang Updated Branches: refs/heads/master 855a52385 -> 600eb9eb9
LANG-1299 - Add method for converting string to an array of code points Project: http://git-wip-us.apache.org/repos/asf/commons-lang/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-lang/commit/600eb9eb Tree: http://git-wip-us.apache.org/repos/asf/commons-lang/tree/600eb9eb Diff: http://git-wip-us.apache.org/repos/asf/commons-lang/diff/600eb9eb Branch: refs/heads/master Commit: 600eb9eb99dc5c03b047e3c81b49ee4769787c6a Parents: 855a523 Author: duncan <dun...@wortharead.com> Authored: Sun Dec 18 09:53:26 2016 +0000 Committer: duncan <dun...@wortharead.com> Committed: Sun Dec 18 09:53:26 2016 +0000 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../org/apache/commons/lang3/StringUtils.java | 35 ++++++++++++++++++++ .../apache/commons/lang3/StringUtilsTest.java | 24 +++++++++----- 3 files changed, 52 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-lang/blob/600eb9eb/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 12732d7..7aad38f 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove. 
<body> <release version="3.6" date="2016-MM-DD" description="TBD"> + <action issue="LANG-1299" type="add" dev="djones">Add method for converting string to an array of code points</action> <action issue="LANG-1286" type="fix" dev="djones">RandomStringUtils random method can overflow and return characters outside of specified range</action> <action issue="LANG-660" type="add" dev="djones">Add methods to insert arrays into arrays at an index</action> <action issue="LANG-1292" type="fix" dev="djones">WordUtils.wrap throws StringIndexOutOfBoundsException</action> http://git-wip-us.apache.org/repos/asf/commons-lang/blob/600eb9eb/src/main/java/org/apache/commons/lang3/StringUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java index 5d94ef4..1a0325e 100644 --- a/src/main/java/org/apache/commons/lang3/StringUtils.java +++ b/src/main/java/org/apache/commons/lang3/StringUtils.java @@ -9104,4 +9104,39 @@ public class StringUtils { return str; } + + + /** + * <p>Converts a {@code CharSequence} into an array of code points.</p> + * + * <p>Valid pairs of surrogate code units will be converted into a single supplementary + * code point. Isolated surrogate code units (i.e. 
a high surrogate not followed by a low surrogate or + * a low surrogate not preceded by a high surrogate) will be returned as-is.</p> + * + * <pre> + * StringUtils.toCodePoints(null) = null + * StringUtils.toCodePoints("") = [] // empty array + * </pre> + * + * @param str the character sequence to convert + * @return an array of code points + * @since 3.6 + */ + public static int[] toCodePoints(CharSequence str) { + if (str == null) { + return null; + } + if (str.length() == 0) { + return ArrayUtils.EMPTY_INT_ARRAY; + } + + String s = str.toString(); + int[] result = new int[s.codePointCount(0, s.length())]; + int index = 0; + for (int i = 0; i < result.length; i++) { + result[i] = s.codePointAt(index); + index += Character.charCount(result[i]); + } + return result; + } } http://git-wip-us.apache.org/repos/asf/commons-lang/blob/600eb9eb/src/test/java/org/apache/commons/lang3/StringUtilsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java index 67707a8..1da6b8d 100644 --- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java +++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java @@ -16,14 +16,7 @@ */ package org.apache.commons.lang3; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.Assert.*; import java.io.UnsupportedEncodingException; import java.lang.reflect.Constructor; @@ -3180,4 +3173,19 @@ public class StringUtilsTest { assertEquals("A#", StringUtils.unwrap("A#", '#')); assertEquals("ABA", StringUtils.unwrap("AABAA", 'A')); } + + @Test + public void
testToCodePoints() throws Exception { + final int orphanedHighSurrogate = 0xD801; + final int orphanedLowSurrogate = 0xDC00; + final int supplementary = 0x2070E; + + final int[] codePoints = {'a', orphanedHighSurrogate, 'b','c', supplementary, + 'd', orphanedLowSurrogate, 'e'}; + final String s = new String(codePoints, 0, codePoints.length); + assertArrayEquals(codePoints, StringUtils.toCodePoints(s)); + + assertNull(StringUtils.toCodePoints(null)); + assertArrayEquals(ArrayUtils.EMPTY_INT_ARRAY, StringUtils.toCodePoints("")); + } }