Author: jcarman Date: Fri Aug 6 01:08:57 2010 New Revision: 982844 URL: http://svn.apache.org/viewvc?rev=982844&view=rev Log: LANG-640: Add normalizeSpace to StringUtils
Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java Modified: commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java?rev=982844&r1=982843&r2=982844&view=diff ============================================================================== --- commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java (original) +++ commons/proper/lang/trunk/src/main/java/org/apache/commons/lang3/StringUtils.java Fri Aug 6 01:08:57 2010 @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Locale; +import java.util.regex.Pattern; /** * <p>Operations on {@link java.lang.String} that are @@ -157,6 +158,11 @@ public class StringUtils { private static final int PAD_LIMIT = 8192; /** + * A regex pattern for recognizing blocks of whitespace characters. + */ + private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+"); + + /** * <p><code>StringUtils</code> instances should NOT be constructed in * standard programming. Instead, the class should be used as * <code>StringUtils.trim(" foo ");</code>.</p> @@ -6257,4 +6263,52 @@ public class StringUtils { int strOffset = str.length() - suffix.length(); return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length()); } + + /** + * <p> + * Similar to <a + * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize + * -space</a> + * </p> + * <p> + * The function returns the argument string with whitespace normalized by using + * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace + * and then replacing sequences of whitespace characters by a single space. 
+ * </p> + * In XML Whitespace characters are the same as those allowed by the <a + * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+ + * <p> + * Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r] + * <p> + * For reference: + * <ul> + * <li>\x0B = vertical tab</li> + * <li>\f = #xC = form feed</li> + * <li>#x20 = space</li> + * <li>#x9 = \t</li> + * <li>#xA = \n</li> + * <li>#xD = \r</li> + * </ul> + * </p> + * <p> + * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also + * normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char <= 32) from both + * ends of this String. + * </p> + * + * @see Pattern + * @see #trim(String) + * @see <a + * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a> + * @param str the source String to normalize whitespaces from, may be null + * @return the modified string with whitespace normalized, <code>null</code> if null String input + * + * @since 3.0 + */ + public static String normalizeSpace(String str) { + if(str == null) { + return null; + } + return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" "); + } } Modified: commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java URL: http://svn.apache.org/viewvc/commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java?rev=982844&r1=982843&r2=982844&view=diff ============================================================================== --- commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java (original) +++ commons/proper/lang/trunk/src/test/java/org/apache/commons/lang3/StringUtilsTest.java Fri Aug 6 01:08:57 2010 @@ -1855,4 +1855,17 @@ public class StringUtilsTest extends Tes assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"})); } + public void 
testNormalizeSpace() { + assertEquals(null, StringUtils.normalizeSpace(null)); + assertEquals("", StringUtils.normalizeSpace("")); + assertEquals("", StringUtils.normalizeSpace(" ")); + assertEquals("", StringUtils.normalizeSpace("\t")); + assertEquals("", StringUtils.normalizeSpace("\n")); + assertEquals("", StringUtils.normalizeSpace("\u000B")); + assertEquals("", StringUtils.normalizeSpace("\f")); + assertEquals("", StringUtils.normalizeSpace("\r")); + assertEquals("a", StringUtils.normalizeSpace(" a ")); + assertEquals("a b c", StringUtils.normalizeSpace(" a b c ")); + assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n")); + } }