[07/14] [text] TEXT-23: Adding remainder of the o.a.c.l.text package

chtompki Sun, 13 Nov 2016 12:34:38 -0800

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/StrTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java 
b/src/main/java/org/apache/commons/text/StrTokenizer.java
new file mode 100644
index 0000000..a980bf9
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StrTokenizer.java
@@ -0,0 +1,1116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Tokenizes a string based on delimiters (separators)
+ * and supporting quoting and ignored character concepts.
+ * <p>
+ * This class can split a String into many smaller strings. It aims
+ * to do a similar job to {@link java.util.StringTokenizer StringTokenizer},
+ * however it offers much more control and flexibility including implementing
+ * the <code>ListIterator</code> interface. By default, it is set up
+ * like <code>StringTokenizer</code>.
+ * <p>
+ * The input String is split into a number of <i>tokens</i>.
+ * Each token is separated from the next String by a <i>delimiter</i>.
+ * One or more delimiter characters must be specified.
+ * <p>
+ * Each token may be surrounded by quotes.
+ * The <i>quote</i> matcher specifies the quote character(s).
+ * A quote may be escaped within a quoted section by duplicating itself.
+ * <p>
+ * Between each token and the delimiter are potentially characters that need trimming.
+ * The <i>trimmer</i> matcher specifies these characters.
+ * One usage might be to trim whitespace characters.
+ * <p>
+ * At any point outside the quotes there might potentially be invalid characters.
+ * The <i>ignored</i> matcher specifies these characters to be removed.
+ * One usage might be to remove new line characters.
+ * <p>
+ * Empty tokens may be removed or returned as null.
+ * <pre>
+ * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
+ * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
+ * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
+ * </pre>
+ * <p>
+ *
+ * This tokenizer has the following properties and options:
+ *
+ * <table summary="Tokenizer Properties">
+ *  <tr>
+ *   <th>Property</th><th>Type</th><th>Default</th>
+ *  </tr>
+ *  <tr>
+ *   <td>delim</td><td>CharSetMatcher</td><td>{ \t\n\r\f}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>quote</td><td>NoneMatcher</td><td>{}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>ignore</td><td>NoneMatcher</td><td>{}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>emptyTokenAsNull</td><td>boolean</td><td>false</td>
+ *  </tr>
+ *  <tr>
+ *   <td>ignoreEmptyTokens</td><td>boolean</td><td>true</td>
+ *  </tr>
+ * </table>
+ */
+public class StrTokenizer implements ListIterator<String>, Cloneable {
+
+    /** Pre-configured CSV tokenizer; {@link #getCSVClone()} returns clones of it. */
+    private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;
+    /** Pre-configured TSV tokenizer; {@link #getTSVClone()} returns clones of it. */
+    private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;
+    static {
+        // Configure each prototype through a local and assign the constant last.
+        final StrTokenizer csv = new StrTokenizer();
+        csv.setDelimiterMatcher(StrMatcher.commaMatcher());
+        csv.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
+        csv.setIgnoredMatcher(StrMatcher.noneMatcher());
+        csv.setTrimmerMatcher(StrMatcher.trimMatcher());
+        csv.setEmptyTokenAsNull(false);
+        csv.setIgnoreEmptyTokens(false);
+        CSV_TOKENIZER_PROTOTYPE = csv;
+
+        // Identical to the CSV settings apart from the tab delimiter.
+        final StrTokenizer tsv = new StrTokenizer();
+        tsv.setDelimiterMatcher(StrMatcher.tabMatcher());
+        tsv.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
+        tsv.setIgnoredMatcher(StrMatcher.noneMatcher());
+        tsv.setTrimmerMatcher(StrMatcher.trimMatcher());
+        tsv.setEmptyTokenAsNull(false);
+        tsv.setIgnoreEmptyTokens(false);
+        TSV_TOKENIZER_PROTOTYPE = tsv;
+    }
+
+    /** The characters to tokenize; null when no input has been set. */
+    private char[] chars;
+    /** The tokens of the current input; null until the input has been tokenized. */
+    private String[] tokens;
+    /** The iteration cursor: index of the token the next forward step returns. */
+    private int tokenPos;
+
+    /** Matcher recognising the delimiter between tokens. */
+    private StrMatcher delimMatcher = StrMatcher.splitMatcher();
+    /** Matcher recognising the quote that may surround a token. */
+    private StrMatcher quoteMatcher = StrMatcher.noneMatcher();
+    /** Matcher recognising characters that are to be ignored. */
+    private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();
+    /** Matcher recognising characters that are to be trimmed. */
+    private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();
+
+    /** Whether to return empty tokens as null (off by default). */
+    private boolean emptyAsNull;
+    /** Whether to ignore empty tokens (on by default). */
+    private boolean ignoreEmptyTokens = true;
+
+    //-----------------------------------------------------------------------
+
+    /**
+     * Creates an independent copy of the shared CSV prototype tokenizer.
+     *
+     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>
+     */
+    private static StrTokenizer getCSVClone() {
+        final Object copy = CSV_TOKENIZER_PROTOTYPE.clone();
+        return (StrTokenizer) copy;
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings.
+     * No input is set on the returned instance. The default for CSV processing
+     * will be trim whitespace from both ends (which can be overridden with
+     * the setTrimmer method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to parse.
+     *
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance() {
+        return getCSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings,
+     * already primed with the given input. By default whitespace is trimmed
+     * from both ends of each token (which can be overridden with the
+     * setTrimmer method).
+     *
+     * @param input  the text to parse
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance(final String input) {
+        // reset(String) returns the tokenizer it was called on
+        return getCSVClone().reset(input);
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings,
+     * already primed with the given character array. By default whitespace is
+     * trimmed from both ends of each token (which can be overridden with the
+     * setTrimmer method).
+     *
+     * @param input  the characters to parse
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance(final char[] input) {
+        // reset(char[]) returns the tokenizer it was called on
+        return getCSVClone().reset(input);
+    }
+
+    /**
+     * Creates an independent copy of the shared TSV prototype tokenizer.
+     *
+     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>
+     */
+    private static StrTokenizer getTSVClone() {
+        final Object copy = TSV_TOKENIZER_PROTOTYPE.clone();
+        return (StrTokenizer) copy;
+    }
+
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings.
+     * No input is set on the returned instance. The default for TSV processing
+     * will be trim whitespace from both ends (which can be overridden with the
+     * setTrimmer method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to parse.
+     *
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance() {
+        return getTSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings,
+     * already primed with the given input. By default whitespace is trimmed
+     * from both ends of each token (which can be overridden with the
+     * setTrimmer method).
+     *
+     * @param input  the string to parse
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance(final String input) {
+        // reset(String) returns the tokenizer it was called on
+        return getTSVClone().reset(input);
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings,
+     * already primed with the given character array. By default whitespace is
+     * trimmed from both ends of each token (which can be overridden with the
+     * setTrimmer method).
+     *
+     * @param input  the characters to parse
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance(final char[] input) {
+        // reset(char[]) returns the tokenizer it was called on
+        return getTSVClone().reset(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer, but with no text to tokenize.
+     * <p>
+     * This constructor is normally used with {@link #reset(String)} or
+     * {@link #reset(char[])}, which supply the text afterwards.
+     */
+    public StrTokenizer() {
+        super();
+        this.chars = null;
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer.
+     *
+     * @param input  the string which is to be parsed, null means no text to parse
+     */
+    public StrTokenizer(final String input) {
+        super();
+        // a null input is kept as "no text" rather than rejected
+        chars = input == null ? null : input.toCharArray();
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character.
+     * The input is stored by {@link #StrTokenizer(String)}; the delimiter is
+     * then installed with <code>setDelimiterChar</code>.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter character
+     */
+    public StrTokenizer(final String input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter string.
+     * The input is stored by {@link #StrTokenizer(String)}; the delimiter is
+     * then installed with <code>setDelimiterString</code>.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter string
+     */
+    public StrTokenizer(final String input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     * The input is stored by {@link #StrTokenizer(String)}; the delimiter is
+     * then installed with <code>setDelimiterMatcher</code>.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter matcher
+     */
+    public StrTokenizer(final String input, final StrMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer that splits on a delimiter character and
+     * recognises quoted sections introduced by a quote character.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter character
+     * @param quote  the field quoted string character
+     */
+    public StrTokenizer(final String input,
+                        final char delim,
+                        final char quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer that splits with a delimiter matcher and
+     * recognises quoted sections with a quote matcher.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter matcher
+     * @param quote  the field quoted string matcher
+     */
+    public StrTokenizer(final String input,
+                        final StrMatcher delim,
+                        final StrMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer.
+     *
+     * @param input  the characters which are to be parsed; the array is cloned,
+     *  null means no text to parse
+     */
+    public StrTokenizer(final char[] input) {
+        super();
+        // keep a private copy so this tokenizer does not share the caller's array
+        this.chars = input == null ? null : input.clone();
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified character.
+     *
+     * @param input  the characters which are to be parsed; cloned by the
+     *  {@link #StrTokenizer(char[])} constructor this delegates to
+     * @param delim the field delimiter character
+     */
+    public StrTokenizer(final char[] input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified string.
+     *
+     * @param input  the characters which are to be parsed; cloned by the
+     *  {@link #StrTokenizer(char[])} constructor this delegates to
+     * @param delim the field delimiter string
+     */
+    public StrTokenizer(final char[] input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     *
+     * @param input  the characters which are to be parsed; cloned by the
+     *  {@link #StrTokenizer(char[])} constructor this delegates to
+     * @param delim  the field delimiter matcher
+     */
+    public StrTokenizer(final char[] input, final StrMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer that splits on a delimiter character and
+     * recognises quoted sections introduced by a quote character.
+     *
+     * @param input  the characters which are to be parsed; cloned by the
+     *  {@link #StrTokenizer(char[])} constructor at the end of the delegation chain
+     * @param delim  the field delimiter character
+     * @param quote  the field quoted string character
+     */
+    public StrTokenizer(final char[] input,
+                        final char delim,
+                        final char quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer that splits with a delimiter matcher and
+     * recognises quoted sections with a quote matcher.
+     *
+     * @param input  the characters which are to be parsed; cloned by the
+     *  {@link #StrTokenizer(char[])} constructor at the end of the delegation chain
+     * @param delim  the field delimiter matcher
+     * @param quote  the field quoted string matcher
+     */
+    public StrTokenizer(final char[] input,
+                        final StrMatcher delim,
+                        final StrMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    // API
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the number of tokens found in the String.
+     * <p>
+     * The input is tokenized first if that has not happened yet.
+     *
+     * @return the number of matched tokens
+     */
+    public int size() {
+        checkTokenized();
+        return tokens.length;
+    }
+
+    /**
+     * Gets the next token from the String and advances the cursor past it.
+     * Behaves like {@link #next()}, except that running out of tokens yields
+     * null instead of a {@link NoSuchElementException}.
+     *
+     * @return the next sequential token, or null when no more tokens are found
+     */
+    public String nextToken() {
+        return hasNext() ? tokens[tokenPos++] : null;
+    }
+
+    /**
+     * Gets the previous token from the String and moves the cursor back onto it.
+     * Behaves like {@link #previous()}, except that null is returned when
+     * there is no previous token.
+     *
+     * @return the previous sequential token, or null when no more tokens are found
+     */
+    public String previousToken() {
+        return hasPrevious() ? tokens[--tokenPos] : null;
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable array.
+     * <p>
+     * The input is tokenized first if that has not happened yet; the returned
+     * array is a clone of the internal token array.
+     *
+     * @return the tokens as a String array
+     */
+    public String[] getTokenArray() {
+        checkTokenized();
+        return tokens.clone();
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable list.
+     * <p>
+     * The input is tokenized first if that has not happened yet.
+     *
+     * @return the tokens as a new List of Strings
+     */
+    public List<String> getTokenList() {
+        checkTokenized();
+        final List<String> copy = new ArrayList<>(tokens.length);
+        // appends every token, in order, to the freshly created list
+        Collections.addAll(copy, tokens);
+        return copy;
+    }
+
+    /**
+     * Resets this tokenizer, forgetting all parsing and iteration already completed.
+     * <p>
+     * The text to parse is left untouched, so the same input can be tokenized
+     * and iterated again.
+     *
+     * @return this, to enable chaining
+     */
+    public org.apache.commons.text.StrTokenizer reset() {
+        tokenPos = 0;
+        tokens = null;
+        return this;
+    }
+
+    /**
+     * Resets this tokenizer and hands it a new input string to parse.
+     * This lets one tokenizer, with its settings intact, be reused across
+     * several input lines.
+     *
+     * @param input  the new string to tokenize, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public org.apache.commons.text.StrTokenizer reset(final String input) {
+        reset();
+        this.chars = input == null ? null : input.toCharArray();
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input character array to parse.
+     * In this manner you can re-use a tokenizer with the same settings
+     * on multiple input lines.
+     * <p>
+     * The array is cloned, matching {@link #StrTokenizer(char[])}, so later
+     * changes to the caller's array cannot alter the text being tokenized.
+     *
+     * @param input  the new character array to tokenize, cloned, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public org.apache.commons.text.StrTokenizer reset(final char[] input) {
+        reset();
+        // defensive copy: tokenization is lazy, so a shared array could change before it runs
+        this.chars = input == null ? null : input.clone();
+        return this;
+    }
+
+    // ListIterator
+    //-----------------------------------------------------------------------
+    /**
+     * Checks whether there are any more tokens.
+     * <p>
+     * The input is tokenized first if that has not happened yet.
+     *
+     * @return true if there are more tokens
+     */
+    @Override
+    public boolean hasNext() {
+        checkTokenized();
+        return tokenPos < tokens.length;
+    }
+
+    /**
+     * Gets the next token and advances the cursor past it.
+     *
+     * @return the next String token
+     * @throws NoSuchElementException if there are no more elements
+     */
+    @Override
+    public String next() {
+        if (!hasNext()) {
+            throw new NoSuchElementException();
+        }
+        return tokens[tokenPos++];
+    }
+
+    /**
+     * Gets the index of the next token to return.
+     * <p>
+     * This is the current cursor position; no tokenization is triggered.
+     *
+     * @return the next token index
+     */
+    @Override
+    public int nextIndex() {
+        return tokenPos;
+    }
+
+    /**
+     * Checks whether there are any previous tokens that can be iterated to.
+     * <p>
+     * The input is tokenized first if that has not happened yet.
+     *
+     * @return true if there are previous tokens
+     */
+    @Override
+    public boolean hasPrevious() {
+        checkTokenized();
+        return tokenPos > 0;
+    }
+
+    /**
+     * Gets the token immediately before the cursor and moves the cursor back onto it.
+     *
+     * @return the previous token
+     * @throws NoSuchElementException if there is no previous token
+     */
+    @Override
+    public String previous() {
+        if (!hasPrevious()) {
+            throw new NoSuchElementException();
+        }
+        return tokens[--tokenPos];
+    }
+
+    /**
+     * Gets the index of the previous token.
+     * <p>
+     * This is one less than the cursor position, so it is -1 when the cursor
+     * is at the start.
+     *
+     * @return the previous token index
+     */
+    @Override
+    public int previousIndex() {
+        return tokenPos - 1;
+    }
+
+    /**
+     * Unsupported ListIterator operation; tokens cannot be removed through the iterator.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException("remove() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation; tokens cannot be replaced through the iterator.
+     *
+     * @param obj this parameter ignored.
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void set(final String obj) {
+        throw new UnsupportedOperationException("set() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation; tokens cannot be inserted through the iterator.
+     *
+     * @param obj this parameter ignored.
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void add(final String obj) {
+        throw new UnsupportedOperationException("add() is unsupported");
+    }
+
+    // Implementation
+    //-----------------------------------------------------------------------
+    /**
+     * Tokenizes the current input on first use; does nothing when the tokens
+     * are already available.
+     */
+    private void checkTokenized() {
+        if (tokens != null) {
+            return;
+        }
+        // tokenize is still called without input, as a subclass may do some work there
+        final List<String> split = chars == null
+                ? tokenize(null, 0, 0)
+                : tokenize(chars, 0, chars.length);
+        tokens = split.toArray(new String[split.size()]);
+    }
+
+    /**
+     * Internal method that performs the tokenization.
+     * <p>
+     * Most users of this class do not need to call this method. This method
+     * will be called automatically by other (public) methods when required.
+     * <p>
+     * This method exists to allow subclasses to add code before or after the
+     * tokenization. For example, a subclass could alter the character array,
+     * offset or count to be parsed, or call the tokenizer multiple times on
+     * multiple strings. It is also possible to filter the results.
+     * <p>
+     * <code>StrTokenizer</code> will always pass a zero offset and a count
+     * equal to the length of the array to this method, however a subclass
+     * may pass other values, or even an entirely different array. The range
+     * that is tokenized is <code>offset</code> (inclusive) to
+     * <code>offset + count</code> (exclusive).
+     *
+     * @param srcChars  the character array being tokenized, may be null
+     * @param offset  the start position within the character array, must be valid
+     * @param count  the number of characters to tokenize, must be valid
+     * @return the modifiable list of String tokens, unmodifiable if null array or zero count
+     */
+    protected List<String> tokenize(final char[] srcChars, final int offset, final int count) {
+        if (srcChars == null || count == 0) {
+            return Collections.emptyList();
+        }
+        final StrBuilder buf = new StrBuilder();
+        final List<String> tokenList = new ArrayList<>();
+        // The token readers take an exclusive end index rather than a length, so
+        // convert once here; using count directly ignored any non-zero offset.
+        final int end = offset + count;
+        int pos = offset;
+
+        // loop around the requested range; a negative position means the end was reached
+        while (pos >= 0 && pos < end) {
+            // find next token
+            pos = readNextToken(srcChars, pos, end, buf, tokenList);
+
+            // handle case where end of string is a delimiter
+            if (pos >= end) {
+                addToken(tokenList, "");
+            }
+        }
+        return tokenList;
+    }
+
+    /**
+     * Appends a token to the list, applying the empty-token settings:
+     * an empty (or null) token is dropped when empty tokens are ignored,
+     * and otherwise stored as null when empty tokens are mapped to null.
+     *
+     * @param list  the list to add to
+     * @param tok  the token to add
+     */
+    private void addToken(final List<String> list, String tok) {
+        final boolean empty = tok == null || tok.length() == 0;
+        if (!empty) {
+            list.add(tok);
+            return;
+        }
+        if (isIgnoreEmptyTokens()) {
+            return;
+        }
+        list.add(isEmptyTokenAsNull() ? null : tok);
+    }
+
+    /**
+     * Reads character by character through the String to get the next token.
+     * <p>
+     * Leading characters matched by the ignored or trimmer matcher are skipped
+     * first. The method then adds an empty token if the end of the input or a
+     * delimiter is found at that position; otherwise it delegates to
+     * <code>readWithQuotes</code>, starting after the opening quote when the
+     * quote matcher matches.
+     *
+     * @param srcChars  the character array being tokenized
+     * @param start  the first character of field
+     * @param len  the length of the character array being tokenized; used as the
+     *  exclusive end index of the scan
+     * @param workArea  a temporary work area
+     * @param tokenList  the list of parsed tokens
+     * @return the starting position of the next field (the character
+     *  immediately after the delimiter), or -1 if end of string found
+     */
+    private int readNextToken(final char[] srcChars, int start, final int len, 
final StrBuilder workArea, final List<String> tokenList) {
+        // skip leading ignored or trimmable characters, unless one is
+        // the field delimiter or the quote character
+        while (start < len) {
+            final int removeLen = Math.max(
+                    getIgnoredMatcher().isMatch(srcChars, start, start, len),
+                    getTrimmerMatcher().isMatch(srcChars, start, start, len));
+            if (removeLen == 0 ||
+                getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 
||
+                getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) {
+                break;
+            }
+            start += removeLen;
+        }
+
+        // handle reaching end: only skipped characters remained, so the field is empty
+        if (start >= len) {
+            addToken(tokenList, "");
+            return -1;
+        }
+
+        // handle empty token: a delimiter right at the field start
+        final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, 
start, len);
+        if (delimLen > 0) {
+            addToken(tokenList, "");
+            return start + delimLen;
+        }
+
+        // handle found token: begin after the opening quote if there is one
+        final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, 
len);
+        if (quoteLen > 0) {
+            return readWithQuotes(srcChars, start + quoteLen, len, workArea, 
tokenList, start, quoteLen);
+        }
+        return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0);
+    }
+
+    /**
+     * Reads a possibly quoted string token.
+     *
+     * @param srcChars  the character array being tokenized
+     * @param start  the first character of field
+     * @param len  the length of the character array being tokenized
+     * @param workArea  a temporary work area
+     * @param tokenList  the list of parsed tokens
+     * @param quoteStart  the start position of the matched quote, 0 if no 
quoting
+     * @param quoteLen  the length of the matched quote, 0 if no quoting
+     * @return the starting position of the next field (the character
+     *  immediately after the delimiter, or if end of string found,
+     *  then the length of string
+     */
+    private int readWithQuotes(final char[] srcChars, final int start, final 
int len, final StrBuilder workArea,
+                               final List<String> tokenList, final int 
quoteStart, final int quoteLen) {
+        // Loop until we've found the end of the quoted
+        // string or the end of the input
+        workArea.clear();
+        int pos = start;
+        // quoteLen > 0 means the caller already matched an opening quote,
+        // so the scan begins in quoting mode
+        boolean quoting = quoteLen > 0;
+        // number of leading workArea characters that belong to the token;
+        // anything appended beyond this mark is dropped when the token is added
+        int trimStart = 0;
+
+        while (pos < len) {
+            // quoting mode can occur several times throughout a string
+            // we must switch between quoting and non-quoting until we
+            // encounter a non-quoted delimiter, or end of string
+            if (quoting) {
+                // In quoting mode
+
+                // If we've found a quote character, see if it's
+                // followed by a second quote.  If so, then we need
+                // to actually put the quote character into the token
+                // rather than end the token.
+                if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+                    if (isQuote(srcChars, pos + quoteLen, len, quoteStart, 
quoteLen)) {
+                        // matched pair of quotes, thus an escaped quote
+                        workArea.append(srcChars, pos, quoteLen);
+                        pos += quoteLen * 2;
+                        trimStart = workArea.size();
+                        continue;
+                    }
+
+                    // end of quoting
+                    quoting = false;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // copy regular character from inside quotes
+                // (quoted content always counts as part of the token, so the mark advances)
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+
+            } else {
+                // Not in quoting mode
+
+                // check for delimiter, and thus end of token
+                final int delimLen = getDelimiterMatcher().isMatch(srcChars, 
pos, start, len);
+                if (delimLen > 0) {
+                    // return condition when end of token found
+                    addToken(tokenList, workArea.substring(0, trimStart));
+                    return pos + delimLen;
+                }
+
+                // check for quote, and thus back into quoting mode
+                // (only possible if this token started with a quote, i.e. quoteLen > 0)
+                if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, 
quoteLen)) {
+                    quoting = true;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // check for ignored (outside quotes), and ignore
+                final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, 
pos, start, len);
+                if (ignoredLen > 0) {
+                    pos += ignoredLen;
+                    continue;
+                }
+
+                // check for trimmed character
+                // don't yet know if its at the end, so copy to workArea
+                // use trimStart to keep track of trim at the end
+                // (trimStart is deliberately NOT advanced here)
+                final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, 
pos, start, len);
+                if (trimmedLen > 0) {
+                    workArea.append(srcChars, pos, trimmedLen);
+                    pos += trimmedLen;
+                    continue;
+                }
+
+                // copy regular character from outside quotes
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+            }
+        }
+
+        // return condition when end of string found
+        // (-1 rather than a position, since there is no next field to start from)
+        addToken(tokenList, workArea.substring(0, trimStart));
+        return -1;
+    }
+
+    /**
+     * Checks if the characters at the index specified match the quote
+     * already matched in readNextToken().
+     *
+     * @param srcChars  the character array being tokenized
+     * @param pos  the position to check for a quote
+     * @param len  the length of the character array being tokenized
+     * @param quoteStart  the start position of the matched quote, 0 if no 
quoting
+     * @param quoteLen  the length of the matched quote, 0 if no quoting
+     * @return true if a quote is matched
+     */
+    private boolean isQuote(final char[] srcChars, final int pos, final int 
len, final int quoteStart, final int quoteLen) {
+        for (int i = 0; i < quoteLen; i++) {
+            if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + 
i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Delimiter
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the field delimiter matcher.
+     *
+     * @return the delimiter matcher in use
+     */
+    public StrMatcher getDelimiterMatcher() {
+        // plain accessor: hands back the stored matcher itself
+        return delimMatcher;
+    }
+
+    /**
+     * Sets the field delimiter matcher.
+     * <p>
+     * The delimiter is used to separate one token from another.
+     *
+     * @param delim  the delimiter matcher to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterMatcher(final StrMatcher delim) {
+        // null is never stored; StrMatcher.noneMatcher() is substituted instead
+        this.delimMatcher = delim == null ? StrMatcher.noneMatcher() : delim;
+        return this;
+    }
+
+    /**
+     * Sets the field delimiter character.
+     *
+     * @param delim  the delimiter character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterChar(final char delim) {
+        // wrap the character in a matcher, then delegate to the matcher setter
+        final StrMatcher matcher = StrMatcher.charMatcher(delim);
+        return setDelimiterMatcher(matcher);
+    }
+
+    /**
+     * Sets the field delimiter string.
+     *
+     * @param delim  the delimiter string to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterString(final String delim) {
+        // wrap the string in a matcher, then delegate to the matcher setter
+        final StrMatcher matcher = StrMatcher.stringMatcher(delim);
+        return setDelimiterMatcher(matcher);
+    }
+
+    // Quote
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the quote matcher currently in use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     * The default value is '"' (double quote).
+     *
+     * @return the quote matcher in use
+     */
+    public StrMatcher getQuoteMatcher() {
+        // plain accessor: hands back the stored matcher itself
+        return this.quoteMatcher;
+    }
+
+    /**
+     * Set the quote matcher to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     *
+     * @param quote  the quote matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setQuoteMatcher(final StrMatcher quote) {
+        if (quote == null) {
+            // null is ignored: the current quote matcher stays in place
+            return this;
+        }
+        this.quoteMatcher = quote;
+        return this;
+    }
+
+    /**
+     * Sets the quote character to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     *
+     * @param quote  the quote character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setQuoteChar(final char quote) {
+        // wrap the character in a matcher, then delegate to the matcher setter
+        final StrMatcher matcher = StrMatcher.charMatcher(quote);
+        return setQuoteMatcher(matcher);
+    }
+
+    // Ignored
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the ignored character matcher.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are
+     * within a quoted region.
+     * The default value is not to ignore anything.
+     *
+     * @return the ignored matcher in use
+     */
+    public StrMatcher getIgnoredMatcher() {
+        // plain accessor: hands back the stored matcher itself
+        return this.ignoredMatcher;
+    }
+
+    /**
+     * Set the matcher for characters to ignore.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are
+     * within a quoted region.
+     *
+     * @param ignored  the ignored matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoredMatcher(final StrMatcher ignored) {
+        if (ignored == null) {
+            // null is ignored: the current ignored matcher stays in place
+            return this;
+        }
+        this.ignoredMatcher = ignored;
+        return this;
+    }
+
+    /**
+     * Set the character to ignore.
+     * <p>
+     * This character is ignored when parsing the String, unless it is
+     * within a quoted region.
+     *
+     * @param ignored  the ignored character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoredChar(final char ignored) {
+        // wrap the character in a matcher, then delegate to the matcher setter
+        final StrMatcher matcher = StrMatcher.charMatcher(ignored);
+        return setIgnoredMatcher(matcher);
+    }
+
+    // Trimmer
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the trimmer character matcher.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter
+     * until the token or quote is found.
+     * The default value is not to trim anything.
+     *
+     * @return the trimmer matcher in use
+     */
+    public StrMatcher getTrimmerMatcher() {
+        // plain accessor: hands back the stored matcher itself
+        return this.trimmerMatcher;
+    }
+
+    /**
+     * Sets the matcher for characters to trim.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter
+     * until the token or quote is found.
+     *
+     * @param trimmer  the trimmer matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setTrimmerMatcher(final StrMatcher trimmer) {
+        if (trimmer == null) {
+            // null is ignored: the current trimmer matcher stays in place
+            return this;
+        }
+        this.trimmerMatcher = trimmer;
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently returns empty tokens as null.
+     * The default for this property is false.
+     *
+     * @return true if empty tokens are returned as null
+     */
+    public boolean isEmptyTokenAsNull() {
+        // plain accessor for the emptyAsNull flag
+        return emptyAsNull;
+    }
+
+    /**
+     * Sets whether the tokenizer should return empty tokens as null.
+     * The default for this property is false.
+     *
+     * @param emptyAsNull  whether empty tokens are returned as null
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) {
+        // store the flag and return this instance so calls can be chained
+        this.emptyAsNull = emptyAsNull;
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently ignores empty tokens.
+     * The default for this property is true.
+     *
+     * @return true if empty tokens are not returned
+     */
+    public boolean isIgnoreEmptyTokens() {
+        // plain accessor for the ignoreEmptyTokens flag
+        return this.ignoreEmptyTokens;
+    }
+
+    /**
+     * Sets whether the tokenizer should ignore and not return empty tokens.
+     * The default for this property is true.
+     *
+     * @param ignoreEmptyTokens  whether empty tokens are not returned
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) {
+        // store the flag and return this instance so calls can be chained
+        this.ignoreEmptyTokens = ignoreEmptyTokens;
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the String content that the tokenizer is parsing.
+     *
+     * @return the string content being parsed
+     */
+    public String getContent() {
+        // null when no content is held, otherwise a new String built from it
+        return chars == null ? null : new String(chars);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so
+     * that it will be at the start of the token list.
+     * If a {@link CloneNotSupportedException} is caught, return 
<code>null</code>.
+     * 
+     * @return a new instance of this Tokenizer which has been reset.
+     */
+    @Override
+    public Object clone() {
+        try {
+            return cloneReset();
+        } catch (final CloneNotSupportedException ex) {
+            return null;
+        }
+    }
+
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so 
that
+     * it will be at the start of the token list.
+     * 
+     * @return a new instance of this Tokenizer which has been reset.
+     * @throws CloneNotSupportedException if there is a problem cloning
+     */
+    Object cloneReset() throws CloneNotSupportedException {
+        // split out of clone() so that the exception path can be exercised by tests
+        final StrTokenizer copy = (StrTokenizer) super.clone();
+        if (copy.chars != null) {
+            // give the copy its own array instead of sharing this instance's
+            copy.chars = copy.chars.clone();
+        }
+        // rewind the copy before handing it out
+        copy.reset();
+        return copy;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets a String representation of this tokenizer: a fixed placeholder
+     * if the input has not been tokenized yet, otherwise the token list.
+     *
+     * @return the string representation of this tokenizer
+     */
+    @Override
+    public String toString() {
+        // tokens == null means nothing has been tokenized so far
+        return tokens == null
+                ? "StrTokenizer[not tokenized yet]"
+                : "StrTokenizer" + getTokenList();
+    }
+
+}


http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/WordUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/WordUtils.java 
b/src/main/java/org/apache/commons/text/WordUtils.java
new file mode 100644
index 0000000..1d0085c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -0,0 +1,733 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.lang.reflect.Array;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>Operations on Strings that contain words.</p>
+ * 
+ * <p>This class tries to handle <code>null</code> input gracefully.
+ * An exception will not be thrown for a <code>null</code> input.
+ * Each method documents its behaviour in more detail.</p>
+ */
+public class WordUtils {
+
+    /**
+     * <p><code>WordUtils</code> instances should NOT be constructed in
+     * standard programming. Instead, the class should be used as
+     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public WordUtils() {
+        // nothing to initialise; the implicit superclass constructor call suffices
+    }
+
+    // Wrapping
+    //--------------------------------------------------------------------------
+    /**
+     * <p>Wraps a single line of text, identifying words by <code>' 
'</code>.</p>
+     * 
+     * <p>New lines will be separated by the system property line separator.
+     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
+     * 
+     * <p>Leading spaces on a new line are stripped.
+     * Trailing spaces are not stripped.</p>
+     *
+     * <table border="1" summary="Wrap Results">
+     *  <tr>
+     *   <th>input</th>
+     *   <th>wrapLength</th>
+     *   <th>result</th>
+     *  </tr>
+     *  <tr>
+     *   <td>null</td>
+     *   <td>*</td>
+     *   <td>null</td>
+     *  </tr>
+     *  <tr>
+     *   <td>""</td>
+     *   <td>*</td>
+     *   <td>""</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 
columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
+     *   <td>20</td>
+     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here, http://commons.apache.org, to jump to the commons 
website"</td>
+     *   <td>20</td>
+     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons 
website"</td>
+     *  </tr>
+     * </table>
+     *
+     * (assuming that '\n' is the systems line separator)
+     *
+     * @param str  the String to be word wrapped, may be null
+     * @param wrapLength  the column to wrap the words at, less than 1 is 
treated as 1
+     * @return a line with newlines inserted, <code>null</code> if null input
+     */
+    public static String wrap(final String str, final int wrapLength) {
+        // defaults: null new-line string (system line separator), long words left intact
+        final String defaultNewLine = null;
+        final boolean breakLongWords = false;
+        return wrap(str, wrapLength, defaultNewLine, breakLongWords);
+    }
+    
+    /**
+     * <p>Wraps a single line of text, identifying words by <code>' 
'</code>.</p>
+     * 
+     * <p>Leading spaces on a new line are stripped.
+     * Trailing spaces are not stripped.</p>
+     *
+     * <table border="1" summary="Wrap Results">
+     *  <tr>
+     *   <th>input</th>
+     *   <th>wrapLength</th>
+     *   <th>newLineString</th>
+     *   <th>wrapLongWords</th>
+     *   <th>result</th>
+     *  </tr>
+     *  <tr>
+     *   <td>null</td>
+     *   <td>*</td>
+     *   <td>*</td>
+     *   <td>true/false</td>
+     *   <td>null</td>
+     *  </tr>
+     *  <tr>
+     *   <td>""</td>
+     *   <td>*</td>
+     *   <td>*</td>
+     *   <td>true/false</td>
+     *   <td>""</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>true/false</td>
+     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 
columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>"&lt;br /&gt;"</td>
+     *   <td>true/false</td>
+     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to 
be wrapped after&lt;br /&gt;20 columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>null</td>
+     *   <td>true/false</td>
+     *   <td>"Here is one line of" + systemNewLine + "text that is going" + 
systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>false</td>
+     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>true</td>
+     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
+     *  </tr>
+     * </table>
+     *
+     * @param str  the String to be word wrapped, may be null
+     * @param wrapLength  the column to wrap the words at, less than 1 is 
treated as 1
+     * @param newLineStr  the string to insert for a new line, 
+     *  <code>null</code> uses the system property line separator
+     * @param wrapLongWords  true if long words (such as URLs) should be 
wrapped
+     * @return a line with newlines inserted, <code>null</code> if null input
+     */
+    public static String wrap(final String str, final int wrapLength, final 
String newLineStr, final boolean wrapLongWords) {
+        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
+    }
+
+    /**
+     * <p>Wraps a single line of text, identifying words by 
<code>wrapOn</code>.</p>
+     *
+     * <p>Leading spaces on a new line are stripped.
+     * Trailing spaces are not stripped.</p>
+     *
+     * <table border="1" summary="Wrap Results">
+     *  <tr>
+     *   <th>input</th>
+     *   <th>wrapLength</th>
+     *   <th>newLineString</th>
+     *   <th>wrapLongWords</th>
+     *   <th>wrapOn</th>
+     *   <th>result</th>
+     *  </tr>
+     *  <tr>
+     *   <td>null</td>
+     *   <td>*</td>
+     *   <td>*</td>
+     *   <td>true/false</td>
+     *   <td>*</td>
+     *   <td>null</td>
+     *  </tr>
+     *  <tr>
+     *   <td>""</td>
+     *   <td>*</td>
+     *   <td>*</td>
+     *   <td>true/false</td>
+     *   <td>*</td>
+     *   <td>""</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>true/false</td>
+     *   <td>" "</td>
+     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 
columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>"&lt;br /&gt;"</td>
+     *   <td>true/false</td>
+     *   <td>" "</td>
+     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to 
be wrapped after&lt;br /&gt;20 columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Here is one line of text that is going to be wrapped after 20 
columns."</td>
+     *   <td>20</td>
+     *   <td>null</td>
+     *   <td>true/false</td>
+     *   <td>" "</td>
+     *   <td>"Here is one line of" + systemNewLine + "text that is going" + 
systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>false</td>
+     *   <td>" "</td>
+     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>true</td>
+     *   <td>" "</td>
+     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
+     *  </tr>
+     *  <tr>
+     *   <td>"flammable/inflammable"</td>
+     *   <td>20</td>
+     *   <td>"\n"</td>
+     *   <td>true</td>
+     *   <td>"/"</td>
+     *   <td>"flammable\ninflammable"</td>
+     *  </tr>
+     * </table>
+     * @param str  the String to be word wrapped, may be null
+     * @param wrapLength  the column to wrap the words at, less than 1 is 
treated as 1
+     * @param newLineStr  the string to insert for a new line,
+     *  <code>null</code> uses the system property line separator
+     * @param wrapLongWords  true if long words (such as URLs) should be 
wrapped
+     * @param wrapOn  regular expression matching the characters the text may be broken on,
+     *               if a null or blank string is provided a space character will be used
+     * @return a line with newlines inserted, <code>null</code> if null input
+     */
+    public static String wrap(final String str, int wrapLength, String 
newLineStr, final boolean wrapLongWords, String wrapOn) {
+        if (str == null) {
+            return null;
+        }
+        if (newLineStr == null) {
+            newLineStr = System.getProperty("line.separator");
+        }
+        if (wrapLength < 1) {
+            wrapLength = 1;
+        }
+        if (wrapOn == null || wrapOn.length() == 0 || wrapOn.trim().length() 
== 0) {
+            wrapOn = " ";
+        }
+        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
+        final int inputLineLength = str.length();
+        int offset = 0;
+        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 
32);
+
+        while (offset < inputLineLength) {
+            int spaceToWrapAt = -1;
+            Matcher matcher = patternToWrapOn.matcher(str.substring(offset, 
Math
+                    .min(offset + wrapLength + 1, inputLineLength)));
+            if (matcher.find()) {
+                if (matcher.start() == 0) {
+                    offset += matcher.end();
+                    continue;
+                }else {
+                    spaceToWrapAt = matcher.start();
+                }
+            }
+
+            // only last line without leading spaces is left
+            if(inputLineLength - offset <= wrapLength) {
+                break;
+            }
+
+            while(matcher.find()){
+                spaceToWrapAt = matcher.start() + offset;
+            }
+
+            if (spaceToWrapAt >= offset) {
+                // normal case
+                wrappedLine.append(str.substring(offset, spaceToWrapAt));
+                wrappedLine.append(newLineStr);
+                offset = spaceToWrapAt + 1;
+
+            } else {
+                // really long word or URL
+                if (wrapLongWords) {
+                    // wrap really long word one line at a time
+                    wrappedLine.append(str.substring(offset, wrapLength + 
offset));
+                    wrappedLine.append(newLineStr);
+                    offset += wrapLength;
+                } else {
+                    // do not wrap really long word, just extend beyond limit
+                    matcher = patternToWrapOn.matcher(str.substring(offset + 
wrapLength));
+                    if (matcher.find()) {
+                        spaceToWrapAt = matcher.start() + offset + wrapLength;
+                    }
+
+                    if (spaceToWrapAt >= 0) {
+                        wrappedLine.append(str.substring(offset, 
spaceToWrapAt));
+                        wrappedLine.append(newLineStr);
+                        offset = spaceToWrapAt + 1;
+                    } else {
+                        wrappedLine.append(str.substring(offset));
+                        offset = inputLineLength;
+                    }
+                }
+            }
+        }
+
+        // Whatever is left in line is short enough to just pass through
+        wrappedLine.append(str.substring(offset));
+
+        return wrappedLine.toString();
+    }
+
+    // Capitalizing
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Capitalizes all the whitespace separated words in a String.
+     * Only the first character of each word is changed. To convert the 
+     * rest of each word to lowercase at the same time, 
+     * use {@link #capitalizeFully(String)}.</p>
+     *
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.
+     * Capitalization uses the Unicode title case, normally equivalent to
+     * upper case.</p>
+     *
+     * <pre>
+     * WordUtils.capitalize(null)        = null
+     * WordUtils.capitalize("")          = ""
+     * WordUtils.capitalize("i am FINE") = "I Am FINE"
+     * </pre>
+     * 
+     * @param str  the String to capitalize, may be null
+     * @return capitalized String, <code>null</code> if null String input
+     * @see #uncapitalize(String)
+     * @see #capitalizeFully(String)
+     */
+    public static String capitalize(final String str) {
+        // a null delimiter array is passed straight through to the two-argument overload
+        return capitalize(str, (char[]) null);
+    }
+
+    /**
+     * <p>Capitalizes all the delimiter separated words in a String.
+     * Only the first character of each word is changed. To convert the 
+     * rest of each word to lowercase at the same time, 
+     * use {@link #capitalizeFully(String, char[])}.</p>
+     *
+     * <p>The delimiters represent a set of characters understood to separate 
words.
+     * The first string character and the first non-delimiter character after a
+     * delimiter will be capitalized. </p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.
+     * Capitalization uses the Unicode title case, normally equivalent to
+     * upper case.</p>
+     *
+     * <pre>
+     * WordUtils.capitalize(null, *)            = null
+     * WordUtils.capitalize("", *)              = ""
+     * WordUtils.capitalize(*, new char[0])     = *
+     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
+     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
+     * </pre>
+     * 
+     * @param str  the String to capitalize, may be null
+     * @param delimiters  set of characters to determine capitalization, null 
means whitespace
+     * @return capitalized String, <code>null</code> if null String input
+     * @see #uncapitalize(String)
+     * @see #capitalizeFully(String)
+     * @since 2.1
+     */
+    public static String capitalize(final String str, final char... delimiters) {
+        if (str == null || str.length() == 0) {
+            return str;
+        }
+        // an explicitly empty delimiter array returns the input unchanged
+        if (delimiters != null && delimiters.length == 0) {
+            return str;
+        }
+        final char[] chars = str.toCharArray();
+        // true while the next non-delimiter character begins a new word
+        boolean atWordStart = true;
+        int index = 0;
+        while (index < chars.length) {
+            final char current = chars[index];
+            if (isDelimiter(current, delimiters)) {
+                atWordStart = true;
+            } else if (atWordStart) {
+                chars[index] = Character.toTitleCase(current);
+                atWordStart = false;
+            }
+            index++;
+        }
+        return new String(chars);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Converts all the whitespace separated words in a String into 
capitalized words, 
+     * that is each word is made up of a titlecase character and then a series 
of 
+     * lowercase characters.  </p>
+     *
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.
+     * Capitalization uses the Unicode title case, normally equivalent to
+     * upper case.</p>
+     *
+     * <pre>
+     * WordUtils.capitalizeFully(null)        = null
+     * WordUtils.capitalizeFully("")          = ""
+     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
+     * </pre>
+     * 
+     * @param str  the String to capitalize, may be null
+     * @return capitalized String, <code>null</code> if null String input
+     */
+    public static String capitalizeFully(final String str) {
+        // a null delimiter array is passed straight through to the two-argument overload
+        return capitalizeFully(str, (char[]) null);
+    }
+
+    /**
+     * <p>Converts all the delimiter separated words in a String into 
capitalized words, 
+     * that is each word is made up of a titlecase character and then a series 
of 
+     * lowercase characters. </p>
+     *
+     * <p>The delimiters represent a set of characters understood to separate 
words.
+     * The first string character and the first non-delimiter character after a
+     * delimiter will be capitalized. </p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.
+     * Capitalization uses the Unicode title case, normally equivalent to
+     * upper case.</p>
+     *
+     * <pre>
+     * WordUtils.capitalizeFully(null, *)            = null
+     * WordUtils.capitalizeFully("", *)              = ""
+     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
+     * WordUtils.capitalizeFully(*, new char[0])     = *
+     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
+     * </pre>
+     * 
+     * @param str  the String to capitalize, may be null
+     * @param delimiters  set of characters to determine capitalization, null 
means whitespace
+     * @return capitalized String, <code>null</code> if null String input
+     * @since 2.1
+     */
+    public static String capitalizeFully(String str, final char... delimiters) 
{
+        final int delimLen = delimiters == null ? -1 : delimiters.length;
+        if (str == null || str.length() == 0 || delimLen == 0) {
+            return str;
+        }
+        str = str.toLowerCase();
+        return capitalize(str, delimiters);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Uncapitalizes all the whitespace separated words in a String.
+     * Only the first character of each word is changed.</p>
+     *
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.</p>
+     *
+     * <pre>
+     * WordUtils.uncapitalize(null)        = null
+     * WordUtils.uncapitalize("")          = ""
+     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
+     * </pre>
+     * 
+     * @param str  the String to uncapitalize, may be null
+     * @return uncapitalized String, <code>null</code> if null String input
+     * @see #capitalize(String)
+     */
+    public static String uncapitalize(final String str) {
+        return uncapitalize(str, null);
+    }
+
+    /**
+     * <p>Uncapitalizes all the delimiter separated words in a String.
+     * Only the first character of each word is changed.</p>
+     *
+     * <p>The delimiters represent a set of characters understood to separate 
words.
+     * The first string character and the first non-delimiter character after a
+     * delimiter will be uncapitalized. </p>
+     *
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.</p>
+     *
+     * <pre>
+     * WordUtils.uncapitalize(null, *)            = null
+     * WordUtils.uncapitalize("", *)              = ""
+     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
+     * WordUtils.uncapitalize(*, new char[0])     = *
+     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
+     * </pre>
+     * 
+     * @param str  the String to uncapitalize, may be null
+     * @param delimiters  set of characters to determine uncapitalization, 
null means whitespace
+     * @return uncapitalized String, <code>null</code> if null String input
+     * @see #capitalize(String)
+     * @since 2.1
+     */
+    public static String uncapitalize(final String str, final char... 
delimiters) {
+        final int delimLen = delimiters == null ? -1 : delimiters.length;
+        if (str == null || str.length() == 0 || delimLen == 0) {
+            return str;
+        }
+        final char[] buffer = str.toCharArray();
+        boolean uncapitalizeNext = true;
+        for (int i = 0; i < buffer.length; i++) {
+            final char ch = buffer[i];
+            if (isDelimiter(ch, delimiters)) {
+                uncapitalizeNext = true;
+            } else if (uncapitalizeNext) {
+                buffer[i] = Character.toLowerCase(ch);
+                uncapitalizeNext = false;
+            }
+        }
+        return new String(buffer);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Swaps the case of a String using a word based algorithm.</p>
+     * 
+     * <ul>
+     *  <li>Upper case character converts to Lower case</li>
+     *  <li>Title case character converts to Lower case</li>
+     *  <li>Lower case character after Whitespace or at start converts to 
Title case</li>
+     *  <li>Other Lower case character converts to Upper case</li>
+     * </ul>
+     * 
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.</p>
+     * 
+     * <pre>
+     * WordUtils.swapCase(null)                 = null
+     * WordUtils.swapCase("")                   = ""
+     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
+     * </pre>
+     * 
+     * @param str  the String to swap case, may be null
+     * @return the changed String, <code>null</code> if null String input
+     */
+    public static String swapCase(final String str) {
+        if (str == null || str.length() == 0) {
+            return str;
+        }
+        final char[] buffer = str.toCharArray();
+
+        boolean whitespace = true;
+
+        for (int i = 0; i < buffer.length; i++) {
+            final char ch = buffer[i];
+            if (Character.isUpperCase(ch)) {
+                buffer[i] = Character.toLowerCase(ch);
+                whitespace = false;
+            } else if (Character.isTitleCase(ch)) {
+                buffer[i] = Character.toLowerCase(ch);
+                whitespace = false;
+            } else if (Character.isLowerCase(ch)) {
+                if (whitespace) {
+                    buffer[i] = Character.toTitleCase(ch);
+                    whitespace = false;
+                } else {
+                    buffer[i] = Character.toUpperCase(ch);
+                }
+            } else {
+                whitespace = Character.isWhitespace(ch);
+            }
+        }
+        return new String(buffer);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Extracts the initial characters from each word in the String.</p>
+     * 
+     * <p>All first characters after whitespace are returned as a new string.
+     * Their case is not changed.</p>
+     *
+     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.</p>
+     *
+     * <pre>
+     * WordUtils.initials(null)             = null
+     * WordUtils.initials("")               = ""
+     * WordUtils.initials("Ben John Lee")   = "BJL"
+     * WordUtils.initials("Ben J.Lee")      = "BJ"
+     * </pre>
+     *
+     * @param str  the String to get initials from, may be null
+     * @return String of initial letters, <code>null</code> if null String 
input
+     * @see #initials(String,char[])
+     * @since 2.2
+     */
+    public static String initials(final String str) {
+        return initials(str, null);
+    }
+
+    /**
+     * <p>Extracts the initial characters from each word in the String.</p>
+     * 
+     * <p>All first characters after the defined delimiters are returned as a 
new string.
+     * Their case is not changed.</p>
+     *
+     * <p>If the delimiters array is null, then Whitespace is used.
+     * Whitespace is defined by {@link Character#isWhitespace(char)}.
+     * A <code>null</code> input String returns <code>null</code>.
+     * An empty delimiter array returns an empty String.</p>
+     *
+     * <pre>
+     * WordUtils.initials(null, *)                = null
+     * WordUtils.initials("", *)                  = ""
+     * WordUtils.initials("Ben John Lee", null)   = "BJL"
+     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
+     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
+     * WordUtils.initials(*, new char[0])         = ""
+     * </pre>
+     * 
+     * @param str  the String to get initials from, may be null
+     * @param delimiters  set of characters to determine words, null means 
whitespace
+     * @return String of initial characters, <code>null</code> if null String 
input
+     * @see #initials(String)
+     * @since 2.2
+     */
+    public static String initials(final String str, final char... delimiters) {
+        if (str == null || str.length() == 0) {
+            return str;
+        }
+        if (delimiters != null && delimiters.length == 0) {
+            return "";
+        }
+        final int strLen = str.length();
+        final char[] buf = new char[strLen / 2 + 1];
+        int count = 0;
+        boolean lastWasGap = true;
+        for (int i = 0; i < strLen; i++) {
+            final char ch = str.charAt(i);
+
+            if (isDelimiter(ch, delimiters)) {
+                lastWasGap = true;
+            } else if (lastWasGap) {
+                buf[count++] = ch;
+                lastWasGap = false;
+            } else {
+                continue; // ignore ch
+            }
+        }
+        return new String(buf, 0, count);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Checks if the String contains all words in the given array.</p>
+     *
+     * <p>
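+     * A {@code null} or empty String will return {@code false}. A {@code null} or
+     * zero-length search array, or one containing a null or blank element, will return 
{@code false}.
+     * Each search word is inserted verbatim into a regular expression between
+     * {@code \b} word boundaries, so regex metacharacters in a word are treated
+     * as pattern syntax rather than literal text.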
+     * </p>
+     *
+     * <pre>
+     * WordUtils.containsAllWords(null, *)            = false
+     * WordUtils.containsAllWords("", *)              = false
+     * WordUtils.containsAllWords(*, null)            = false
+     * WordUtils.containsAllWords(*, [])              = false
+     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
+     * WordUtils.containsAllWords("abc def", "def", "abc") = true
+     * </pre>
+     *
+     *
+     * @param word The CharSequence to check, may be null
+     * @param words The array of String words to search for, may be null
+     * @return {@code true} if all search words are found, {@code false} 
otherwise
+     * @since 3.5
+     */
+    public static boolean containsAllWords(final CharSequence word, final 
CharSequence... words) {
+        if (word == null || word.length() == 0 || words == null || 
Array.getLength(words) == 0) {
+            return false;
+        }
+        for (final CharSequence w : words) {
+            if (w == null || w.length() == 0 || 
String.valueOf(w).trim().length() == 0 ) {
+                return false;
+            }
+            final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*");
+            if (!p.matcher(word).matches()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Is the character a delimiter.
+     *
+     * @param ch  the character to check
+     * @param delimiters  the delimiters, null means whitespace
+     * @return true if it is a delimiter
+     */
+    private static boolean isDelimiter(final char ch, final char[] delimiters) 
{
+        if (delimiters == null) {
+            return Character.isWhitespace(ch);
+        }
+        for (final char delimiter : delimiters) {
+            if (ch == delimiter) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java 
b/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
index d8b50e8..d209b3c 100644
--- a/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
+++ b/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
@@ -22,8 +22,6 @@ import java.io.Writer;
 /**
  * Executes a sequence of translators one after the other. Execution ends 
whenever 
  * the first translator consumes codepoints from the input.
- * 
- * @since 3.0
  */
 public class AggregateTranslator extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java 
b/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
index 16ec02f..0fed939 100644
--- 
a/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
+++ 
b/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
@@ -25,8 +25,6 @@ import java.util.Locale;
  * An API for translating text. 
  * Its core use is to escape and unescape text. Because escaping and 
unescaping 
  * is completely contextual, the API does not present two separate signatures.
- * 
- * @since 3.0
  */
 public abstract class CharSequenceTranslator {
 
@@ -114,8 +112,8 @@ public abstract class CharSequenceTranslator {
      * @param translators CharSequenceTranslator array of translators to merge 
with this one
      * @return CharSequenceTranslator merging this translator with the others
      */
-    public final org.apache.commons.text.translate.CharSequenceTranslator 
with(final org.apache.commons.text.translate.CharSequenceTranslator... 
translators) {
-        final org.apache.commons.text.translate.CharSequenceTranslator[] 
newArray = new 
org.apache.commons.text.translate.CharSequenceTranslator[translators.length + 
1];
+    public final CharSequenceTranslator with(final CharSequenceTranslator... 
translators) {
+        final CharSequenceTranslator[] newArray = new 
CharSequenceTranslator[translators.length + 1];
         newArray[0] = this;
         System.arraycopy(translators, 0, newArray, 1, translators.length);
         return new AggregateTranslator(newArray);

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java 
b/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
index cac3d8f..c63165c 100644
--- a/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
+++ b/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
@@ -22,8 +22,6 @@ import java.io.Writer;
 /**
  * Helper subclass to CharSequenceTranslator to allow for translations that 
  * will replace up to one character at a time.
- * 
- * @since 3.0
  */
 public abstract class CodePointTranslator extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/EntityArrays.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java 
b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
index 5c7c4e3..99626e6 100644
--- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java
+++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
@@ -20,8 +20,6 @@ package org.apache.commons.text.translate;
  * Class holding various entity data for HTML and XML - generally for use with
  * the LookupTranslator.
  * All arrays are of length [*][2].
- *
- * @since 3.0
  */
 public class EntityArrays {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java 
b/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
index a9a186e..8c5b2b6 100644
--- a/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
+++ b/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
@@ -18,8 +18,6 @@ package org.apache.commons.text.translate;
 
 /**
  * Translates codepoints to their Unicode escaped value suitable for Java 
source.
- * 
- * @since 3.2
  */
 public class JavaUnicodeEscaper extends UnicodeEscaper {
 
@@ -32,7 +30,7 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
      *            above which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.JavaUnicodeEscaper 
above(final int codepoint) {
+    public static JavaUnicodeEscaper above(final int codepoint) {
         return outsideOf(0, codepoint);
     }
 
@@ -45,7 +43,7 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
      *            below which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.JavaUnicodeEscaper 
below(final int codepoint) {
+    public static JavaUnicodeEscaper below(final int codepoint) {
         return outsideOf(codepoint, Integer.MAX_VALUE);
     }
 
@@ -60,8 +58,8 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
      *            below which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.JavaUnicodeEscaper 
between(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.JavaUnicodeEscaper(codepointLow, 
codepointHigh, true);
+    public static JavaUnicodeEscaper between(final int codepointLow, final int 
codepointHigh) {
+        return new JavaUnicodeEscaper(codepointLow, codepointHigh, true);
     }
 
     /**
@@ -75,8 +73,8 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
      *            above which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.JavaUnicodeEscaper 
outsideOf(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.JavaUnicodeEscaper(codepointLow, 
codepointHigh, false);
+    public static JavaUnicodeEscaper outsideOf(final int codepointLow, final 
int codepointHigh) {
+        return new JavaUnicodeEscaper(codepointLow, codepointHigh, false);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java 
b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
index 614c86e..f73f312 100644
--- a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
+++ b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
@@ -23,8 +23,6 @@ import java.util.HashSet;
 
 /**
  * Translates a value using a lookup table.
- *
- * @since 3.0
  */
 public class LookupTranslator extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java 
b/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java
index a3bf24b..3e4bbd0 100644
--- a/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java
+++ b/src/main/java/org/apache/commons/text/translate/NumericEntityEscaper.java
@@ -21,8 +21,6 @@ import java.io.Writer;
 
 /**
  * Translates codepoints to their XML numeric entity escaped value.
- *
- * @since 3.0
  */
 public class NumericEntityEscaper extends CodePointTranslator {
 
@@ -59,7 +57,7 @@ public class NumericEntityEscaper extends CodePointTranslator 
{
      * @param codepoint below which to escape
      * @return the newly created {@code NumericEntityEscaper} instance
      */
-    public static org.apache.commons.text.translate.NumericEntityEscaper 
below(final int codepoint) {
+    public static NumericEntityEscaper below(final int codepoint) {
         return outsideOf(codepoint, Integer.MAX_VALUE);
     }
 
@@ -69,7 +67,7 @@ public class NumericEntityEscaper extends CodePointTranslator 
{
      * @param codepoint above which to escape
      * @return the newly created {@code NumericEntityEscaper} instance
      */
-    public static org.apache.commons.text.translate.NumericEntityEscaper 
above(final int codepoint) {
+    public static NumericEntityEscaper above(final int codepoint) {
         return outsideOf(0, codepoint);
     }
 
@@ -80,8 +78,8 @@ public class NumericEntityEscaper extends CodePointTranslator 
{
      * @param codepointHigh below which to escape
      * @return the newly created {@code NumericEntityEscaper} instance
      */
-    public static org.apache.commons.text.translate.NumericEntityEscaper 
between(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.NumericEntityEscaper(codepointLow, 
codepointHigh, true);
+    public static NumericEntityEscaper between(final int codepointLow, final 
int codepointHigh) {
+        return new NumericEntityEscaper(codepointLow, codepointHigh, true);
     }
 
     /**
@@ -91,8 +89,8 @@ public class NumericEntityEscaper extends CodePointTranslator 
{
      * @param codepointHigh above which to escape
      * @return the newly created {@code NumericEntityEscaper} instance
      */
-    public static org.apache.commons.text.translate.NumericEntityEscaper 
outsideOf(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.NumericEntityEscaper(codepointLow, 
codepointHigh, false);
+    public static NumericEntityEscaper outsideOf(final int codepointLow, final 
int codepointHigh) {
+        return new NumericEntityEscaper(codepointLow, codepointHigh, false);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java 
b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
index cd4605d..6e2016e 100644
--- 
a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
+++ 
b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java
@@ -26,8 +26,6 @@ import java.util.EnumSet;
  * the specific codepoint.
  *
  * Note that the semi-colon is optional.
- * 
- * @since 3.0
  */
 public class NumericEntityUnescaper extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/OctalUnescaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/OctalUnescaper.java 
b/src/main/java/org/apache/commons/text/translate/OctalUnescaper.java
index 5801348..f9c0c14 100644
--- a/src/main/java/org/apache/commons/text/translate/OctalUnescaper.java
+++ b/src/main/java/org/apache/commons/text/translate/OctalUnescaper.java
@@ -26,8 +26,6 @@ import java.io.Writer;
  *
  * Note that this currently only supports the viable range of octal for Java; 
namely 
  * 1 to 377. This is because parsing Java is the main use case.
- * 
- * @since 3.0
  */
 public class OctalUnescaper extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java 
b/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
index 398a0e9..0c90f6d 100644
--- a/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
+++ b/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
@@ -21,8 +21,6 @@ import java.io.Writer;
 
 /**
  * Translates codepoints to their Unicode escaped value.
- *
- * @since 3.0
  */
 public class UnicodeEscaper extends CodePointTranslator {
 
@@ -59,7 +57,7 @@ public class UnicodeEscaper extends CodePointTranslator {
      * @param codepoint below which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.UnicodeEscaper below(final 
int codepoint) {
+    public static UnicodeEscaper below(final int codepoint) {
         return outsideOf(codepoint, Integer.MAX_VALUE);
     }
 
@@ -69,7 +67,7 @@ public class UnicodeEscaper extends CodePointTranslator {
      * @param codepoint above which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.UnicodeEscaper above(final 
int codepoint) {
+    public static UnicodeEscaper above(final int codepoint) {
         return outsideOf(0, codepoint);
     }
 
@@ -80,8 +78,8 @@ public class UnicodeEscaper extends CodePointTranslator {
      * @param codepointHigh above which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.UnicodeEscaper 
outsideOf(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.UnicodeEscaper(codepointLow, codepointHigh, 
false);
+    public static UnicodeEscaper outsideOf(final int codepointLow, final int 
codepointHigh) {
+        return new UnicodeEscaper(codepointLow, codepointHigh, false);
     }
 
     /**
@@ -91,8 +89,8 @@ public class UnicodeEscaper extends CodePointTranslator {
      * @param codepointHigh below which to escape
      * @return the newly created {@code UnicodeEscaper} instance
      */
-    public static org.apache.commons.text.translate.UnicodeEscaper 
between(final int codepointLow, final int codepointHigh) {
-        return new 
org.apache.commons.text.translate.UnicodeEscaper(codepointLow, codepointHigh, 
true);
+    public static UnicodeEscaper between(final int codepointLow, final int 
codepointHigh) {
+        return new UnicodeEscaper(codepointLow, codepointHigh, true);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/UnicodeUnescaper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/UnicodeUnescaper.java 
b/src/main/java/org/apache/commons/text/translate/UnicodeUnescaper.java
index b92f421..e8fda68 100644
--- a/src/main/java/org/apache/commons/text/translate/UnicodeUnescaper.java
+++ b/src/main/java/org/apache/commons/text/translate/UnicodeUnescaper.java
@@ -23,8 +23,6 @@ import java.io.Writer;
  * Translates escaped Unicode values of the form \\u+\d\d\d\d back to 
  * Unicode. It supports multiple 'u' characters and will work with or 
  * without the +.
- * 
- * @since 3.0
  */
 public class UnicodeUnescaper extends CharSequenceTranslator {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/main/java/org/apache/commons/text/translate/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/translate/package-info.java 
b/src/main/java/org/apache/commons/text/translate/package-info.java
index 722d7c9..3ef4d48 100644
--- a/src/main/java/org/apache/commons/text/translate/package-info.java
+++ b/src/main/java/org/apache/commons/text/translate/package-info.java
@@ -18,7 +18,5 @@
  * <p> An API for creating text translation routines from a set of smaller 
building blocks. Initially created to make it
  * possible for the user to customize the rules in the StringEscapeUtils 
class.</p>
  * <p>These classes are immutable, and therefore thread-safe.</p>
- *
- * @since 3.0
  */
 package org.apache.commons.text.translate;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6f24aa45/src/test/java/org/apache/commons/text/CompositeFormatTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/CompositeFormatTest.java 
b/src/test/java/org/apache/commons/text/CompositeFormatTest.java
new file mode 100644
index 0000000..e9b8064
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/CompositeFormatTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.text;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.CompositeFormat}.
+ */
+public class CompositeFormatTest {
+
+    /**
+     * Ensures that the parse/format separation is correctly maintained. 
+     */
+    @Test
+    public void testCompositeFormat() {
+
+        final Format parser = new Format() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public StringBuffer format(final Object obj, final StringBuffer 
toAppendTo, final FieldPosition pos) {
+                throw new UnsupportedOperationException("Not implemented");
+            }
+
+            @Override
+            public Object parseObject(final String source, final ParsePosition 
pos) {
+                return null;    // do nothing
+            }
+        };
+
+        final Format formatter = new Format() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public StringBuffer format(final Object obj, final StringBuffer 
toAppendTo, final FieldPosition pos) {
+                return null;    // do nothing
+            }
+
+            @Override
+            public Object parseObject(final String source, final ParsePosition 
pos) {
+                throw new UnsupportedOperationException("Not implemented");
+            }
+        };
+
+        final CompositeFormat composite = new CompositeFormat(parser, 
formatter);
+
+        composite.parseObject("", null);
+        composite.format(new Object(), new StringBuffer(), null);
+        assertEquals( "Parser get method incorrectly implemented", parser, 
composite.getParser() );
+        assertEquals( "Formatter get method incorrectly implemented", 
formatter, composite.getFormatter() );
+    }
+
+    @Test
+    public void testUsage() throws Exception {
+        final Format f1 = new SimpleDateFormat("MMddyyyy", Locale.ENGLISH);
+        final Format f2 = new SimpleDateFormat("MMMM d, yyyy", Locale.ENGLISH);
+        final CompositeFormat c = new CompositeFormat(f1, f2);
+        final String testString = "January 3, 2005";
+        assertEquals(testString, c.format(c.parseObject("01032005")));
+        assertEquals(testString, c.reformat("01032005"));
+    }
+
+}

[07/14] [text] TEXT-23: Adding remainder of the o.a.c.l.text package

Reply via email to