[1/2] [text] TEXT-27: Adding StringEscapeUtils from commons-lang:3.5

chtompki Mon, 28 Nov 2016 13:58:47 -0800

Repository: commons-text
Updated Branches:
  refs/heads/master 10b97cb5f -> 43ba72325



TEXT-27: Adding StringEscapeUtils from commons-lang:3.5


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/d8f547e8
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/d8f547e8
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/d8f547e8

Branch: refs/heads/master
Commit: d8f547e818d815c0439f4b3c317d077a0290a5f5
Parents: 10b97cb
Author: Rob Tompkins <chtom...@gmail.com>
Authored: Mon Nov 28 15:02:18 2016 -0500
Committer: Rob Tompkins <chtom...@gmail.com>
Committed: Mon Nov 28 15:02:18 2016 -0500

----------------------------------------------------------------------
 pom.xml                                         |   6 +
 src/changes/changes.xml                         |   1 +
 .../apache/commons/text/StringEscapeUtils.java  | 811 +++++++++++++++++++
 .../commons/text/StringEscapeUtilsTest.java     | 621 ++++++++++++++
 4 files changed, 1439 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/d8f547e8/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0c3a10a..9113d23 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,6 +101,12 @@
       <version>1.4</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.5</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <distributionManagement>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/d8f547e8/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index e60b364..1274672 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
   <body>
 
   <release version="1.0" date="tba" description="tba">
+    <action issue="TEXT-27" type="add" dev="chtompki">Move 
org.apache.commons.lang3.StringEscapeUtils.java into text</action>
     <action issue="TEXT-23" type="add" dev="chtompki">Moving from 
commons-lang, the package org.apache.commons.lang3.text</action>
     <action issue="TEXT-10" type="add" dev="kinow" due-to="Don Jeba">A more 
complex Levenshtein distance</action>
     <action issue="TEXT-24" type="add" dev="chtompki">Add coveralls and 
Travis.ci integration</action>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/d8f547e8/src/main/java/org/apache/commons/text/StringEscapeUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java 
b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
new file mode 100644
index 0000000..6b88275
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
@@ -0,0 +1,811 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.apache.commons.lang3.CharUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import org.apache.commons.text.translate.AggregateTranslator;
+import org.apache.commons.text.translate.CharSequenceTranslator;
+import org.apache.commons.text.translate.EntityArrays;
+import org.apache.commons.text.translate.JavaUnicodeEscaper;
+import org.apache.commons.text.translate.LookupTranslator;
+import org.apache.commons.text.translate.NumericEntityEscaper;
+import org.apache.commons.text.translate.NumericEntityUnescaper;
+import org.apache.commons.text.translate.OctalUnescaper;
+import org.apache.commons.text.translate.UnicodeUnescaper;
+import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
+
+/**
+ * <p>Escapes and unescapes {@code String}s for
+ * Java, JavaScript, JSON, HTML, XML and CSV.</p>
+ *
+ * <p>#ThreadSafe#</p>
+ *
+ *
+ * <p>
+ * This code has been adapted from Apache Commons Lang 3.5.
+ * </p>
+ */
+public class StringEscapeUtils {
+
+    /* ESCAPE TRANSLATORS */
+
+    /**
+     * Translator object for escaping Java. 
+     *
+     * While {@link #escapeJava(String)} is the expected method of use, this 
+     * object allows the Java escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator ESCAPE_JAVA =
+            new LookupTranslator(
+                    new String[][] {
+                            {"\"", "\\\""},
+                            {"\\", "\\\\"},
+                    }).with(
+                    new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
+            ).with(
+                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
+            );
+
+    /**
+     * Translator object for escaping EcmaScript/JavaScript. 
+     *
+     * While {@link #escapeEcmaScript(String)} is the expected method of use, 
this 
+     * object allows the EcmaScript escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
+            new AggregateTranslator(
+                    new LookupTranslator(
+                            new String[][] {
+                                    {"'", "\\'"},
+                                    {"\"", "\\\""},
+                                    {"\\", "\\\\"},
+                                    {"/", "\\/"}
+                            }),
+                    new 
LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
+                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
+            );
+
+    /**
+     * Translator object for escaping Json.
+     *
+     * While {@link #escapeJson(String)} is the expected method of use, this
+     * object allows the Json escaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * @since 3.2
+     */
+    public static final CharSequenceTranslator ESCAPE_JSON =
+            new AggregateTranslator(
+                    new LookupTranslator(
+                            new String[][] {
+                                    {"\"", "\\\""},
+                                    {"\\", "\\\\"},
+                                    {"/", "\\/"}
+                            }),
+                    new 
LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
+                    JavaUnicodeEscaper.outsideOf(32, 0x7f)
+            );
+
+    /**
+     * Translator object for escaping XML.
+     *
+     * While {@link #escapeXml(String)} is the expected method of use, this 
+     * object allows the XML escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
+     */
+    @Deprecated
+    public static final CharSequenceTranslator ESCAPE_XML =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+                    new LookupTranslator(EntityArrays.APOS_ESCAPE())
+            );
+
+    /**
+     * Translator object for escaping XML 1.0.
+     *
+     * While {@link #escapeXml10(String)} is the expected method of use, this
+     * object allows the XML escaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * @since 3.3
+     */
+    public static final CharSequenceTranslator ESCAPE_XML10 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+                    new LookupTranslator(EntityArrays.APOS_ESCAPE()),
+                    new LookupTranslator(
+                            new String[][] {
+                                    { "\u0000", StringUtils.EMPTY },
+                                    { "\u0001", StringUtils.EMPTY },
+                                    { "\u0002", StringUtils.EMPTY },
+                                    { "\u0003", StringUtils.EMPTY },
+                                    { "\u0004", StringUtils.EMPTY },
+                                    { "\u0005", StringUtils.EMPTY },
+                                    { "\u0006", StringUtils.EMPTY },
+                                    { "\u0007", StringUtils.EMPTY },
+                                    { "\u0008", StringUtils.EMPTY },
+                                    { "\u000b", StringUtils.EMPTY },
+                                    { "\u000c", StringUtils.EMPTY },
+                                    { "\u000e", StringUtils.EMPTY },
+                                    { "\u000f", StringUtils.EMPTY },
+                                    { "\u0010", StringUtils.EMPTY },
+                                    { "\u0011", StringUtils.EMPTY },
+                                    { "\u0012", StringUtils.EMPTY },
+                                    { "\u0013", StringUtils.EMPTY },
+                                    { "\u0014", StringUtils.EMPTY },
+                                    { "\u0015", StringUtils.EMPTY },
+                                    { "\u0016", StringUtils.EMPTY },
+                                    { "\u0017", StringUtils.EMPTY },
+                                    { "\u0018", StringUtils.EMPTY },
+                                    { "\u0019", StringUtils.EMPTY },
+                                    { "\u001a", StringUtils.EMPTY },
+                                    { "\u001b", StringUtils.EMPTY },
+                                    { "\u001c", StringUtils.EMPTY },
+                                    { "\u001d", StringUtils.EMPTY },
+                                    { "\u001e", StringUtils.EMPTY },
+                                    { "\u001f", StringUtils.EMPTY },
+                                    { "\ufffe", StringUtils.EMPTY },
+                                    { "\uffff", StringUtils.EMPTY }
+                            }),
+                    NumericEntityEscaper.between(0x7f, 0x84),
+                    NumericEntityEscaper.between(0x86, 0x9f),
+                    new UnicodeUnpairedSurrogateRemover()
+            );
+
+    /**
+     * Translator object for escaping XML 1.1.
+     *
+     * While {@link #escapeXml11(String)} is the expected method of use, this
+     * object allows the XML escaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * @since 3.3
+     */
+    public static final CharSequenceTranslator ESCAPE_XML11 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+                    new LookupTranslator(EntityArrays.APOS_ESCAPE()),
+                    new LookupTranslator(
+                            new String[][] {
+                                    { "\u0000", StringUtils.EMPTY },
+                                    { "\u000b", "&#11;" },
+                                    { "\u000c", "&#12;" },
+                                    { "\ufffe", StringUtils.EMPTY },
+                                    { "\uffff", StringUtils.EMPTY }
+                            }),
+                    NumericEntityEscaper.between(0x1, 0x8),
+                    NumericEntityEscaper.between(0xe, 0x1f),
+                    NumericEntityEscaper.between(0x7f, 0x84),
+                    NumericEntityEscaper.between(0x86, 0x9f),
+                    new UnicodeUnpairedSurrogateRemover()
+            );
+
+    /**
+     * Translator object for escaping HTML version 3.0.
+     *
+     * While {@link #escapeHtml3(String)} is the expected method of use, this 
+     * object allows the HTML escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML3 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
+            );
+
+    /**
+     * Translator object for escaping HTML version 4.0.
+     *
+     * While {@link #escapeHtml4(String)} is the expected method of use, this 
+     * object allows the HTML escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML4 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
+                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
+            );
+
+    /**
+     * Translator object for escaping individual Comma Separated Values. 
+     *
+     * While {@link #escapeCsv(String)} is the expected method of use, this 
+     * object allows the CSV escaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
+
+    // TODO: Create a parent class - 'SinglePassTranslator' ?
+    //       It would handle the index checking + length returning, 
+    //       and could also have an optimization check method.
+    /**
+     * Translator that escapes one Comma Separated Value column.
+     *
+     * The whole input is handled by a single call, which is why
+     * {@link #translate(CharSequence, int, Writer)} rejects any index other than 0.
+     */
+    static class CsvEscaper extends CharSequenceTranslator {
+
+        private static final char CSV_DELIMITER = ',';
+        private static final char CSV_QUOTE = '"';
+        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
+        private static final char[] CSV_SEARCH_CHARS =
+                new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
+
+        /**
+         * Writes the escaped form of {@code input} to {@code out}.
+         *
+         * A value containing none of comma, double quote, CR or LF is written unchanged.
+         * Any other value is wrapped in double quotes, with every embedded double quote doubled.
+         *
+         * @param input the column value to escape
+         * @param index the position being translated; must be 0
+         * @param out   the writer receiving the escaped value
+         * @return the number of code points in {@code input}
+         * @throws IOException if writing to {@code out} fails
+         * @throws IllegalStateException if {@code index} is not 0
+         */
+        @Override
+        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
+            if (index != 0) {
+                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
+            }
+
+            final String value = input.toString();
+            final boolean needsQuoting = !StringUtils.containsNone(value, CSV_SEARCH_CHARS);
+
+            if (needsQuoting) {
+                final String doubledQuote = CSV_QUOTE_STR + CSV_QUOTE_STR;
+                out.write(CSV_QUOTE);
+                out.write(StringUtils.replace(value, CSV_QUOTE_STR, doubledQuote));
+                out.write(CSV_QUOTE);
+            } else {
+                out.write(value);
+            }
+
+            return Character.codePointCount(input, 0, input.length());
+        }
+    }
+
+    /* UNESCAPE TRANSLATORS */
+
+    /**
+     * Translator object for unescaping escaped Java. 
+     *
+     * While {@link #unescapeJava(String)} is the expected method of use, this 
+     * object allows the Java unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    // TODO: throw "illegal character: \92" as an Exception if a \ is at the end of the Java input (as per the compiler)?
+    public static final CharSequenceTranslator UNESCAPE_JAVA =
+            new AggregateTranslator(
+                    new OctalUnescaper(),     // .between('\1', '\377'),
+                    new UnicodeUnescaper(),
+                    new 
LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
+                    new LookupTranslator(
+                            new String[][] {
+                                    {"\\\\", "\\"},
+                                    {"\\\"", "\""},
+                                    {"\\'", "'"},
+                                    {"\\", ""}
+                            })
+            );
+
+    /**
+     * Translator object for unescaping escaped EcmaScript. 
+     *
+     * While {@link #unescapeEcmaScript(String)} is the expected method of 
use, this 
+     * object allows the EcmaScript unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = 
UNESCAPE_JAVA;
+
+    /**
+     * Translator object for unescaping escaped Json.
+     *
+     * While {@link #unescapeJson(String)} is the expected method of use, this
+     * object allows the Json unescaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * @since 3.2
+     */
+    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
+
+    /**
+     * Translator object for unescaping escaped HTML 3.0. 
+     *
+     * While {@link #unescapeHtml3(String)} is the expected method of use, 
this 
+     * object allows the HTML unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator UNESCAPE_HTML3 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped HTML 4.0. 
+     *
+     * While {@link #unescapeHtml4(String)} is the expected method of use, 
this 
+     * object allows the HTML unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator UNESCAPE_HTML4 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
+                    new 
LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped XML.
+     *
+     * While {@link #unescapeXml(String)} is the expected method of use, this 
+     * object allows the XML unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator UNESCAPE_XML =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+                    new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped Comma Separated Value entries.
+     *
+     * While {@link #unescapeCsv(String)} is the expected method of use, this 
+     * object allows the CSV unescaping functionality to be used 
+     * as the foundation for a custom translator. 
+     *
+     * @since 3.0
+     */
+    public static final CharSequenceTranslator UNESCAPE_CSV = new 
CsvUnescaper();
+
+    /**
+     * Translator that unescapes one Comma Separated Value column.
+     *
+     * The whole input is handled by a single call, which is why
+     * {@link #translate(CharSequence, int, Writer)} rejects any index other than 0.
+     */
+    static class CsvUnescaper extends CharSequenceTranslator {
+
+        private static final char CSV_DELIMITER = ',';
+        private static final char CSV_QUOTE = '"';
+        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
+        private static final char[] CSV_SEARCH_CHARS =
+                new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
+
+        /**
+         * Writes the unescaped form of {@code input} to {@code out}.
+         *
+         * A value that is not enclosed in double quotes is written unchanged. This includes
+         * the empty value and a value consisting of a single double quote.
+         * A quoted value whose content contains a comma, double quote, CR or LF is written
+         * without the enclosing quotes and with doubled double quotes collapsed to one.
+         * A quoted value containing none of those characters is written unchanged,
+         * enclosing quotes included.
+         *
+         * @param input the column value to unescape
+         * @param index the position being translated; must be 0
+         * @param out   the writer receiving the unescaped value
+         * @return the number of code points in {@code input}
+         * @throws IOException if writing to {@code out} fails
+         * @throws IllegalStateException if {@code index} is not 0
+         */
+        @Override
+        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
+
+            if (index != 0) {
+                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
+            }
+
+            final int length = input.length();
+
+            // Fewer than two characters cannot be a quoted value. Without this guard a lone
+            // quote is both the first and the last character, and stripping the quotes below
+            // would call subSequence(1, 0) and fail with an index-out-of-bounds error.
+            if (length < 2 || input.charAt(0) != CSV_QUOTE || input.charAt(length - 1) != CSV_QUOTE) {
+                out.write(input.toString());
+                return Character.codePointCount(input, 0, length);
+            }
+
+            // strip quotes
+            final String quoteless = input.subSequence(1, length - 1).toString();
+
+            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
+                // deal with escaped quotes, i.e. ""
+                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
+            } else {
+                // nothing inside required quoting: the value is written as received, quotes included
+                out.write(input.toString());
+            }
+            return Character.codePointCount(input, 0, length);
+        }
+    }
+
+    /* Helper functions */
+
+    /**
+     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
+     * standard programming.</p>
+     *
+     * <p>Instead, the class should be used as:</p>
+     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public StringEscapeUtils() {
+        super();
+    }
+
+    // Java and JavaScript
+    
//--------------------------------------------------------------------------
+    /**
+     * <p>Escapes the characters in a {@code String} using Java String 
rules.</p>
+     *
+     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, 
ff, etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and JavaScript strings
+     * is that in JavaScript, a single quote and forward-slash (/) are 
escaped.</p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn't say, \"Stop!\"
+     * </pre>
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     */
+    public static final String escapeJava(final String input) {
+        return ESCAPE_JAVA.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using EcmaScript String 
rules.</p>
+     * <p>Escapes any values it finds into their EcmaScript String form.
+     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, 
etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and EcmaScript strings
+     * is that in EcmaScript, a single quote and forward-slash (/) are 
escaped.</p>
+     *
+     * <p>Note that EcmaScript is best known by the JavaScript and 
ActionScript dialects. </p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn\'t say, \"Stop!\"
+     * </pre>
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     *
+     * @since 3.0
+     */
+    public static final String escapeEcmaScript(final String input) {
+        return ESCAPE_ECMASCRIPT.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using Json String 
rules.</p>
+     * <p>Escapes any values it finds into their Json String form.
+     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, 
etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and Json strings
+     * is that in Json, forward-slash (/) is escaped.</p>
+     *
+     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn't say, \"Stop!\"
+     * </pre>
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     *
+     * @since 3.2
+     */
+    public static final String escapeJson(final String input) {
+        return ESCAPE_JSON.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any Java literals found in the {@code String}.
+     * For example, it will turn a sequence of {@code '\'} and
+     * {@code 'n'} into a newline character, unless the {@code '\'}
+     * is preceded by another {@code '\'}.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string 
input
+     */
+    public static final String unescapeJava(final String input) {
+        return UNESCAPE_JAVA.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
+     *
+     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
+     * into a newline character, unless the {@code '\'} is preceded by another
+     * {@code '\'}.</p>
+     *
+     * @see #unescapeJava(String)
+     * @param input  the {@code String} to unescape, may be null
+     * @return A new unescaped {@code String}, {@code null} if null string 
input
+     *
+     * @since 3.0
+     */
+    public static final String unescapeEcmaScript(final String input) {
+        return UNESCAPE_ECMASCRIPT.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any Json literals found in the {@code String}.</p>
+     *
+     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
+     * into a newline character, unless the {@code '\'} is preceded by another
+     * {@code '\'}.</p>
+     *
+     * @see #unescapeJava(String)
+     * @param input  the {@code String} to unescape, may be null
+     * @return A new unescaped {@code String}, {@code null} if null string 
input
+     *
+     * @since 3.2
+     */
+    public static final String unescapeJson(final String input) {
+        return UNESCAPE_JSON.translate(input);
+    }
+
+    // HTML and XML
+    
//--------------------------------------------------------------------------
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
+     *
+     * <p>
+     * For example:
+     * </p> 
+     * <p><code>"bread" &amp; "butter"</code></p>
+     * becomes:
+     * <p>
+     * <code>&amp;quot;bread&amp;quot; &amp;amp; 
&amp;quot;butter&amp;quot;</code>.
+     * </p>
+     *
+     * <p>Supports all known HTML 4.0 entities, including funky accents.
+     * Note that the commonly used apostrophe escape character (&amp;apos;)
+     * is not a legal entity and so is not supported. </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     *
+     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
+     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
+     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
+     *
+     * @since 3.0
+     */
+    public static final String escapeHtml4(final String input) {
+        return ESCAPE_HTML4.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
+     * <p>Supports only the HTML 3.0 entities. </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     *
+     * @since 3.0
+     */
+    public static final String escapeHtml3(final String input) {
+        return ESCAPE_HTML3.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Unescapes a string containing entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes. Supports HTML 4.0 entities.</p>
+     *
+     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
+     * will become {@code "<Français>"}</p>
+     *
+     * <p>If an entity is unrecognized, it is left alone, and inserted
+     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
+     * become {@code ">&zzzz;x"}.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string 
input
+     *
+     * @since 3.0
+     */
+    public static final String unescapeHtml4(final String input) {
+        return UNESCAPE_HTML4.translate(input);
+    }
+
+    /**
+     * <p>Unescapes a string containing entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes. Supports only HTML 3.0 entities.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string 
input
+     *
+     * @since 3.0
+     */
+    public static final String unescapeHtml3(final String input) {
+        return UNESCAPE_HTML3.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Escapes the characters in a {@code String} using XML entities.</p>
+     *
+     * <p>For example: {@code "bread" & "butter"} =&gt;
+     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
+     * </p>
+     *
+     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
+     * Does not support DTDs or external entities.</p>
+     *
+     * <p>Note that Unicode characters greater than 0x7f are, as of 3.0, no longer
+     *    escaped. If you still want this functionality, you can achieve it
+     *    via the following:
+     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     * @see #unescapeXml(java.lang.String)
+     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
+     */
+    @Deprecated
+    public static final String escapeXml(final String input) {
+        return ESCAPE_XML.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using XML entities.</p>
+     *
+     * <p>For example: {@code "bread" & "butter"} =&gt;
+     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
+     * </p>
+     *
+     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
+     * characters or unpaired Unicode surrogate codepoints, even after escaping.
+     * {@code escapeXml10} will remove characters that do not fit in the
+     * following ranges:</p>
+     *
+     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
+     *
+     * <p>Though not strictly necessary, {@code escapeXml10} will escape
+     * characters in the following ranges:</p>
+     *
+     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
+     *
+     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
+     * document. If you want to allow more non-text characters in an XML 1.1
+     * document, use {@link #escapeXml11(String)}.</p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     * @see #unescapeXml(java.lang.String)
+     * @since 3.3
+     */
+    public static String escapeXml10(final String input) {
+        return ESCAPE_XML10.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using XML entities.</p>
+     *
+     * <p>For example: {@code "bread" & "butter"} =&gt;
+     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
+     * </p>
+     *
+     * <p>XML 1.1 can represent certain control characters, but it cannot represent
+     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
+     * {@code escapeXml11} will remove characters that do not fit in the following
+     * ranges:</p>
+     *
+     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
+     *
+     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
+     *
+     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
+     *
+     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
+     * use it for XML 1.0 documents.</p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     * @see #unescapeXml(java.lang.String)
+     * @since 3.3
+     */
+    public static String escapeXml11(final String input) {
+        return ESCAPE_XML11.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Unescapes a string containing XML entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes.</p>
+     *
+     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
+     * Does not support DTDs or external entities.</p>
+     *
+     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
+     *    Unicode characters. This may change in future releases. </p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     * @see #escapeXml(String)
+     * @see #escapeXml10(String)
+     * @see #escapeXml11(String)
+     */
+    public static final String unescapeXml(final String input) {
+        return UNESCAPE_XML.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+
+    /**
+     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
+     * if required.</p>
+     *
+     * <p>If the value contains a comma, newline or double quote, then the
+     *    String value is returned enclosed in double quotes.</p>
+     *
+     * <p>Any double quote characters in the value are escaped with another double quote.</p>
+     *
+     * <p>If the value does not contain a comma, newline or double quote, then the
+     *    String value is returned unchanged.</p>
+     *
+     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
+     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
+     *
+     * @param input the input CSV column String, may be null
+     * @return the input String, enclosed in double quotes if the value contains a comma,
+     * newline or double quote, {@code null} if null string input
+     * @since 2.4
+     */
+    public static final String escapeCsv(final String input) {
+        return ESCAPE_CSV.translate(input);
+    }
+
+    /**
+     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
+     *
+     * <p>If the value is enclosed in double quotes, and contains a comma, newline
+     *    or double quote, then quotes are removed. 
+     * </p>
+     *
+     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
+     *    to just one double quote. </p>
+     *
+     * <p>If the value is not enclosed in double quotes, or is and does not contain a
+     *    comma, newline or double quote, then the String value is returned unchanged.</p>
+     *
+     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
+     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
+     *
+     * @param input the input CSV column String, may be null
+     * @return the input String, with enclosing double quotes removed and embedded double
+     * quotes unescaped, {@code null} if null string input
+     * @since 2.4
+     */
+    public static final String unescapeCsv(final String input) {
+        return UNESCAPE_CSV.translate(input);
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/commons-text/blob/d8f547e8/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
new file mode 100644
index 0000000..c86f769
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java
@@ -0,0 +1,621 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Modifier;
+import java.nio.charset.Charset;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.text.translate.CharSequenceTranslator;
+import org.apache.commons.text.translate.NumericEntityEscaper;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link StringEscapeUtils}.
+ *
+ * <p>
+ * This code has been adapted from Apache Commons Lang 3.5.
+ * </p>
+ *
+ */
+public class StringEscapeUtilsTest {
+    private final static String FOO = "foo";
+
+    @Test
+    public void testConstructor() {
+        assertNotNull(new StringEscapeUtils());
+        final Constructor<?>[] cons = 
StringEscapeUtils.class.getDeclaredConstructors();
+        assertEquals(1, cons.length);
+        assertTrue(Modifier.isPublic(cons[0].getModifiers()));
+        assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers()));
+        assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers()));
+    }
+
+    @Test
+    public void testEscapeJava() throws IOException {
+        assertEquals(null, StringEscapeUtils.escapeJava(null));
+        try {
+            StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+        try {
+            StringEscapeUtils.ESCAPE_JAVA.translate("", null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+
+        assertEscapeJava("empty string", "", "");
+        assertEscapeJava(FOO, FOO);
+        assertEscapeJava("tab", "\\t", "\t");
+        assertEscapeJava("backslash", "\\\\", "\\");
+        assertEscapeJava("single quote should not be escaped", "'", "'");
+        assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r");
+        assertEscapeJava("\\u1234", "\u1234");
+        assertEscapeJava("\\u0234", "\u0234");
+        assertEscapeJava("\\u00EF", "\u00ef");
+        assertEscapeJava("\\u0001", "\u0001");
+        assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", 
"\uabcd");
+
+        assertEscapeJava("He didn't say, \\\"stop!\\\"",
+                "He didn't say, \"stop!\"");
+        assertEscapeJava("non-breaking space", "This space is non-breaking:" + 
"\\u00A0",
+                "This space is non-breaking:\u00a0");
+        assertEscapeJava("\\uABCD\\u1234\\u012C",
+                "\uABCD\u1234\u012C");
+    }
+
+    /**
+     * Tests https://issues.apache.org/jira/browse/LANG-421
+     */
+    @Test
+    public void testEscapeJavaWithSlash() {
+        final String input = "String with a slash (/) in it";
+
+        final String expected = input;
+        final String actual = StringEscapeUtils.escapeJava(input);
+
+        /**
+         * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape
+         * in a Java string.
+         */
+        assertEquals(expected, actual);
+    }
+
+    private void assertEscapeJava(final String escaped, final String original) 
throws IOException {
+        assertEscapeJava(null, escaped, original);
+    }
+
+    private void assertEscapeJava(String message, final String expected, final 
String original) throws IOException {
+        final String converted = StringEscapeUtils.escapeJava(original);
+        message = "escapeJava(String) failed" + (message == null ? "" : (": " 
+ message));
+        assertEquals(message, expected, converted);
+
+        final StringWriter writer = new StringWriter();
+        StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
+        assertEquals(expected, writer.toString());
+    }
+
+    @Test
+    public void testUnescapeJava() throws IOException {
+        assertEquals(null, StringEscapeUtils.unescapeJava(null));
+        try {
+            StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+        try {
+            StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+        try {
+            StringEscapeUtils.unescapeJava("\\u02-3");
+            fail();
+        } catch (final RuntimeException ex) {
+        }
+
+        assertUnescapeJava("", "");
+        assertUnescapeJava("test", "test");
+        assertUnescapeJava("\ntest\b", "\\ntest\\b");
+        assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b");
+        assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r");
+        assertUnescapeJava("", "\\");
+        //foo
+        assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx");
+        assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx");
+        assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd");
+    }
+
+    private void assertUnescapeJava(final String unescaped, final String 
original) throws IOException {
+        assertUnescapeJava(null, unescaped, original);
+    }
+
+    private void assertUnescapeJava(final String message, final String 
unescaped, final String original) throws IOException {
+        final String expected = unescaped;
+        final String actual = StringEscapeUtils.unescapeJava(original);
+
+        assertEquals("unescape(String) failed" +
+                        (message == null ? "" : (": " + message)) +
+                        ": expected '" + 
StringEscapeUtils.escapeJava(expected) +
+                        // we escape this so we can see it in the error message
+                        "' actual '" + StringEscapeUtils.escapeJava(actual) + 
"'",
+                expected, actual);
+
+        final StringWriter writer = new StringWriter();
+        StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
+        assertEquals(unescaped, writer.toString());
+
+    }
+
+    @Test
+    public void testEscapeEcmaScript() {
+        assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
+        try {
+            StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+        try {
+            StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+
+        assertEquals("He didn\\'t say, \\\"stop!\\\"", 
StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
+        assertEquals("document.getElementById(\\\"test\\\").value = 
\\'<script>alert(\\'aaa\\');<\\/script>\\';",
+                
StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = 
'<script>alert('aaa');</script>';"));
+    }
+
+
+    // HTML and XML
+    //--------------------------------------------------------------
+
+    private static final String[][] HTML_ESCAPES = {
+            {"no escaping", "plain text", "plain text"},
+            {"no escaping", "plain text", "plain text"},
+            {"empty string", "", ""},
+            {"null", null, null},
+            {"ampersand", "bread &amp; butter", "bread & butter"},
+            {"quotes", "&quot;bread&quot; &amp; butter", "\"bread\" & butter"},
+            {"final character only", "greater than &gt;", "greater than >"},
+            {"first character only", "&lt; less than", "< less than"},
+            {"apostrophe", "Huntington's chorea", "Huntington's chorea"},
+            {"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E 
(nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
+            {"8-bit ascii shouldn't number-escape", "\u0080\u009F", 
"\u0080\u009F"},
+    };
+
+    @Test
+    public void testEscapeHtml() {
+        for (final String[] element : HTML_ESCAPES) {
+            final String message = element[0];
+            final String expected = element[1];
+            final String original = element[2];
+            assertEquals(message, expected, 
StringEscapeUtils.escapeHtml4(original));
+            final StringWriter sw = new StringWriter();
+            try {
+                StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
+            } catch (final IOException e) {
+            }
+            final String actual = original == null ? null : sw.toString();
+            assertEquals(message, expected, actual);
+        }
+    }
+
+    @Test
+    public void testUnescapeHtml4() {
+        for (final String[] element : HTML_ESCAPES) {
+            final String message = element[0];
+            final String expected = element[2];
+            final String original = element[1];
+            assertEquals(message, expected, 
StringEscapeUtils.unescapeHtml4(original));
+
+            final StringWriter sw = new StringWriter();
+            try {
+                StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
+            } catch (final IOException e) {
+            }
+            final String actual = original == null ? null : sw.toString();
+            assertEquals(message, expected, actual);
+        }
+        // \u00E7 is a cedilla (c with wiggle under)
+        // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly
+        // on some locales        
+        assertEquals("funny chars pass through OK", "Fran\u00E7ais", 
StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
+
+        assertEquals("Hello&;World", 
StringEscapeUtils.unescapeHtml4("Hello&;World"));
+        assertEquals("Hello&#;World", 
StringEscapeUtils.unescapeHtml4("Hello&#;World"));
+        assertEquals("Hello&# ;World", 
StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
+        assertEquals("Hello&##;World", 
StringEscapeUtils.unescapeHtml4("Hello&##;World"));
+    }
+
+    @Test
+    public void testUnescapeHexCharsHtml() {
+        // Simple easy to grok test 
+        assertEquals("hex number unescape", "\u0080\u009F", 
StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
+        assertEquals("hex number unescape", "\u0080\u009F", 
StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
+        // Test all Character values:
+        for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
+            final Character c1 = new Character(i);
+            final Character c2 = new Character((char)(i+1));
+            final String expected = c1.toString() + c2.toString();
+            final String escapedC1 = "&#x" + 
Integer.toHexString((c1.charValue())) + ";";
+            final String escapedC2 = "&#x" + 
Integer.toHexString((c2.charValue())) + ";";
+            assertEquals("hex number unescape index " + (int)i, expected, 
StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
+        }
+    }
+
+    @Test
+    public void testUnescapeUnknownEntity() throws Exception {
+        assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
+    }
+
+    @Test
+    public void testEscapeHtmlVersions() throws Exception {
+        assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
+        assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
+
+        // TODO: refine API for escaping/unescaping specific HTML versions
+    }
+
+    @Test
+    @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
+    public void testEscapeXml() throws Exception {
+        assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
+        assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
+
+        assertEquals("XML should not escape >0x7f values",
+                "\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
+        assertEquals("XML should be able to unescape >0x7f values",
+                "\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
+        assertEquals("XML should be able to unescape >0x7f values with one 
leading 0",
+                "\u00A0", StringEscapeUtils.unescapeXml("&#0160;"));
+        assertEquals("XML should be able to unescape >0x7f values with two 
leading 0s",
+                "\u00A0", StringEscapeUtils.unescapeXml("&#00160;"));
+        assertEquals("XML should be able to unescape >0x7f values with three 
leading 0s",
+                "\u00A0", StringEscapeUtils.unescapeXml("&#000160;"));
+
+        assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
+        assertEquals("ain&apos;t", StringEscapeUtils.escapeXml("ain't"));
+        assertEquals("", StringEscapeUtils.escapeXml(""));
+        assertEquals(null, StringEscapeUtils.escapeXml(null));
+        assertEquals(null, StringEscapeUtils.unescapeXml(null));
+
+        StringWriter sw = new StringWriter();
+        try {
+            StringEscapeUtils.ESCAPE_XML.translate("<abc>", sw);
+        } catch (final IOException e) {
+        }
+        assertEquals("XML was escaped incorrectly", "&lt;abc&gt;", 
sw.toString() );
+
+        sw = new StringWriter();
+        try {
+            StringEscapeUtils.UNESCAPE_XML.translate("&lt;abc&gt;", sw);
+        } catch (final IOException e) {
+        }
+        assertEquals("XML was unescaped incorrectly", "<abc>", sw.toString() );
+    }
+
+    @Test
+    public void testEscapeXml10() throws Exception {
+        assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", 
StringEscapeUtils.escapeXml10("a<b>c\"d'e&f"));
+        assertEquals("XML 1.0 should not escape \t \n \r",
+                "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd"));
+        assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19",
+                "ab", 
StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb"));
+        assertEquals("XML 1.0 should omit #xd800-#xdfff",
+                "a\ud7ff  \ue000b", 
StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b"));
+        assertEquals("XML 1.0 should omit #xfffe | #xffff",
+                "a\ufffdb", 
StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb"));
+        assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 
1.1 compatibility",
+                "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", 
StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
+    }
+
+    @Test
+    public void testEscapeXml11() throws Exception {
+        assertEquals("a&lt;b&gt;c&quot;d&apos;e&amp;f", 
StringEscapeUtils.escapeXml11("a<b>c\"d'e&f"));
+        assertEquals("XML 1.1 should not escape \t \n \r",
+                "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd"));
+        assertEquals("XML 1.1 should omit #x0",
+                "ab", StringEscapeUtils.escapeXml11("a\u0000b"));
+        assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19",
+                "a&#1;&#8;&#11;&#12;&#14;&#31;b", 
StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb"));
+        assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F",
+                "a\u007e&#127;&#132;\u0085&#134;&#159;\u00a0b", 
StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b"));
+        assertEquals("XML 1.1 should omit #xd800-#xdfff",
+                "a\ud7ff  \ue000b", 
StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b"));
+        assertEquals("XML 1.1 should omit #xfffe | #xffff",
+                "a\ufffdb", 
StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb"));
+    }
+
+    /**
+     * Tests Supplementary characters. 
+     * <p>
+     * From http://www.w3.org/International/questions/qa-escapes
+     * </p>
+     * <blockquote>
+     * Supplementary characters are those Unicode characters that have code points higher than the characters in
+     * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
+     * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
+     * - you must use the single, code point value for that character. For example, use &amp;&#35;x233B4&#59; rather than
+     * &amp;&#35;xD84C&#59;&amp;&#35;xDFB4&#59;.
+     * </blockquote>
+     * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
+     * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
+     */
+    @Test
+    @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
+    public void testEscapeXmlSupplementaryCharacters() {
+        final CharSequenceTranslator escapeXml =
+                StringEscapeUtils.ESCAPE_XML.with( 
NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );
+
+        assertEquals("Supplementary character must be represented using a 
single escape", "&#144308;",
+                escapeXml.translate("\uD84C\uDFB4"));
+
+        assertEquals("Supplementary characters mixed with basic characters 
should be encoded correctly", "a b c &#144308;",
+                escapeXml.translate("a b c \uD84C\uDFB4"));
+    }
+
+    @Test
+    @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3
+    public void testEscapeXmlAllCharacters() {
+        // http://www.w3.org/TR/xml/#charsets says:
+        // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character,
+        // excluding the surrogate blocks, FFFE, and FFFF. */
+        final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML
+                .with(NumericEntityEscaper.below(9), 
NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19),
+                        NumericEntityEscaper.between(0xD800, 0xDFFF), 
NumericEntityEscaper.between(0xFFFE, 0xFFFF), 
NumericEntityEscaper.above(0x110000));
+
+        assertEquals("&#0;&#1;&#2;&#3;&#4;&#5;&#6;&#7;&#8;", 
escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008"));
+        assertEquals("\t", escapeXml.translate("\t")); // 0x9
+        assertEquals("\n", escapeXml.translate("\n")); // 0xA
+        assertEquals("&#11;&#12;", escapeXml.translate("\u000B\u000C"));
+        assertEquals("\r", escapeXml.translate("\r")); // 0xD
+        assertEquals("Hello World! Ain&apos;t this great?", 
escapeXml.translate("Hello World! Ain't this great?"));
+        assertEquals("&#14;&#15;&#24;&#25;", 
escapeXml.translate("\u000E\u000F\u0018\u0019"));
+    }
+
+    /**
+     * Reverse of the above.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/LANG-729">LANG-729</a>
+     */
+    @Test
+    public void testUnescapeXmlSupplementaryCharacters() {
+        assertEquals("Supplementary character must be represented using a 
single escape", "\uD84C\uDFB4",
+                StringEscapeUtils.unescapeXml("&#144308;") );
+
+        assertEquals("Supplementary characters mixed with basic characters 
should be decoded correctly", "a b c \uD84C\uDFB4",
+                StringEscapeUtils.unescapeXml("a b c &#144308;") );
+    }
+
+    // Tests issue #38569
+    // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569
+    @Test
+    public void testStandaloneAmphersand() {
+        assertEquals("<P&O>", StringEscapeUtils.unescapeHtml4("&lt;P&O&gt;"));
+        assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & 
&lt;"));
+        assertEquals("<P&O>", StringEscapeUtils.unescapeXml("&lt;P&O&gt;"));
+        assertEquals("test & <", StringEscapeUtils.unescapeXml("test & &lt;"));
+    }
+
+    @Test
+    public void testLang313() {
+        assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &amp;"));
+    }
+
+    @Test
+    public void testEscapeCsvString() throws Exception {
+        assertEquals("foo.bar",            
StringEscapeUtils.escapeCsv("foo.bar"));
+        assertEquals("\"foo,bar\"",        
StringEscapeUtils.escapeCsv("foo,bar"));
+        assertEquals("\"foo\nbar\"",       
StringEscapeUtils.escapeCsv("foo\nbar"));
+        assertEquals("\"foo\rbar\"",       
StringEscapeUtils.escapeCsv("foo\rbar"));
+        assertEquals("\"foo\"\"bar\"",     
StringEscapeUtils.escapeCsv("foo\"bar"));
+        assertEquals("foo\uD84C\uDFB4bar", 
StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
+        assertEquals("",   StringEscapeUtils.escapeCsv(""));
+        assertEquals(null, StringEscapeUtils.escapeCsv(null));
+    }
+
+    @Test
+    public void testEscapeCsvWriter() throws Exception {
+        checkCsvEscapeWriter("foo.bar",            "foo.bar");
+        checkCsvEscapeWriter("\"foo,bar\"",        "foo,bar");
+        checkCsvEscapeWriter("\"foo\nbar\"",       "foo\nbar");
+        checkCsvEscapeWriter("\"foo\rbar\"",       "foo\rbar");
+        checkCsvEscapeWriter("\"foo\"\"bar\"",     "foo\"bar");
+        checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
+        checkCsvEscapeWriter("", null);
+        checkCsvEscapeWriter("", "");
+    }
+
+    private void checkCsvEscapeWriter(final String expected, final String 
value) {
+        try {
+            final StringWriter writer = new StringWriter();
+            StringEscapeUtils.ESCAPE_CSV.translate(value, writer);
+            assertEquals(expected, writer.toString());
+        } catch (final IOException e) {
+            fail("Threw: " + e);
+        }
+    }
+
+    @Test
+    public void testUnescapeCsvString() throws Exception {
+        assertEquals("foo.bar",              
StringEscapeUtils.unescapeCsv("foo.bar"));
+        assertEquals("foo,bar",              
StringEscapeUtils.unescapeCsv("\"foo,bar\""));
+        assertEquals("foo\nbar",             
StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
+        assertEquals("foo\rbar",             
StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
+        assertEquals("foo\"bar",             
StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
+        assertEquals("foo\uD84C\uDFB4bar",   
StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
+        assertEquals("",   StringEscapeUtils.unescapeCsv(""));
+        assertEquals(null, StringEscapeUtils.unescapeCsv(null));
+
+        assertEquals("\"foo.bar\"",          
StringEscapeUtils.unescapeCsv("\"foo.bar\""));
+    }
+
+    @Test
+    public void testUnescapeCsvWriter() throws Exception {
+        checkCsvUnescapeWriter("foo.bar",            "foo.bar");
+        checkCsvUnescapeWriter("foo,bar",            "\"foo,bar\"");
+        checkCsvUnescapeWriter("foo\nbar",           "\"foo\nbar\"");
+        checkCsvUnescapeWriter("foo\rbar",           "\"foo\rbar\"");
+        checkCsvUnescapeWriter("foo\"bar",           "\"foo\"\"bar\"");
+        checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
+        checkCsvUnescapeWriter("", null);
+        checkCsvUnescapeWriter("", "");
+
+        checkCsvUnescapeWriter("\"foo.bar\"",        "\"foo.bar\"");
+    }
+
+    private void checkCsvUnescapeWriter(final String expected, final String 
value) {
+        try {
+            final StringWriter writer = new StringWriter();
+            StringEscapeUtils.UNESCAPE_CSV.translate(value, writer);
+            assertEquals(expected, writer.toString());
+        } catch (final IOException e) {
+            fail("Threw: " + e);
+        }
+    }
+
+    /**
+     * Tests // https://issues.apache.org/jira/browse/LANG-480
+     */
+    @Test
+    public void testEscapeHtmlHighUnicode() {
+        // this is the utf8 representation of the character:
+        // COUNTING ROD UNIT DIGIT THREE
+        // in Unicode
+        // codepoint: U+1D362
+        final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, 
(byte)0xA2 };
+
+        final String original = new String(data, Charset.forName("UTF8"));
+
+        final String escaped = StringEscapeUtils.escapeHtml4( original );
+        assertEquals( "High Unicode should not have been escaped", original, 
escaped);
+
+        final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
+        assertEquals( "High Unicode should have been unchanged", original, unescaped);
+
+        // TODO: I think this should hold, needs further investigation
+        //        String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "&#119650;" );
+        //        assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity);
+    }
+
+    /**
+     * Tests https://issues.apache.org/jira/browse/LANG-339
+     */
+    @Test
+    public void testEscapeHiragana() {
+        // Some random Japanese Unicode characters
+        final String original = "\u304B\u304C\u3068";
+        final String escaped = StringEscapeUtils.escapeHtml4(original);
+        assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4",
+                original, escaped);
+
+        final String unescaped = StringEscapeUtils.unescapeHtml4( escaped );
+
+        assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped);
+    }
+
+    /**
+     * Tests https://issues.apache.org/jira/browse/LANG-708
+     *
+     * @throws IOException
+     *             if an I/O error occurs
+     */
+    @Test
+    public void testLang708() throws IOException {
+        final FileInputStream fis = new FileInputStream("src/test/resources/lang-708-input.txt");
+        final String input = IOUtils.toString(fis, "UTF-8");
+        final String escaped = StringEscapeUtils.escapeEcmaScript(input);
+        // just the end:
+        assertTrue(escaped, escaped.endsWith("}]"));
+        // a little more:
+        assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]"));
+        fis.close();
+    }
+
+    /**
+     * Tests https://issues.apache.org/jira/browse/LANG-720
+     */
+    @Test
+    @SuppressWarnings( "deprecation" ) // escapeXml(String) has been replaced by escapeXml10(String) and escapeXml11(String) in 3.3
+    public void testLang720() {
+        final String input = "\ud842\udfb7" + "A";
+        final String escaped = StringEscapeUtils.escapeXml(input);
+        assertEquals(input, escaped);
+    }
+
+    /**
+     * Tests https://issues.apache.org/jira/browse/LANG-911
+     */
+    @Test
+    public void testLang911() {
+        final String bellsTest = "\ud83d\udc80\ud83d\udd14";
+        final String value = StringEscapeUtils.escapeJava(bellsTest);
+        final String valueTest = StringEscapeUtils.unescapeJava(value);
+        assertEquals(bellsTest, valueTest);
+    }
+
+    @Test
+    public void testEscapeJson() {
+        assertEquals(null, StringEscapeUtils.escapeJson(null));
+        try {
+            StringEscapeUtils.ESCAPE_JSON.translate(null, null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+        try {
+            StringEscapeUtils.ESCAPE_JSON.translate("", null);
+            fail();
+        } catch (final IOException ex) {
+            fail();
+        } catch (final IllegalArgumentException ex) {
+        }
+
+        assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\""));
+
+        final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/";
+        final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/";
+
+        assertEquals(expected, StringEscapeUtils.escapeJson(input));
+    }
+
+}
\ No newline at end of file

[1/2] [text] TEXT-27: Adding StringEscapeUtils from commons-lang:3.5

Reply via email to