This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 1.x
in repository https://gitbox.apache.org/repos/asf/commons-fileupload.git

commit f4d1066825e5830475bfb16b3d97c884d62c7e40
Author: Merbin J Anselm <[email protected]>
AuthorDate: Mon Dec 9 17:36:57 2019 +0530

    Added support for RFC 5987 aka RFC 2231
---
 .../apache/commons/fileupload/ParameterParser.java |   9 +-
 .../fileupload/util/mime/RFC2231Utility.java       | 106 +++++++++++++++++++++
 .../commons/fileupload/ParameterParserTest.java    |  15 ++-
 .../util/mime/RFC2231UtilityTestCase.java          |  54 +++++++++++
 4 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/apache/commons/fileupload/ParameterParser.java 
b/src/main/java/org/apache/commons/fileupload/ParameterParser.java
index 9a484cc2..1a0ca851 100644
--- a/src/main/java/org/apache/commons/fileupload/ParameterParser.java
+++ b/src/main/java/org/apache/commons/fileupload/ParameterParser.java
@@ -22,6 +22,7 @@ import java.util.Locale;
 import java.util.Map;
 
 import org.apache.commons.fileupload.util.mime.MimeUtility;
+import org.apache.commons.fileupload.util.mime.RFC2231Utility;
 
 /**
  * A simple parser intended to parse sequences of name/value pairs.
@@ -305,10 +306,12 @@ public class ParameterParser {
 
         String paramName = null;
         String paramValue = null;
+        boolean hasExtendedParams = false;
         while (hasChar()) {
             paramName = parseToken(new char[] {
                     '=', separator });
             paramValue = null;
+            hasExtendedParams = (paramName != null) ? paramName.contains("*") 
: false; //TODO: Check only if delimiter is at end
             if (hasChar() && (charArray[pos] == '=')) {
                 pos++; // skip '='
                 paramValue = parseQuotedToken(new char[] {
@@ -316,7 +319,8 @@ public class ParameterParser {
 
                 if (paramValue != null) {
                     try {
-                        paramValue = MimeUtility.decodeText(paramValue);
+                        paramValue = hasExtendedParams ? 
RFC2231Utility.decodeText(paramValue)
+                                : MimeUtility.decodeText(paramValue);
                     } catch (final UnsupportedEncodingException e) {
                         // let's keep the original value in this case
                     }
@@ -326,6 +330,9 @@ public class ParameterParser {
                 pos++; // skip separator
             }
             if ((paramName != null) && (paramName.length() > 0)) {
+                if (hasExtendedParams) {
+                    paramName = paramName.replace("*", ""); //strip of the * 
from the name //TODO: Replace the last character alone
+                }
                 if (this.lowerCaseNames) {
                     paramName = paramName.toLowerCase(Locale.ENGLISH);
                 }
diff --git 
a/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java 
b/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java
new file mode 100644
index 00000000..f625494b
--- /dev/null
+++ b/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.fileupload.util.mime;
+
+import java.io.ByteArrayOutputStream;
+import java.io.UnsupportedEncodingException;
+/**
+ * Utility class to decode the character set of HTTP header field values based on RFC 2231.
+ * This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers.
+ *
+ * RFC 5987 builds on RFC 2231 but has a narrower scope, for example a
+ * <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
+ * and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>.
+ *
+ * <p>
+ * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
+ * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
+ */
+public final class RFC2231Utility {
+
+    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); // index of each digit equals its numeric value
+
+    private static final byte[] HEX_DECODE = new byte[0x80]; // lookup table indexed by a 7-bit character code
+
+    // Fill HEX_DECODE so the character code of a hex digit (upper or lower case) maps to its value 0-15; other slots keep the default 0
+    static {
+        for (int i = 0; i < HEX_DIGITS.length; i++) {
+            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
+            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
+        }
+    }
+
+    /**
+     * Decodes a string of text obtained from an HTTP header as per RFC 2231.
+     *
+     * <p>
+     * The charset name is the text before the first single quote, the language
+     * sits between the first and the second single quote (it is not used for
+     * decoding), and everything after the second single quote is the
+     * percent-encoded value. If the input contains fewer than two single
+     * quotes it is returned unchanged.
+     *
+     * <p>
+     * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A}
+     * will be decoded to {@code This is ***fun***}
+     * <p>
+     * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate}
+     * will be decoded to {@code £ rate}
+     * <p>
+     * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates}
+     * will be decoded to {@code £ and € rates}
+     *
+     * @param encodedText ASCII-only text to be decoded, in the format
+     *        {@code <charset>'<language>'<encoded_value>}
+     * @return the value decoded with the given charset, or {@code encodedText}
+     *         itself when either single quote is missing
+     * @throws UnsupportedEncodingException if the charset named in
+     *         {@code encodedText} is not supported
+     */
+    public static String decodeText(String encodedText) throws 
UnsupportedEncodingException {
+        int langDelimitStart = encodedText.indexOf('\''); // first quote: end of the charset name
+        if (langDelimitStart == -1) {
+            // missing charset: no quote at all, hand the input back untouched
+            return encodedText;
+        }
+        String mimeCharset = encodedText.substring(0, langDelimitStart);
+        int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1); // second quote: end of the language
+        if (langDelimitEnd == -1) {
+            // missing language: only one quote present, hand the input back untouched
+            return encodedText;
+        }
+        byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1)); // percent-decode everything after the second quote
+        return new String(bytes, getJavaCharset(mimeCharset));
+    }
+
+    /**
+     * Percent-decodes {@code text} into raw bytes.
+     *
+     * <p>
+     * Each {@code %} followed by two characters becomes one byte: both
+     * characters are masked with {@code 0x7f} and looked up in
+     * {@code HEX_DECODE}, the first giving the high four bits and the second
+     * the low four bits. Characters that are not hex digits are not rejected;
+     * they contribute whatever the table holds for them. Every other character
+     * is narrowed to a single byte and written as is.
+     *
+     * <p>
+     * A {@code %} with fewer than two characters after it ends the decoding;
+     * that {@code %} and whatever follows it are dropped.
+     *
+     * @param text - ASCII text input
+     * @return the decoded bytes
+     */
+    private static byte[] fromHex(String text) {
+        ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
+        for (int i = 0; i < text.length();) { // i is advanced inside the body, by one or by three
+            char c = text.charAt(i++);
+            if (c == '%') {
+                if (i > text.length() - 2) { // fewer than two characters left after the '%'
+                    break; // unterminated sequence: stop and drop the remainder
+                }
+                byte b1 = HEX_DECODE[text.charAt(i++) & 0x7f];
+                byte b2 = HEX_DECODE[text.charAt(i++) & 0x7f];
+                out.write((b1 << 4) | b2); // high nibble and low nibble combined into one byte
+            } else {
+                out.write((byte) c);
+            }
+        }
+        return out.toByteArray();
+    }
+
+    private static String getJavaCharset(String mimeCharset) {
+        // the MIME charset name is returned unchanged (no translation to a Java-specific name); good enough for standard values
+        return mimeCharset;
+    }
+}
diff --git 
a/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java 
b/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java
index fdad8b25..a97cac9b 100644
--- a/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java
+++ b/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java
@@ -19,6 +19,7 @@ package org.apache.commons.fileupload;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
+import java.io.UnsupportedEncodingException;
 import java.util.Map;
 
 import org.junit.Test;
@@ -110,11 +111,23 @@ public class ParameterParserTest {
     * Test for <a 
href="http://issues.apache.org/jira/browse/FILEUPLOAD-199">FILEUPLOAD-199</a>
      */
     @Test
-    public void fileUpload199() {
+    public void testFileUpload199() {
         final ParameterParser parser = new ParameterParser();
         final String s = "Content-Disposition: form-data; name=\"file\"; 
filename=\"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= 
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=\"\r\n";
         final Map<String, String> params = parser.parse(s, new char[] { ',', 
';' });
         assertEquals("If you can read this you understand the example.", 
params.get("filename"));
     }
 
+    /**
+     * Test for <a href="https://issues.apache.org/jira/browse/FILEUPLOAD-274">FILEUPLOAD-274</a>:
+     * a {@code filename*} parameter carrying a {@code UTF-8''} percent-encoded
+     * value must be readable under the key {@code filename} with the value decoded.
+     *
+     * @throws UnsupportedEncodingException NOTE(review): this method declares no
+     *         checked exception, so the tag looks stale; confirm before removing it
+     */
+    @Test
+    public void testFileUpload274() {
+        ParameterParser parser = new ParameterParser();
+        String s = "Content-Disposition: form-data; name=\"file\"; 
filename*=UTF-8\'\'%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF\r\n";
+        Map<String, String> params = parser.parse(s, new char[] { ',', ';' });
+        assertEquals("\u3053\u3093\u306B\u3061\u306F", 
params.get("filename")); // expected filename is "こんにちは" (Japanese)
+    }
+
 }
diff --git 
a/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java
 
b/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java
new file mode 100644
index 00000000..6502be76
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.fileupload.util.mime;
+
+import java.io.UnsupportedEncodingException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests for {@link RFC2231Utility#decodeText(String)}.
+ *
+ * <p>
+ * The expected values are Java (UTF-16) string literals, while the encoded
+ * inputs carry UTF-8 or ISO-8859-1 percent-encoded bytes.
+ *
+ * <p>
+ * RFC 5987 recommends supporting both UTF-8 and ISO-8859-1. Test values are taken from
+ * <a href="https://tools.ietf.org/html/rfc5987#section-3.2.2">RFC 5987, section 3.2.2</a>.
+ */
+public final class RFC2231UtilityTestCase {
+
+    @Test
+    public void noNeedToDecode() throws Exception { // input without any single quote must come back unchanged
+        assertEncoded("abc", "abc");
+    }
+
+    @Test
+    public void decodeUtf8() throws Exception { // empty language part, lower-case hex digits
+        assertEncoded("\u00a3 \u0061\u006e\u0064 \u20ac 
\u0072\u0061\u0074\u0065\u0073", "UTF-8''%c2%a3%20and%20%e2%82%ac%20rates"); 
// expected: "£ and € rates"
+    }
+
+    @Test
+    public void decodeIso88591() throws Exception { // language part present, upper-case hex digits
+        assertEncoded("\u00A3 rate", "iso-8859-1'en'%A3%20rate"); // expected: "£ rate"
+    }
+
+    private static void assertEncoded(String expected, String encoded) throws 
Exception { // despite the name, this decodes 'encoded' and compares the result with 'expected'
+        Assert.assertEquals(expected, RFC2231Utility.decodeText(encoded));
+    }
+
+    @Test(expected = UnsupportedEncodingException.class)
+    public void decodeInvalidEncoding() throws Exception { // the charset part "abc" is expected to be rejected
+        RFC2231Utility.decodeText("abc'en'hello");
+    }
+}

Reply via email to