This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 1.x in repository https://gitbox.apache.org/repos/asf/commons-fileupload.git
commit f4d1066825e5830475bfb16b3d97c884d62c7e40 Author: Merbin J Anselm <[email protected]> AuthorDate: Mon Dec 9 17:36:57 2019 +0530 Added support for RFC 5987 aka RFC 2231 --- .../apache/commons/fileupload/ParameterParser.java | 9 +- .../fileupload/util/mime/RFC2231Utility.java | 106 +++++++++++++++++++++ .../commons/fileupload/ParameterParserTest.java | 15 ++- .../util/mime/RFC2231UtilityTestCase.java | 54 +++++++++++ 4 files changed, 182 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/fileupload/ParameterParser.java b/src/main/java/org/apache/commons/fileupload/ParameterParser.java index 9a484cc2..1a0ca851 100644 --- a/src/main/java/org/apache/commons/fileupload/ParameterParser.java +++ b/src/main/java/org/apache/commons/fileupload/ParameterParser.java @@ -22,6 +22,7 @@ import java.util.Locale; import java.util.Map; import org.apache.commons.fileupload.util.mime.MimeUtility; +import org.apache.commons.fileupload.util.mime.RFC2231Utility; /** * A simple parser intended to parse sequences of name/value pairs. @@ -305,10 +306,12 @@ public class ParameterParser { String paramName = null; String paramValue = null; + boolean hasExtendedParams = false; while (hasChar()) { paramName = parseToken(new char[] { '=', separator }); paramValue = null; + hasExtendedParams = (paramName != null) ? paramName.contains("*") : false; //TODO: Check only if delimiter is at end if (hasChar() && (charArray[pos] == '=')) { pos++; // skip '=' paramValue = parseQuotedToken(new char[] { @@ -316,7 +319,8 @@ public class ParameterParser { if (paramValue != null) { try { - paramValue = MimeUtility.decodeText(paramValue); + paramValue = hasExtendedParams ? 
RFC2231Utility.decodeText(paramValue) + : MimeUtility.decodeText(paramValue); } catch (final UnsupportedEncodingException e) { // let's keep the original value in this case } @@ -326,6 +330,9 @@ public class ParameterParser { pos++; // skip separator } if ((paramName != null) && (paramName.length() > 0)) { + if (hasExtendedParams) { + paramName = paramName.replace("*", ""); //strip of the * from the name //TODO: Replace the last character alone + } if (this.lowerCaseNames) { paramName = paramName.toLowerCase(Locale.ENGLISH); } diff --git a/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java b/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java new file mode 100644 index 00000000..f625494b --- /dev/null +++ b/src/main/java/org/apache/commons/fileupload/util/mime/RFC2231Utility.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.fileupload.util.mime; + +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +/** + * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231. 
+ * This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers + * + * RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a> + * and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a> + * + * <p> + * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a> + * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a> + */ +public final class RFC2231Utility { + + private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); + + private static final byte[] HEX_DECODE = new byte[0x80]; + + // create an ASCII decoded array of Hexadecimal values + static { + for (int i = 0; i < HEX_DIGITS.length; i++) { + HEX_DECODE[HEX_DIGITS[i]] = (byte) i; + HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i; + } + } + + /** + * Decode a string of text obtained from an HTTP header as per RFC 2231 + * + * <p> + * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} + * will be decoded to {@code This is ***fun***} + * <p> + * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate} + * will be decoded to {@code £ rate} + * <p> + * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} + * will be decoded to {@code £ and € rates} + * + * @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only + * @return Decoded text based on charset encoding + * @throws UnsupportedEncodingException if the charset named in the encoded text is not supported + */ + public static String decodeText(String encodedText) throws UnsupportedEncodingException { + int langDelimitStart = encodedText.indexOf('\''); + if (langDelimitStart == -1) { + // missing charset + return encodedText; + } + String mimeCharset = encodedText.substring(0, langDelimitStart); + int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1); + if (langDelimitEnd == -1) { + // missing language + return encodedText; + } + byte[] 
bytes = fromHex(encodedText.substring(langDelimitEnd + 1)); + return new String(bytes, getJavaCharset(mimeCharset)); + } + + /** + * Convert {@code text} to their corresponding Hex value + * @param text - ASCII text input + * @return Byte array of characters decoded from ASCII table + */ + private static byte[] fromHex(String text) { + ByteArrayOutputStream out = new ByteArrayOutputStream(text.length()); + for (int i = 0; i < text.length();) { + char c = text.charAt(i++); + if (c == '%') { + if (i > text.length() - 2) { + break; // unterminated sequence + } + byte b1 = HEX_DECODE[text.charAt(i++) & 0x7f]; + byte b2 = HEX_DECODE[text.charAt(i++) & 0x7f]; + out.write((b1 << 4) | b2); + } else { + out.write((byte) c); + } + } + return out.toByteArray(); + } + + private static String getJavaCharset(String mimeCharset) { + // good enough for standard values + return mimeCharset; + } +} diff --git a/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java b/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java index fdad8b25..a97cac9b 100644 --- a/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java +++ b/src/test/java/org/apache/commons/fileupload/ParameterParserTest.java @@ -19,6 +19,7 @@ package org.apache.commons.fileupload; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import java.io.UnsupportedEncodingException; import java.util.Map; import org.junit.Test; @@ -110,11 +111,23 @@ public class ParameterParserTest { * Test for <a href="http://issues.apache.org/jira/browse/FILEUPLOAD-199">FILEUPLOAD-199</a> */ @Test - public void fileUpload199() { + public void testFileUpload199() { final ParameterParser parser = new ParameterParser(); final String s = "Content-Disposition: form-data; name=\"file\"; filename=\"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=\"\r\n"; final Map<String, String> params = parser.parse(s, new char[] { 
',', ';' }); assertEquals("If you can read this you understand the example.", params.get("filename")); } + /** + * Test for <a href="https://issues.apache.org/jira/browse/FILEUPLOAD-274">FILEUPLOAD-274</a> + * @throws UnsupportedEncodingException + */ + @Test + public void testFileUpload274() { + ParameterParser parser = new ParameterParser(); + String s = "Content-Disposition: form-data; name=\"file\"; filename*=UTF-8\'\'%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF\r\n"; + Map<String, String> params = parser.parse(s, new char[] { ',', ';' }); + assertEquals("\u3053\u3093\u306B\u3061\u306F", params.get("filename")); //filename = "こんにちは" in japanese + } + } diff --git a/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java b/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java new file mode 100644 index 00000000..6502be76 --- /dev/null +++ b/src/test/java/org/apache/commons/fileupload/util/mime/RFC2231UtilityTestCase.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.fileupload.util.mime; + +import java.io.UnsupportedEncodingException; + +import org.junit.Assert; +import org.junit.Test; + +/** + * The expected characters are encoded in UTF16, while the actual characters may be encoded in UTF-8/ISO-8859-1 + * + * RFC 5987 recommends to support both UTF-8 & ISO 8859-1. Test values are taken from https://tools.ietf.org/html/rfc5987#section-3.2.2 + */ +public final class RFC2231UtilityTestCase { + + @Test + public void noNeedToDecode() throws Exception { + assertEncoded("abc", "abc"); + } + + @Test + public void decodeUtf8() throws Exception { + assertEncoded("\u00a3 \u0061\u006e\u0064 \u20ac \u0072\u0061\u0074\u0065\u0073", "UTF-8''%c2%a3%20and%20%e2%82%ac%20rates"); //"£ and € rates" + } + + @Test + public void decodeIso88591() throws Exception { + assertEncoded("\u00A3 rate", "iso-8859-1'en'%A3%20rate"); //"£ rate" + } + + private static void assertEncoded(String expected, String encoded) throws Exception { + Assert.assertEquals(expected, RFC2231Utility.decodeText(encoded)); + } + + @Test(expected = UnsupportedEncodingException.class) + public void decodeInvalidEncoding() throws Exception { + RFC2231Utility.decodeText("abc'en'hello"); + } +}
