Author: markt
Date: Sun Mar 3 23:07:28 2013
New Revision: 1452152
URL: http://svn.apache.org/r1452152
Log:
Start of a new framework for UTF-8 testing. I have discovered that the correct
replacement behaviour (that we want to use for URIs) has many, many edge cases.
This class should make it easier to construct additional tests.
Added:
tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (with
props)
Added: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java?rev=1452152&view=auto
==============================================================================
--- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (added)
+++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Sun Mar 3 23:07:28 2013
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestUtf8Extended {
+
+ private List<Utf8TestCase> testCases = new ArrayList<>();
+
+ @Before
+ public void setup() {
+ testCases.add(new Utf8TestCase(
+ "Zero length input",
+ new int[] {},
+ -1,
+ ""));
+ testCases.add(new Utf8TestCase(
+ "Valid one byte sequence",
+ new int[] {0x41},
+ -1,
+ "A"));
+ testCases.add(new Utf8TestCase(
+ "Valid two byte sequence",
+ new int[] {0xC2, 0xA9},
+ -1,
+ "\u00A9"));
+ testCases.add(new Utf8TestCase(
+ "Valid three byte sequence",
+ new int[] {0xE0, 0xA4, 0x87},
+ -1,
+ "\u0907"));
+ testCases.add(new Utf8TestCase(
+ "Valid four byte sequence",
+ new int[] {0xF0, 0x90, 0x90, 0x80},
+ -1,
+ "\uD801\uDC00"));
+ }
+
+ @Test
+ public void testHarmonyDecoder() {
+ doTest(new Utf8Decoder());
+ }
+
+
+ @Test
+ public void testJvmDecoder() {
+ doTest(Charset.forName("UTF-8").newDecoder());
+ }
+
+
+ private void doTest(CharsetDecoder decoder) {
+ for (Utf8TestCase testCase : testCases) {
+ // Configure decoder to fail on an error
+ decoder.reset();
+ decoder.onMalformedInput(CodingErrorAction.REPORT);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ // Add each byte one at a time. The decoder should fail as soon as
+ // an invalid sequence has been provided
+ int len = testCase.input.length;
+ ByteBuffer bb = ByteBuffer.allocate(len);
+ CharBuffer cb = CharBuffer.allocate(len);
+ for (int i = 0; i < len; i++) {
+ bb.put((byte) testCase.input[i]);
+ bb.flip();
+ CoderResult cr = decoder.decode(bb, cb, false);
+ if (cr.isError()) {
+ Assert.assertEquals(testCase.description,
+ testCase.invalidIndex, i);
+ break;
+ }
+ bb.compact();
+ }
+
+ // Configure decoder to replace on an error
+ decoder.reset();
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+ // Add each byte one at a time.
+ bb.clear();
+ cb.clear();
+ for (int i = 0; i < len; i++) {
+ bb.put((byte) testCase.input[i]);
+ bb.flip();
+ CoderResult cr = decoder.decode(bb, cb, false);
+ if (cr.isError()) {
+ Assert.fail(testCase.description);
+ }
+ bb.compact();
+ }
+ // For incomplete sequences at the end of the input need to tell
+ // the decoder the input has ended
+ bb.flip();
+ CoderResult cr = decoder.decode(bb, cb, true);
+ if (cr.isError()) {
+ Assert.fail(testCase.description);
+ }
+ cb.flip();
+ Assert.assertEquals(testCase.description, testCase.outputReplaced,
+ cb.toString());
+ }
+ }
+
+
+ /**
+ * Encapsulates a single UTF-8 test case
+ */
+ private static class Utf8TestCase {
+ private final String description;
+ private final int[] input;
+ private final int invalidIndex;
+ private final String outputReplaced;
+
+ public Utf8TestCase(String description, int[] input, int invalidIndex,
+ String outputReplaced) {
+ this.description = description;
+ this.input = input;
+ this.invalidIndex = invalidIndex;
+ this.outputReplaced = outputReplaced;
+ }
+ }
+}
Propchange: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java
------------------------------------------------------------------------------
svn:eol-style = native
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org