This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 10.1.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
commit 4aebe196cd02745689ced8d66c70f77c1a52af39 Author: Mark Thomas <ma...@apache.org> AuthorDate: Wed Jun 14 12:25:21 2023 +0100 Add control of byte decoding errors to ByteChunk and StringCache --- java/org/apache/tomcat/util/buf/ByteChunk.java | 47 +++++++++- java/org/apache/tomcat/util/buf/StringCache.java | 66 +++++++++++--- test/org/apache/jasper/compiler/TestGenerator.java | 3 +- .../apache/tomcat/util/buf/TestStringCache.java | 100 +++++++++++++++++++++ 4 files changed, 203 insertions(+), 13 deletions(-) diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java b/java/org/apache/tomcat/util/buf/ByteChunk.java index 101f9c0eaa..ff00f55774 100644 --- a/java/org/apache/tomcat/util/buf/ByteChunk.java +++ b/java/org/apache/tomcat/util/buf/ByteChunk.java @@ -21,7 +21,9 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.nio.ByteBuffer; import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; /* @@ -521,23 +523,64 @@ public final class ByteChunk extends AbstractChunk { @Override public String toString() { + try { + return toString(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE); + } catch (CharacterCodingException e) { + // Unreachable code. Use of REPLACE above means the exception will never be thrown. + throw new IllegalStateException(e); + } + } + + + public String toString(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction) + throws CharacterCodingException { if (isNull()) { return null; } else if (end - start == 0) { return ""; } - return StringCache.toString(this); + return StringCache.toString(this, malformedInputAction, unmappableCharacterAction); } + /** + * Converts the current content of the byte buffer to a String using the configured character set. + * + * @return The result of converting the bytes to a String + * + * @deprecated Unused. This method will be removed in Tomcat 11 onwards. + */ + @Deprecated public String toStringInternal() { + try { + return toStringInternal(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE); + } catch (CharacterCodingException e) { + // Unreachable code. Use of REPLACE above means the exception will never be thrown. + throw new IllegalStateException(e); + } + } + + + /** + * Converts the current content of the byte buffer to a String using the configured character set. + * + * @param malformedInputAction Action to take if the input is malformed + * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character + * + * @return The result of converting the bytes to a String + * + * @throws CharacterCodingException If an error occurs during the conversion + */ + public String toStringInternal(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction) + throws CharacterCodingException { if (charset == null) { charset = DEFAULT_CHARSET; } // new String(byte[], int, int, Charset) takes a defensive copy of the // entire byte array. This is expensive if only a small subset of the // bytes will be used. The code below is from Apache Harmony. - CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - start)); + CharBuffer cb = charset.newDecoder().onMalformedInput(malformedInputAction) + .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, start, end - start)); return new String(cb.array(), cb.arrayOffset(), cb.length()); } diff --git a/java/org/apache/tomcat/util/buf/StringCache.java b/java/org/apache/tomcat/util/buf/StringCache.java index d39de93cfa..5b82e44f74 100644 --- a/java/org/apache/tomcat/util/buf/StringCache.java +++ b/java/org/apache/tomcat/util/buf/StringCache.java @@ -16,10 +16,13 @@ */ package org.apache.tomcat.util.buf; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.CodingErrorAction; import java.util.ArrayList; import java.util.HashMap; import java.util.Map.Entry; +import java.util.Objects; import java.util.TreeMap; import org.apache.juli.logging.Log; @@ -208,11 +211,22 @@ public class StringCache { public static String toString(ByteChunk bc) { + try { + return toString(bc, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE); + } catch (CharacterCodingException e) { + // Unreachable code. Use of REPLACE above means the exception will never be thrown. + throw new IllegalStateException(e); + } + } + + + public static String toString(ByteChunk bc, CodingErrorAction malformedInputAction, + CodingErrorAction unmappableCharacterAction) throws CharacterCodingException { // If the cache is null, then either caching is disabled, or we're // still training if (bcCache == null) { - String value = bc.toStringInternal(); + String value = bc.toStringInternal(malformedInputAction, unmappableCharacterAction); if (byteEnabled && (value.length() < maxStringSize)) { // If training, everything is synced synchronized (bcStats) { @@ -285,6 +299,8 @@ public class StringCache { System.arraycopy(bc.getBuffer(), start, entry.name, 0, end - start); // Set encoding entry.charset = bc.getCharset(); + entry.malformedInputAction = malformedInputAction; + entry.unmappableCharacterAction = unmappableCharacterAction; // Initialize occurrence count to one count = new int[1]; count[0] = 1; @@ -300,9 +316,9 @@ public class StringCache { } else { accessCount++; // Find the corresponding String - String result = find(bc); + String result = find(bc, malformedInputAction, unmappableCharacterAction); if (result == null) { - return bc.toStringInternal(); + return bc.toStringInternal(malformedInputAction, unmappableCharacterAction); } // Note: We don't care about safety for the stats hitCount++; @@ -462,10 +478,31 @@ public class StringCache { * @param name The name to find * * @return the corresponding value + * + * @deprecated Unused. Will be removed in Tomcat 11. + * Use {@link #find(ByteChunk, CodingErrorAction, CodingErrorAction)} */ + @Deprecated protected static final String find(ByteChunk name) { + return find(name, CodingErrorAction.REPLACE, CodingErrorAction.REPLACE); + } + + + /** + * Find an entry given its name in the cache and return the associated String. + * + * @param name The name to find + * @param malformedInputAction Action to take if an malformed input is encountered + * @param unmappableCharacterAction Action to take if an unmappable character is encountered + * + * @return the corresponding value + */ + protected static final String find(ByteChunk name, CodingErrorAction malformedInputAction, + CodingErrorAction unmappableCharacterAction) { int pos = findClosest(name, bcCache, bcCache.length); - if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset))) { + if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || !(name.getCharset().equals(bcCache[pos].charset)) || + !malformedInputAction.equals(bcCache[pos].malformedInputAction) || + !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) { return null; } else { return bcCache[pos].value; @@ -631,11 +668,12 @@ public class StringCache { // -------------------------------------------------- ByteEntry Inner Class - private static class ByteEntry { private byte[] name = null; private Charset charset = null; + private CodingErrorAction malformedInputAction = null; + private CodingErrorAction unmappableCharacterAction = null; private String value = null; @Override @@ -645,17 +683,25 @@ public class StringCache { @Override public int hashCode() { - return value.hashCode(); + return Objects.hash(malformedInputAction, unmappableCharacterAction, value); } @Override public boolean equals(Object obj) { - if (obj instanceof ByteEntry) { - return value.equals(((ByteEntry) obj).value); + if (this == obj) { + return true; } - return false; + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + ByteEntry other = (ByteEntry) obj; + return Objects.equals(malformedInputAction, other.malformedInputAction) && + Objects.equals(unmappableCharacterAction, other.unmappableCharacterAction) && + Objects.equals(value, other.value); } - } diff --git a/test/org/apache/jasper/compiler/TestGenerator.java b/test/org/apache/jasper/compiler/TestGenerator.java index 82921eb8fe..b8bd60a880 100644 --- a/test/org/apache/jasper/compiler/TestGenerator.java +++ b/test/org/apache/jasper/compiler/TestGenerator.java @@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor; import java.beans.PropertyEditorSupport; import java.io.File; import java.io.IOException; +import java.nio.charset.CodingErrorAction; import java.util.Date; import java.util.Scanner; @@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest { ByteChunk bc = new ByteChunk(); int rc = getUrl("http://localhost:" + getPort() + "/test/bug5nnnn/bug56529.jsp", bc, null); Assert.assertEquals(HttpServletResponse.SC_OK, rc); - String response = bc.toStringInternal(); + String response = bc.toStringInternal(CodingErrorAction.REPORT, CodingErrorAction.REPORT); Assert.assertTrue(response, response.contains("[1:attribute1: '', attribute2: '']")); Assert.assertTrue(response, response.contains("[2:attribute1: '', attribute2: '']")); } diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java b/test/org/apache/tomcat/util/buf/TestStringCache.java new file mode 100644 index 0000000000..2345d2de76 --- /dev/null +++ b/test/org/apache/tomcat/util/buf/TestStringCache.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; + +import org.junit.Assert; +import org.junit.Test; + +public class TestStringCache { + + private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68}; + private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, 68}; + + private static final ByteChunk INPUT_VALID = new ByteChunk(); + private static final ByteChunk INPUT_INVALID = new ByteChunk(); + + private static final CodingErrorAction[] actions = + new CodingErrorAction[] { CodingErrorAction.IGNORE, CodingErrorAction.REPLACE, CodingErrorAction.REPORT }; + + static { + INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length); + INPUT_VALID.setCharset(StandardCharsets.UTF_8); + INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length); + INPUT_INVALID.setCharset(StandardCharsets.UTF_8); + } + + + @Test + public void testCodingErrorLookup() { + + System.setProperty("tomcat.util.buf.StringCache.byte.enabled", "true"); + + Assert.assertTrue(StringCache.byteEnabled); + StringCache sc = new StringCache(); + sc.reset(); + + for (int i = 0; i < StringCache.trainThreshold * 2; i++) { + for (CodingErrorAction malformedInputAction : actions) { + try { + // UTF-8 doesn't have any unmappable characters + INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE); + INPUT_INVALID.toString(malformedInputAction, CodingErrorAction.IGNORE); + } catch (CharacterCodingException e) { + // Ignore + } + } + } + + Assert.assertNotNull(StringCache.bcCache); + + // Check the valid input is cached correctly + for (CodingErrorAction malformedInputAction : actions) { + try { + Assert.assertEquals("ABCD", INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE)); + } catch (CharacterCodingException e) { + // Should not happen for valid input + Assert.fail(); + } + } + + // Check the valid input is cached correctly + try { + Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE)); + } catch (CharacterCodingException e) { + // Should not happen for invalid input with IGNORE + Assert.fail(); + } + try { + Assert.assertEquals("AB\ufffdCD", INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE)); + } catch (CharacterCodingException e) { + // Should not happen for invalid input with REPLACE + Assert.fail(); + } + try { + Assert.assertEquals("ABCD", INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE)); + // Should throw exception + Assert.fail(); + } catch (CharacterCodingException e) { + // Ignore + } + + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org