On Thu, Jun 22, 2023 at 8:55 PM <[email protected]> wrote:
>
> This is an automated email from the ASF dual-hosted git repository.
>
> markt pushed a commit to branch main
> in repository https://gitbox.apache.org/repos/asf/tomcat.git
>
>
> The following commit(s) were added to refs/heads/main by this push:
> new 944951302e Add control of byte decoding errors to ByteChunk and
> StringCache
> 944951302e is described below
>
> commit 944951302e2f478879411dbff353f5818ad44121
> Author: Mark Thomas <[email protected]>
> AuthorDate: Wed Jun 14 12:25:21 2023 +0100
>
> Add control of byte decoding errors to ByteChunk and StringCache
> ---
> java/org/apache/tomcat/util/buf/ByteChunk.java | 47 +++++++++-
> java/org/apache/tomcat/util/buf/StringCache.java | 66 +++++++++++---
> test/org/apache/jasper/compiler/TestGenerator.java | 3 +-
> .../apache/tomcat/util/buf/TestStringCache.java | 100
> +++++++++++++++++++++
> 4 files changed, 203 insertions(+), 13 deletions(-)
>
> diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java
> b/java/org/apache/tomcat/util/buf/ByteChunk.java
> index 101f9c0eaa..ff00f55774 100644
> --- a/java/org/apache/tomcat/util/buf/ByteChunk.java
> +++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
> @@ -21,7 +21,9 @@ import java.io.ObjectInputStream;
> import java.io.ObjectOutputStream;
> import java.nio.ByteBuffer;
> import java.nio.CharBuffer;
> +import java.nio.charset.CharacterCodingException;
> import java.nio.charset.Charset;
> +import java.nio.charset.CodingErrorAction;
> import java.nio.charset.StandardCharsets;
>
> /*
> @@ -521,23 +523,64 @@ public final class ByteChunk extends AbstractChunk {
>
> @Override
> public String toString() {
> + try {
> + return toString(CodingErrorAction.REPLACE,
> CodingErrorAction.REPLACE);
> + } catch (CharacterCodingException e) {
> + // Unreachable code. Use of REPLACE above means the exception
> will never be thrown.
> + throw new IllegalStateException(e);
> + }
> + }
> +
> +
> + public String toString(CodingErrorAction malformedInputAction,
> CodingErrorAction unmappableCharacterAction)
> + throws CharacterCodingException {
> if (isNull()) {
> return null;
> } else if (end - start == 0) {
> return "";
> }
> - return StringCache.toString(this);
> + return StringCache.toString(this, malformedInputAction,
> unmappableCharacterAction);
> }
>
>
> + /**
> + * Converts the current content of the byte buffer to a String using the
> configured character set.
> + *
> + * @return The result of converting the bytes to a String
> + *
> + * @deprecated Unused. This method will be removed in Tomcat 11 onwards.
> + */
> + @Deprecated
> public String toStringInternal() {
> + try {
> + return toStringInternal(CodingErrorAction.REPLACE,
> CodingErrorAction.REPLACE);
> + } catch (CharacterCodingException e) {
> + // Unreachable code. Use of REPLACE above means the exception
> will never be thrown.
> + throw new IllegalStateException(e);
> + }
> + }
> +
> +
> + /**
> + * Converts the current content of the byte buffer to a String using the
> configured character set.
> + *
> + * @param malformedInputAction Action to take if the input is
> malformed
> + * @param unmappableCharacterAction Action to take if a byte sequence
> can't be mapped to a character
> + *
> + * @return The result of converting the bytes to a String
> + *
> + * @throws CharacterCodingException If an error occurs during the
> conversion
> + */
> + public String toStringInternal(CodingErrorAction malformedInputAction,
> CodingErrorAction unmappableCharacterAction)
> + throws CharacterCodingException {
> if (charset == null) {
> charset = DEFAULT_CHARSET;
> }
> // new String(byte[], int, int, Charset) takes a defensive copy of
> the
> // entire byte array. This is expensive if only a small subset of the
> // bytes will be used. The code below is from Apache Harmony.
> - CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end -
> start));
> + CharBuffer cb =
> charset.newDecoder().onMalformedInput(malformedInputAction)
> +
> .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff,
> start, end - start));
Looking at the code, this is not equivalent, like charset.decode uses
thread locals and so on. I will make a change so that charset.decode
is used if is REPLACE REPLACE, if you don't mind. I'm pretty sure
benching would show no performance difference though.
Rémy
> return new String(cb.array(), cb.arrayOffset(), cb.length());
> }
>
> diff --git a/java/org/apache/tomcat/util/buf/StringCache.java
> b/java/org/apache/tomcat/util/buf/StringCache.java
> index d39de93cfa..5b82e44f74 100644
> --- a/java/org/apache/tomcat/util/buf/StringCache.java
> +++ b/java/org/apache/tomcat/util/buf/StringCache.java
> @@ -16,10 +16,13 @@
> */
> package org.apache.tomcat.util.buf;
>
> +import java.nio.charset.CharacterCodingException;
> import java.nio.charset.Charset;
> +import java.nio.charset.CodingErrorAction;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.Map.Entry;
> +import java.util.Objects;
> import java.util.TreeMap;
>
> import org.apache.juli.logging.Log;
> @@ -208,11 +211,22 @@ public class StringCache {
>
>
> public static String toString(ByteChunk bc) {
> + try {
> + return toString(bc, CodingErrorAction.REPLACE,
> CodingErrorAction.REPLACE);
> + } catch (CharacterCodingException e) {
> + // Unreachable code. Use of REPLACE above means the exception
> will never be thrown.
> + throw new IllegalStateException(e);
> + }
> + }
> +
> +
> + public static String toString(ByteChunk bc, CodingErrorAction
> malformedInputAction,
> + CodingErrorAction unmappableCharacterAction) throws
> CharacterCodingException {
>
> // If the cache is null, then either caching is disabled, or we're
> // still training
> if (bcCache == null) {
> - String value = bc.toStringInternal();
> + String value = bc.toStringInternal(malformedInputAction,
> unmappableCharacterAction);
> if (byteEnabled && (value.length() < maxStringSize)) {
> // If training, everything is synced
> synchronized (bcStats) {
> @@ -285,6 +299,8 @@ public class StringCache {
> System.arraycopy(bc.getBuffer(), start,
> entry.name, 0, end - start);
> // Set encoding
> entry.charset = bc.getCharset();
> + entry.malformedInputAction =
> malformedInputAction;
> + entry.unmappableCharacterAction =
> unmappableCharacterAction;
> // Initialize occurrence count to one
> count = new int[1];
> count[0] = 1;
> @@ -300,9 +316,9 @@ public class StringCache {
> } else {
> accessCount++;
> // Find the corresponding String
> - String result = find(bc);
> + String result = find(bc, malformedInputAction,
> unmappableCharacterAction);
> if (result == null) {
> - return bc.toStringInternal();
> + return bc.toStringInternal(malformedInputAction,
> unmappableCharacterAction);
> }
> // Note: We don't care about safety for the stats
> hitCount++;
> @@ -462,10 +478,31 @@ public class StringCache {
> * @param name The name to find
> *
> * @return the corresponding value
> + *
> + * @deprecated Unused. Will be removed in Tomcat 11.
> + * Use {@link #find(ByteChunk, CodingErrorAction,
> CodingErrorAction)}
> */
> + @Deprecated
> protected static final String find(ByteChunk name) {
> + return find(name, CodingErrorAction.REPLACE,
> CodingErrorAction.REPLACE);
> + }
> +
> +
> + /**
> + * Find an entry given its name in the cache and return the associated
> String.
> + *
> + * @param name The name to find
> + * @param malformedInputAction Action to take if an malformed input
> is encountered
> + * @param unmappableCharacterAction Action to take if an unmappable
> character is encountered
> + *
> + * @return the corresponding value
> + */
> + protected static final String find(ByteChunk name, CodingErrorAction
> malformedInputAction,
> + CodingErrorAction unmappableCharacterAction) {
> int pos = findClosest(name, bcCache, bcCache.length);
> - if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) ||
> !(name.getCharset().equals(bcCache[pos].charset))) {
> + if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) ||
> !(name.getCharset().equals(bcCache[pos].charset)) ||
> +
> !malformedInputAction.equals(bcCache[pos].malformedInputAction) ||
> +
> !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) {
> return null;
> } else {
> return bcCache[pos].value;
> @@ -631,11 +668,12 @@ public class StringCache {
>
> // -------------------------------------------------- ByteEntry Inner
> Class
>
> -
> private static class ByteEntry {
>
> private byte[] name = null;
> private Charset charset = null;
> + private CodingErrorAction malformedInputAction = null;
> + private CodingErrorAction unmappableCharacterAction = null;
> private String value = null;
>
> @Override
> @@ -645,17 +683,25 @@ public class StringCache {
>
> @Override
> public int hashCode() {
> - return value.hashCode();
> + return Objects.hash(malformedInputAction,
> unmappableCharacterAction, value);
> }
>
> @Override
> public boolean equals(Object obj) {
> - if (obj instanceof ByteEntry) {
> - return value.equals(((ByteEntry) obj).value);
> + if (this == obj) {
> + return true;
> }
> - return false;
> + if (obj == null) {
> + return false;
> + }
> + if (getClass() != obj.getClass()) {
> + return false;
> + }
> + ByteEntry other = (ByteEntry) obj;
> + return Objects.equals(malformedInputAction,
> other.malformedInputAction) &&
> + Objects.equals(unmappableCharacterAction,
> other.unmappableCharacterAction) &&
> + Objects.equals(value, other.value);
> }
> -
> }
>
>
> diff --git a/test/org/apache/jasper/compiler/TestGenerator.java
> b/test/org/apache/jasper/compiler/TestGenerator.java
> index 8cfe73f54c..b854a6010b 100644
> --- a/test/org/apache/jasper/compiler/TestGenerator.java
> +++ b/test/org/apache/jasper/compiler/TestGenerator.java
> @@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor;
> import java.beans.PropertyEditorSupport;
> import java.io.File;
> import java.io.IOException;
> +import java.nio.charset.CodingErrorAction;
> import java.util.Date;
> import java.util.Scanner;
>
> @@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest {
> ByteChunk bc = new ByteChunk();
> int rc = getUrl("http://localhost:" + getPort() +
> "/test/bug5nnnn/bug56529.jsp", bc, null);
> Assert.assertEquals(HttpServletResponse.SC_OK, rc);
> - String response = bc.toStringInternal();
> + String response = bc.toStringInternal(CodingErrorAction.REPORT,
> CodingErrorAction.REPORT);
> Assert.assertTrue(response, response.contains("[1:attribute1: '',
> attribute2: '']"));
> Assert.assertTrue(response, response.contains("[2:attribute1: '',
> attribute2: '']"));
> }
> diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java
> b/test/org/apache/tomcat/util/buf/TestStringCache.java
> new file mode 100644
> index 0000000000..2345d2de76
> --- /dev/null
> +++ b/test/org/apache/tomcat/util/buf/TestStringCache.java
> @@ -0,0 +1,100 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements. See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +package org.apache.tomcat.util.buf;
> +
> +import java.nio.charset.CharacterCodingException;
> +import java.nio.charset.CodingErrorAction;
> +import java.nio.charset.StandardCharsets;
> +
> +import org.junit.Assert;
> +import org.junit.Test;
> +
> +public class TestStringCache {
> +
> + private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68};
> + private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67,
> 68};
> +
> + private static final ByteChunk INPUT_VALID = new ByteChunk();
> + private static final ByteChunk INPUT_INVALID = new ByteChunk();
> +
> + private static final CodingErrorAction[] actions =
> + new CodingErrorAction[] { CodingErrorAction.IGNORE,
> CodingErrorAction.REPLACE, CodingErrorAction.REPORT };
> +
> + static {
> + INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length);
> + INPUT_VALID.setCharset(StandardCharsets.UTF_8);
> + INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length);
> + INPUT_INVALID.setCharset(StandardCharsets.UTF_8);
> + }
> +
> +
> + @Test
> + public void testCodingErrorLookup() {
> +
> + System.setProperty("tomcat.util.buf.StringCache.byte.enabled",
> "true");
> +
> + Assert.assertTrue(StringCache.byteEnabled);
> + StringCache sc = new StringCache();
> + sc.reset();
> +
> + for (int i = 0; i < StringCache.trainThreshold * 2; i++) {
> + for (CodingErrorAction malformedInputAction : actions) {
> + try {
> + // UTF-8 doesn't have any unmappable characters
> + INPUT_VALID.toString(malformedInputAction,
> CodingErrorAction.IGNORE);
> + INPUT_INVALID.toString(malformedInputAction,
> CodingErrorAction.IGNORE);
> + } catch (CharacterCodingException e) {
> + // Ignore
> + }
> + }
> + }
> +
> + Assert.assertNotNull(StringCache.bcCache);
> +
> + // Check the valid input is cached correctly
> + for (CodingErrorAction malformedInputAction : actions) {
> + try {
> + Assert.assertEquals("ABCD",
> INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE));
> + } catch (CharacterCodingException e) {
> + // Should not happen for valid input
> + Assert.fail();
> + }
> + }
> +
> + // Check the valid input is cached correctly
> + try {
> + Assert.assertEquals("ABCD",
> INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE));
> + } catch (CharacterCodingException e) {
> + // Should not happen for invalid input with IGNORE
> + Assert.fail();
> + }
> + try {
> + Assert.assertEquals("AB\ufffdCD",
> INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE));
> + } catch (CharacterCodingException e) {
> + // Should not happen for invalid input with REPLACE
> + Assert.fail();
> + }
> + try {
> + Assert.assertEquals("ABCD",
> INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE));
> + // Should throw exception
> + Assert.fail();
> + } catch (CharacterCodingException e) {
> + // Ignore
> + }
> +
> + }
> +}
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [email protected]
> For additional commands, e-mail: [email protected]
>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]