On Thu, Jun 22, 2023 at 8:55 PM <ma...@apache.org> wrote:
>
> This is an automated email from the ASF dual-hosted git repository.
>
> markt pushed a commit to branch main
> in repository https://gitbox.apache.org/repos/asf/tomcat.git
>
>
> The following commit(s) were added to refs/heads/main by this push:
>      new 944951302e Add control of byte decoding errors to ByteChunk and 
> StringCache
> 944951302e is described below
>
> commit 944951302e2f478879411dbff353f5818ad44121
> Author: Mark Thomas <ma...@apache.org>
> AuthorDate: Wed Jun 14 12:25:21 2023 +0100
>
>     Add control of byte decoding errors to ByteChunk and StringCache
> ---
>  java/org/apache/tomcat/util/buf/ByteChunk.java     |  47 +++++++++-
>  java/org/apache/tomcat/util/buf/StringCache.java   |  66 +++++++++++---
>  test/org/apache/jasper/compiler/TestGenerator.java |   3 +-
>  .../apache/tomcat/util/buf/TestStringCache.java    | 100 
> +++++++++++++++++++++
>  4 files changed, 203 insertions(+), 13 deletions(-)
>
> diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java 
> b/java/org/apache/tomcat/util/buf/ByteChunk.java
> index 101f9c0eaa..ff00f55774 100644
> --- a/java/org/apache/tomcat/util/buf/ByteChunk.java
> +++ b/java/org/apache/tomcat/util/buf/ByteChunk.java
> @@ -21,7 +21,9 @@ import java.io.ObjectInputStream;
>  import java.io.ObjectOutputStream;
>  import java.nio.ByteBuffer;
>  import java.nio.CharBuffer;
> +import java.nio.charset.CharacterCodingException;
>  import java.nio.charset.Charset;
> +import java.nio.charset.CodingErrorAction;
>  import java.nio.charset.StandardCharsets;
>
>  /*
> @@ -521,23 +523,64 @@ public final class ByteChunk extends AbstractChunk {
>
>      @Override
>      public String toString() {
> +        try {
> +            return toString(CodingErrorAction.REPLACE, 
> CodingErrorAction.REPLACE);
> +        } catch (CharacterCodingException e) {
> +            // Unreachable code. Use of REPLACE above means the exception 
> will never be thrown.
> +            throw new IllegalStateException(e);
> +        }
> +    }
> +
> +
> +    public String toString(CodingErrorAction malformedInputAction, 
> CodingErrorAction unmappableCharacterAction)
> +            throws CharacterCodingException {
>          if (isNull()) {
>              return null;
>          } else if (end - start == 0) {
>              return "";
>          }
> -        return StringCache.toString(this);
> +        return StringCache.toString(this, malformedInputAction, 
> unmappableCharacterAction);
>      }
>
>
> +    /**
> +     * Converts the current content of the byte buffer to a String using the 
> configured character set.
> +     *
> +     * @return The result of converting the bytes to a String
> +     *
> +     * @deprecated Unused. This method will be removed in Tomcat 11 onwards.
> +     */
> +    @Deprecated
>      public String toStringInternal() {
> +        try {
> +            return toStringInternal(CodingErrorAction.REPLACE, 
> CodingErrorAction.REPLACE);
> +        } catch (CharacterCodingException e) {
> +            // Unreachable code. Use of REPLACE above means the exception 
> will never be thrown.
> +            throw new IllegalStateException(e);
> +        }
> +    }
> +
> +
> +    /**
> +     * Converts the current content of the byte buffer to a String using the 
> configured character set.
> +     *
> +     * @param malformedInputAction      Action to take if the input is 
> malformed
> +     * @param unmappableCharacterAction Action to take if a byte sequence 
> can't be mapped to a character
> +     *
> +     * @return The result of converting the bytes to a String
> +     *
> +     * @throws CharacterCodingException If an error occurs during the 
> conversion
> +     */
> +    public String toStringInternal(CodingErrorAction malformedInputAction, 
> CodingErrorAction unmappableCharacterAction)
> +            throws CharacterCodingException {
>          if (charset == null) {
>              charset = DEFAULT_CHARSET;
>          }
>          // new String(byte[], int, int, Charset) takes a defensive copy of 
> the
>          // entire byte array. This is expensive if only a small subset of the
>          // bytes will be used. The code below is from Apache Harmony.
> -        CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - 
> start));
> +        CharBuffer cb = 
> charset.newDecoder().onMalformedInput(malformedInputAction)
> +                
> .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff,
>  start, end - start));

Looking at the code, this is not equivalent, like charset.decode uses
thread locals and so on. I will make a change so that charset.decode
is used if is REPLACE REPLACE, if you don't mind. I'm pretty sure
benching would show no performance difference though.

Rémy

>          return new String(cb.array(), cb.arrayOffset(), cb.length());
>      }
>
> diff --git a/java/org/apache/tomcat/util/buf/StringCache.java 
> b/java/org/apache/tomcat/util/buf/StringCache.java
> index d39de93cfa..5b82e44f74 100644
> --- a/java/org/apache/tomcat/util/buf/StringCache.java
> +++ b/java/org/apache/tomcat/util/buf/StringCache.java
> @@ -16,10 +16,13 @@
>   */
>  package org.apache.tomcat.util.buf;
>
> +import java.nio.charset.CharacterCodingException;
>  import java.nio.charset.Charset;
> +import java.nio.charset.CodingErrorAction;
>  import java.util.ArrayList;
>  import java.util.HashMap;
>  import java.util.Map.Entry;
> +import java.util.Objects;
>  import java.util.TreeMap;
>
>  import org.apache.juli.logging.Log;
> @@ -208,11 +211,22 @@ public class StringCache {
>
>
>      public static String toString(ByteChunk bc) {
> +        try {
> +            return toString(bc, CodingErrorAction.REPLACE, 
> CodingErrorAction.REPLACE);
> +        } catch (CharacterCodingException e) {
> +            // Unreachable code. Use of REPLACE above means the exception 
> will never be thrown.
> +            throw new IllegalStateException(e);
> +        }
> +    }
> +
> +
> +    public static String toString(ByteChunk bc, CodingErrorAction 
> malformedInputAction,
> +            CodingErrorAction unmappableCharacterAction) throws 
> CharacterCodingException {
>
>          // If the cache is null, then either caching is disabled, or we're
>          // still training
>          if (bcCache == null) {
> -            String value = bc.toStringInternal();
> +            String value = bc.toStringInternal(malformedInputAction, 
> unmappableCharacterAction);
>              if (byteEnabled && (value.length() < maxStringSize)) {
>                  // If training, everything is synced
>                  synchronized (bcStats) {
> @@ -285,6 +299,8 @@ public class StringCache {
>                              System.arraycopy(bc.getBuffer(), start, 
> entry.name, 0, end - start);
>                              // Set encoding
>                              entry.charset = bc.getCharset();
> +                            entry.malformedInputAction = 
> malformedInputAction;
> +                            entry.unmappableCharacterAction = 
> unmappableCharacterAction;
>                              // Initialize occurrence count to one
>                              count = new int[1];
>                              count[0] = 1;
> @@ -300,9 +316,9 @@ public class StringCache {
>          } else {
>              accessCount++;
>              // Find the corresponding String
> -            String result = find(bc);
> +            String result = find(bc, malformedInputAction, 
> unmappableCharacterAction);
>              if (result == null) {
> -                return bc.toStringInternal();
> +                return bc.toStringInternal(malformedInputAction, 
> unmappableCharacterAction);
>              }
>              // Note: We don't care about safety for the stats
>              hitCount++;
> @@ -462,10 +478,31 @@ public class StringCache {
>       * @param name The name to find
>       *
>       * @return the corresponding value
> +     *
> +     * @deprecated Unused. Will be removed in Tomcat 11.
> +     *             Use {@link #find(ByteChunk, CodingErrorAction, 
> CodingErrorAction)}
>       */
> +    @Deprecated
>      protected static final String find(ByteChunk name) {
> +        return find(name, CodingErrorAction.REPLACE, 
> CodingErrorAction.REPLACE);
> +    }
> +
> +
> +    /**
> +     * Find an entry given its name in the cache and return the associated 
> String.
> +     *
> +     * @param name                      The name to find
> +     * @param malformedInputAction      Action to take if an malformed input 
> is encountered
> +     * @param unmappableCharacterAction Action to take if an unmappable 
> character is encountered
> +     *
> +     * @return the corresponding value
> +     */
> +    protected static final String find(ByteChunk name, CodingErrorAction 
> malformedInputAction,
> +        CodingErrorAction unmappableCharacterAction) {
>          int pos = findClosest(name, bcCache, bcCache.length);
> -        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || 
> !(name.getCharset().equals(bcCache[pos].charset))) {
> +        if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || 
> !(name.getCharset().equals(bcCache[pos].charset)) ||
> +                
> !malformedInputAction.equals(bcCache[pos].malformedInputAction) ||
> +                
> !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) {
>              return null;
>          } else {
>              return bcCache[pos].value;
> @@ -631,11 +668,12 @@ public class StringCache {
>
>      // -------------------------------------------------- ByteEntry Inner 
> Class
>
> -
>      private static class ByteEntry {
>
>          private byte[] name = null;
>          private Charset charset = null;
> +        private CodingErrorAction malformedInputAction = null;
> +        private CodingErrorAction unmappableCharacterAction = null;
>          private String value = null;
>
>          @Override
> @@ -645,17 +683,25 @@ public class StringCache {
>
>          @Override
>          public int hashCode() {
> -            return value.hashCode();
> +            return Objects.hash(malformedInputAction, 
> unmappableCharacterAction, value);
>          }
>
>          @Override
>          public boolean equals(Object obj) {
> -            if (obj instanceof ByteEntry) {
> -                return value.equals(((ByteEntry) obj).value);
> +            if (this == obj) {
> +                return true;
>              }
> -            return false;
> +            if (obj == null) {
> +                return false;
> +            }
> +            if (getClass() != obj.getClass()) {
> +                return false;
> +            }
> +            ByteEntry other = (ByteEntry) obj;
> +            return Objects.equals(malformedInputAction, 
> other.malformedInputAction) &&
> +                    Objects.equals(unmappableCharacterAction, 
> other.unmappableCharacterAction) &&
> +                    Objects.equals(value, other.value);
>          }
> -
>      }
>
>
> diff --git a/test/org/apache/jasper/compiler/TestGenerator.java 
> b/test/org/apache/jasper/compiler/TestGenerator.java
> index 8cfe73f54c..b854a6010b 100644
> --- a/test/org/apache/jasper/compiler/TestGenerator.java
> +++ b/test/org/apache/jasper/compiler/TestGenerator.java
> @@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor;
>  import java.beans.PropertyEditorSupport;
>  import java.io.File;
>  import java.io.IOException;
> +import java.nio.charset.CodingErrorAction;
>  import java.util.Date;
>  import java.util.Scanner;
>
> @@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest {
>          ByteChunk bc = new ByteChunk();
>          int rc = getUrl("http://localhost:"; + getPort() + 
> "/test/bug5nnnn/bug56529.jsp", bc, null);
>          Assert.assertEquals(HttpServletResponse.SC_OK, rc);
> -        String response = bc.toStringInternal();
> +        String response = bc.toStringInternal(CodingErrorAction.REPORT, 
> CodingErrorAction.REPORT);
>          Assert.assertTrue(response, response.contains("[1:attribute1: '', 
> attribute2: '']"));
>          Assert.assertTrue(response, response.contains("[2:attribute1: '', 
> attribute2: '']"));
>      }
> diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java 
> b/test/org/apache/tomcat/util/buf/TestStringCache.java
> new file mode 100644
> index 0000000000..2345d2de76
> --- /dev/null
> +++ b/test/org/apache/tomcat/util/buf/TestStringCache.java
> @@ -0,0 +1,100 @@
> +/*
> + *  Licensed to the Apache Software Foundation (ASF) under one or more
> + *  contributor license agreements.  See the NOTICE file distributed with
> + *  this work for additional information regarding copyright ownership.
> + *  The ASF licenses this file to You under the Apache License, Version 2.0
> + *  (the "License"); you may not use this file except in compliance with
> + *  the License.  You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + */
> +package org.apache.tomcat.util.buf;
> +
> +import java.nio.charset.CharacterCodingException;
> +import java.nio.charset.CodingErrorAction;
> +import java.nio.charset.StandardCharsets;
> +
> +import org.junit.Assert;
> +import org.junit.Test;
> +
> +public class TestStringCache {
> +
> +    private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68};
> +    private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, 
> 68};
> +
> +    private static final ByteChunk INPUT_VALID = new ByteChunk();
> +    private static final ByteChunk INPUT_INVALID = new ByteChunk();
> +
> +    private static final CodingErrorAction[] actions =
> +            new CodingErrorAction[] { CodingErrorAction.IGNORE, 
> CodingErrorAction.REPLACE, CodingErrorAction.REPORT };
> +
> +    static {
> +        INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length);
> +        INPUT_VALID.setCharset(StandardCharsets.UTF_8);
> +        INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length);
> +        INPUT_INVALID.setCharset(StandardCharsets.UTF_8);
> +    }
> +
> +
> +    @Test
> +    public void testCodingErrorLookup() {
> +
> +        System.setProperty("tomcat.util.buf.StringCache.byte.enabled", 
> "true");
> +
> +        Assert.assertTrue(StringCache.byteEnabled);
> +        StringCache sc = new StringCache();
> +        sc.reset();
> +
> +        for (int i = 0; i < StringCache.trainThreshold * 2; i++) {
> +            for (CodingErrorAction malformedInputAction : actions) {
> +                try {
> +                    // UTF-8 doesn't have any unmappable characters
> +                    INPUT_VALID.toString(malformedInputAction, 
> CodingErrorAction.IGNORE);
> +                    INPUT_INVALID.toString(malformedInputAction, 
> CodingErrorAction.IGNORE);
> +                } catch (CharacterCodingException e) {
> +                    // Ignore
> +                }
> +            }
> +        }
> +
> +        Assert.assertNotNull(StringCache.bcCache);
> +
> +        // Check the valid input is cached correctly
> +        for (CodingErrorAction malformedInputAction : actions) {
> +            try {
> +                Assert.assertEquals("ABCD", 
> INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE));
> +            } catch (CharacterCodingException e) {
> +                // Should not happen for valid input
> +                Assert.fail();
> +            }
> +        }
> +
> +        // Check the valid input is cached correctly
> +        try {
> +            Assert.assertEquals("ABCD", 
> INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE));
> +        } catch (CharacterCodingException e) {
> +            // Should not happen for invalid input with IGNORE
> +            Assert.fail();
> +        }
> +        try {
> +            Assert.assertEquals("AB\ufffdCD", 
> INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE));
> +        } catch (CharacterCodingException e) {
> +            // Should not happen for invalid input with REPLACE
> +            Assert.fail();
> +        }
> +        try {
> +            Assert.assertEquals("ABCD", 
> INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE));
> +            // Should throw exception
> +            Assert.fail();
> +        } catch (CharacterCodingException e) {
> +            // Ignore
> +        }
> +
> +    }
> +}
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
> For additional commands, e-mail: dev-h...@tomcat.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to