On Thu, Jun 22, 2023 at 8:55 PM <ma...@apache.org> wrote: > > This is an automated email from the ASF dual-hosted git repository. > > markt pushed a commit to branch main > in repository https://gitbox.apache.org/repos/asf/tomcat.git > > > The following commit(s) were added to refs/heads/main by this push: > new 944951302e Add control of byte decoding errors to ByteChunk and > StringCache > 944951302e is described below > > commit 944951302e2f478879411dbff353f5818ad44121 > Author: Mark Thomas <ma...@apache.org> > AuthorDate: Wed Jun 14 12:25:21 2023 +0100 > > Add control of byte decoding errors to ByteChunk and StringCache > --- > java/org/apache/tomcat/util/buf/ByteChunk.java | 47 +++++++++- > java/org/apache/tomcat/util/buf/StringCache.java | 66 +++++++++++--- > test/org/apache/jasper/compiler/TestGenerator.java | 3 +- > .../apache/tomcat/util/buf/TestStringCache.java | 100 > +++++++++++++++++++++ > 4 files changed, 203 insertions(+), 13 deletions(-) > > diff --git a/java/org/apache/tomcat/util/buf/ByteChunk.java > b/java/org/apache/tomcat/util/buf/ByteChunk.java > index 101f9c0eaa..ff00f55774 100644 > --- a/java/org/apache/tomcat/util/buf/ByteChunk.java > +++ b/java/org/apache/tomcat/util/buf/ByteChunk.java > @@ -21,7 +21,9 @@ import java.io.ObjectInputStream; > import java.io.ObjectOutputStream; > import java.nio.ByteBuffer; > import java.nio.CharBuffer; > +import java.nio.charset.CharacterCodingException; > import java.nio.charset.Charset; > +import java.nio.charset.CodingErrorAction; > import java.nio.charset.StandardCharsets; > > /* > @@ -521,23 +523,64 @@ public final class ByteChunk extends AbstractChunk { > > @Override > public String toString() { > + try { > + return toString(CodingErrorAction.REPLACE, > CodingErrorAction.REPLACE); > + } catch (CharacterCodingException e) { > + // Unreachable code. Use of REPLACE above means the exception > will never be thrown. > + throw new IllegalStateException(e); > + } > + } > + > + > + public String toString(CodingErrorAction malformedInputAction, > CodingErrorAction unmappableCharacterAction) > + throws CharacterCodingException { > if (isNull()) { > return null; > } else if (end - start == 0) { > return ""; > } > - return StringCache.toString(this); > + return StringCache.toString(this, malformedInputAction, > unmappableCharacterAction); > } > > > + /** > + * Converts the current content of the byte buffer to a String using the > configured character set. > + * > + * @return The result of converting the bytes to a String > + * > + * @deprecated Unused. This method will be removed in Tomcat 11 onwards. > + */ > + @Deprecated > public String toStringInternal() { > + try { > + return toStringInternal(CodingErrorAction.REPLACE, > CodingErrorAction.REPLACE); > + } catch (CharacterCodingException e) { > + // Unreachable code. Use of REPLACE above means the exception > will never be thrown. > + throw new IllegalStateException(e); > + } > + } > + > + > + /** > + * Converts the current content of the byte buffer to a String using the > configured character set. > + * > + * @param malformedInputAction Action to take if the input is > malformed > + * @param unmappableCharacterAction Action to take if a byte sequence > can't be mapped to a character > + * > + * @return The result of converting the bytes to a String > + * > + * @throws CharacterCodingException If an error occurs during the > conversion > + */ > + public String toStringInternal(CodingErrorAction malformedInputAction, > CodingErrorAction unmappableCharacterAction) > + throws CharacterCodingException { > if (charset == null) { > charset = DEFAULT_CHARSET; > } > // new String(byte[], int, int, Charset) takes a defensive copy of > the > // entire byte array. This is expensive if only a small subset of the > // bytes will be used. The code below is from Apache Harmony. > - CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - > start)); > + CharBuffer cb = > charset.newDecoder().onMalformedInput(malformedInputAction) > + > .onUnmappableCharacter(unmappableCharacterAction).decode(ByteBuffer.wrap(buff, > start, end - start));
Looking at the code, this is not equivalent, like charset.decode uses thread locals and so on. I will make a change so that charset.decode is used if is REPLACE REPLACE, if you don't mind. I'm pretty sure benching would show no performance difference though. Rémy > return new String(cb.array(), cb.arrayOffset(), cb.length()); > } > > diff --git a/java/org/apache/tomcat/util/buf/StringCache.java > b/java/org/apache/tomcat/util/buf/StringCache.java > index d39de93cfa..5b82e44f74 100644 > --- a/java/org/apache/tomcat/util/buf/StringCache.java > +++ b/java/org/apache/tomcat/util/buf/StringCache.java > @@ -16,10 +16,13 @@ > */ > package org.apache.tomcat.util.buf; > > +import java.nio.charset.CharacterCodingException; > import java.nio.charset.Charset; > +import java.nio.charset.CodingErrorAction; > import java.util.ArrayList; > import java.util.HashMap; > import java.util.Map.Entry; > +import java.util.Objects; > import java.util.TreeMap; > > import org.apache.juli.logging.Log; > @@ -208,11 +211,22 @@ public class StringCache { > > > public static String toString(ByteChunk bc) { > + try { > + return toString(bc, CodingErrorAction.REPLACE, > CodingErrorAction.REPLACE); > + } catch (CharacterCodingException e) { > + // Unreachable code. Use of REPLACE above means the exception > will never be thrown. > + throw new IllegalStateException(e); > + } > + } > + > + > + public static String toString(ByteChunk bc, CodingErrorAction > malformedInputAction, > + CodingErrorAction unmappableCharacterAction) throws > CharacterCodingException { > > // If the cache is null, then either caching is disabled, or we're > // still training > if (bcCache == null) { > - String value = bc.toStringInternal(); > + String value = bc.toStringInternal(malformedInputAction, > unmappableCharacterAction); > if (byteEnabled && (value.length() < maxStringSize)) { > // If training, everything is synced > synchronized (bcStats) { > @@ -285,6 +299,8 @@ public class StringCache { > System.arraycopy(bc.getBuffer(), start, > entry.name, 0, end - start); > // Set encoding > entry.charset = bc.getCharset(); > + entry.malformedInputAction = > malformedInputAction; > + entry.unmappableCharacterAction = > unmappableCharacterAction; > // Initialize occurrence count to one > count = new int[1]; > count[0] = 1; > @@ -300,9 +316,9 @@ public class StringCache { > } else { > accessCount++; > // Find the corresponding String > - String result = find(bc); > + String result = find(bc, malformedInputAction, > unmappableCharacterAction); > if (result == null) { > - return bc.toStringInternal(); > + return bc.toStringInternal(malformedInputAction, > unmappableCharacterAction); > } > // Note: We don't care about safety for the stats > hitCount++; > @@ -462,10 +478,31 @@ public class StringCache { > * @param name The name to find > * > * @return the corresponding value > + * > + * @deprecated Unused. Will be removed in Tomcat 11. > + * Use {@link #find(ByteChunk, CodingErrorAction, > CodingErrorAction)} > */ > + @Deprecated > protected static final String find(ByteChunk name) { > + return find(name, CodingErrorAction.REPLACE, > CodingErrorAction.REPLACE); > + } > + > + > + /** > + * Find an entry given its name in the cache and return the associated > String. > + * > + * @param name The name to find > + * @param malformedInputAction Action to take if an malformed input > is encountered > + * @param unmappableCharacterAction Action to take if an unmappable > character is encountered > + * > + * @return the corresponding value > + */ > + protected static final String find(ByteChunk name, CodingErrorAction > malformedInputAction, > + CodingErrorAction unmappableCharacterAction) { > int pos = findClosest(name, bcCache, bcCache.length); > - if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || > !(name.getCharset().equals(bcCache[pos].charset))) { > + if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) || > !(name.getCharset().equals(bcCache[pos].charset)) || > + > !malformedInputAction.equals(bcCache[pos].malformedInputAction) || > + > !unmappableCharacterAction.equals(bcCache[pos].unmappableCharacterAction)) { > return null; > } else { > return bcCache[pos].value; > @@ -631,11 +668,12 @@ public class StringCache { > > // -------------------------------------------------- ByteEntry Inner > Class > > - > private static class ByteEntry { > > private byte[] name = null; > private Charset charset = null; > + private CodingErrorAction malformedInputAction = null; > + private CodingErrorAction unmappableCharacterAction = null; > private String value = null; > > @Override > @@ -645,17 +683,25 @@ public class StringCache { > > @Override > public int hashCode() { > - return value.hashCode(); > + return Objects.hash(malformedInputAction, > unmappableCharacterAction, value); > } > > @Override > public boolean equals(Object obj) { > - if (obj instanceof ByteEntry) { > - return value.equals(((ByteEntry) obj).value); > + if (this == obj) { > + return true; > } > - return false; > + if (obj == null) { > + return false; > + } > + if (getClass() != obj.getClass()) { > + return false; > + } > + ByteEntry other = (ByteEntry) obj; > + return Objects.equals(malformedInputAction, > other.malformedInputAction) && > + Objects.equals(unmappableCharacterAction, > other.unmappableCharacterAction) && > + Objects.equals(value, other.value); > } > - > } > > > diff --git a/test/org/apache/jasper/compiler/TestGenerator.java > b/test/org/apache/jasper/compiler/TestGenerator.java > index 8cfe73f54c..b854a6010b 100644 > --- a/test/org/apache/jasper/compiler/TestGenerator.java > +++ b/test/org/apache/jasper/compiler/TestGenerator.java > @@ -22,6 +22,7 @@ import java.beans.PropertyDescriptor; > import java.beans.PropertyEditorSupport; > import java.io.File; > import java.io.IOException; > +import java.nio.charset.CodingErrorAction; > import java.util.Date; > import java.util.Scanner; > > @@ -211,7 +212,7 @@ public class TestGenerator extends TomcatBaseTest { > ByteChunk bc = new ByteChunk(); > int rc = getUrl("http://localhost:" + getPort() + > "/test/bug5nnnn/bug56529.jsp", bc, null); > Assert.assertEquals(HttpServletResponse.SC_OK, rc); > - String response = bc.toStringInternal(); > + String response = bc.toStringInternal(CodingErrorAction.REPORT, > CodingErrorAction.REPORT); > Assert.assertTrue(response, response.contains("[1:attribute1: '', > attribute2: '']")); > Assert.assertTrue(response, response.contains("[2:attribute1: '', > attribute2: '']")); > } > diff --git a/test/org/apache/tomcat/util/buf/TestStringCache.java > b/test/org/apache/tomcat/util/buf/TestStringCache.java > new file mode 100644 > index 0000000000..2345d2de76 > --- /dev/null > +++ b/test/org/apache/tomcat/util/buf/TestStringCache.java > @@ -0,0 +1,100 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > +package org.apache.tomcat.util.buf; > + > +import java.nio.charset.CharacterCodingException; > +import java.nio.charset.CodingErrorAction; > +import java.nio.charset.StandardCharsets; > + > +import org.junit.Assert; > +import org.junit.Test; > + > +public class TestStringCache { > + > + private static final byte[] BYTES_VALID = new byte[] { 65, 66, 67, 68}; > + private static final byte[] BYTES_INVALID = new byte[] {65, 66, -1, 67, > 68}; > + > + private static final ByteChunk INPUT_VALID = new ByteChunk(); > + private static final ByteChunk INPUT_INVALID = new ByteChunk(); > + > + private static final CodingErrorAction[] actions = > + new CodingErrorAction[] { CodingErrorAction.IGNORE, > CodingErrorAction.REPLACE, CodingErrorAction.REPORT }; > + > + static { > + INPUT_VALID.setBytes(BYTES_VALID, 0, BYTES_VALID.length); > + INPUT_VALID.setCharset(StandardCharsets.UTF_8); > + INPUT_INVALID.setBytes(BYTES_INVALID, 0, BYTES_INVALID.length); > + INPUT_INVALID.setCharset(StandardCharsets.UTF_8); > + } > + > + > + @Test > + public void testCodingErrorLookup() { > + > + System.setProperty("tomcat.util.buf.StringCache.byte.enabled", > "true"); > + > + Assert.assertTrue(StringCache.byteEnabled); > + StringCache sc = new StringCache(); > + sc.reset(); > + > + for (int i = 0; i < StringCache.trainThreshold * 2; i++) { > + for (CodingErrorAction malformedInputAction : actions) { > + try { > + // UTF-8 doesn't have any unmappable characters > + INPUT_VALID.toString(malformedInputAction, > CodingErrorAction.IGNORE); > + INPUT_INVALID.toString(malformedInputAction, > CodingErrorAction.IGNORE); > + } catch (CharacterCodingException e) { > + // Ignore > + } > + } > + } > + > + Assert.assertNotNull(StringCache.bcCache); > + > + // Check the valid input is cached correctly > + for (CodingErrorAction malformedInputAction : actions) { > + try { > + Assert.assertEquals("ABCD", > INPUT_VALID.toString(malformedInputAction, CodingErrorAction.IGNORE)); > + } catch (CharacterCodingException e) { > + // Should not happen for valid input > + Assert.fail(); > + } > + } > + > + // Check the valid input is cached correctly > + try { > + Assert.assertEquals("ABCD", > INPUT_INVALID.toString(CodingErrorAction.IGNORE, CodingErrorAction.IGNORE)); > + } catch (CharacterCodingException e) { > + // Should not happen for invalid input with IGNORE > + Assert.fail(); > + } > + try { > + Assert.assertEquals("AB\ufffdCD", > INPUT_INVALID.toString(CodingErrorAction.REPLACE, CodingErrorAction.IGNORE)); > + } catch (CharacterCodingException e) { > + // Should not happen for invalid input with REPLACE > + Assert.fail(); > + } > + try { > + Assert.assertEquals("ABCD", > INPUT_INVALID.toString(CodingErrorAction.REPORT, CodingErrorAction.IGNORE)); > + // Should throw exception > + Assert.fail(); > + } catch (CharacterCodingException e) { > + // Ignore > + } > + > + } > +} > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org > For additional commands, e-mail: dev-h...@tomcat.apache.org > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org