http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/main/java/org/apache/struts2/util/tomcat/buf/MessageBytes.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/struts2/util/tomcat/buf/MessageBytes.java b/core/src/main/java/org/apache/struts2/util/tomcat/buf/MessageBytes.java new file mode 100644 index 0000000..df07284 --- /dev/null +++ b/core/src/main/java/org/apache/struts2/util/tomcat/buf/MessageBytes.java @@ -0,0 +1,546 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.struts2.util.tomcat.buf; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.Locale; + +/** + * This class is used to represent a subarray of bytes in an HTTP message. + * It represents all request/response elements. The byte/char conversions are + * delayed and cached. Everything is recyclable. + * + * The object can represent a byte[], a char[], or a (sub) String. All + * operations can be made in case sensitive mode or not. + * + * @author d...@eng.sun.com + * @author James Todd [go...@eng.sun.com] + * @author Costin Manolache + */ +public final class MessageBytes implements Cloneable, Serializable { + private static final long serialVersionUID = 1L; + + // primary type ( whatever is set as original value ) + private int type = T_NULL; + + public static final int T_NULL = 0; + /** getType() is T_STR if the the object used to create the MessageBytes + was a String */ + public static final int T_STR = 1; + /** getType() is T_STR if the the object used to create the MessageBytes + was a byte[] */ + public static final int T_BYTES = 2; + /** getType() is T_STR if the the object used to create the MessageBytes + was a char[] */ + public static final int T_CHARS = 3; + + private int hashCode=0; + // did we compute the hashcode ? + private boolean hasHashCode=false; + + // Internal objects to represent array + offset, and specific methods + private final ByteChunk byteC=new ByteChunk(); + private final CharChunk charC=new CharChunk(); + + // String + private String strValue; + // true if a String value was computed. Probably not needed, + // strValue!=null is the same + private boolean hasStrValue=false; + + /** + * Creates a new, uninitialized MessageBytes object. + * Use static newInstance() in order to allow + * future hooks. + */ + private MessageBytes() { + } + + /** Construct a new MessageBytes instance + */ + public static MessageBytes newInstance() { + return factory.newInstance(); + } + + public boolean isNull() { + // should we check also hasStrValue ??? + return byteC.isNull() && charC.isNull() && ! hasStrValue; + // bytes==null && strValue==null; + } + + /** + * Resets the message bytes to an uninitialized (NULL) state. + */ + public void recycle() { + type=T_NULL; + byteC.recycle(); + charC.recycle(); + + strValue=null; + + hasStrValue=false; + hasHashCode=false; + hasLongValue=false; + } + + + /** + * Sets the content to the specified subarray of bytes. + * + * @param b the bytes + * @param off the start offset of the bytes + * @param len the length of the bytes + */ + public void setBytes(byte[] b, int off, int len) { + byteC.setBytes( b, off, len ); + type=T_BYTES; + hasStrValue=false; + hasHashCode=false; + hasLongValue=false; + } + + /** + * Sets the content to be a char[] + * + * @param c the bytes + * @param off the start offset of the bytes + * @param len the length of the bytes + */ + public void setChars( char[] c, int off, int len ) { + charC.setChars( c, off, len ); + type=T_CHARS; + hasStrValue=false; + hasHashCode=false; + hasLongValue=false; + } + + /** + * Set the content to be a string + */ + public void setString( String s ) { + strValue=s; + hasHashCode=false; + hasLongValue=false; + if (s == null) { + hasStrValue=false; + type=T_NULL; + } else { + hasStrValue=true; + type=T_STR; + } + } + + // -------------------- Conversion and getters -------------------- + + /** Compute the string value + */ + @Override + public String toString() { + if( hasStrValue ) { + return strValue; + } + + switch (type) { + case T_CHARS: + strValue=charC.toString(); + hasStrValue=true; + return strValue; + case T_BYTES: + strValue=byteC.toString(); + hasStrValue=true; + return strValue; + } + return null; + } + + //---------------------------------------- + /** Return the type of the original content. Can be + * T_STR, T_BYTES, T_CHARS or T_NULL + */ + public int getType() { + return type; + } + + /** + * Returns the byte chunk, representing the byte[] and offset/length. + * Valid only if T_BYTES or after a conversion was made. + */ + public ByteChunk getByteChunk() { + return byteC; + } + + /** + * Returns the char chunk, representing the char[] and offset/length. + * Valid only if T_CHARS or after a conversion was made. + */ + public CharChunk getCharChunk() { + return charC; + } + + /** + * Returns the string value. + * Valid only if T_STR or after a conversion was made. + */ + public String getString() { + return strValue; + } + + /** + * Get the Charset used for string<->byte conversions. + */ + public Charset getCharset() { + return byteC.getCharset(); + } + + /** + * Set the Charset used for string<->byte conversions. + */ + public void setCharset(Charset charset) { + byteC.setCharset(charset); + } + + /** Do a char->byte conversion. + */ + public void toBytes() { + if (!byteC.isNull()) { + type=T_BYTES; + return; + } + toString(); + type=T_BYTES; + Charset charset = byteC.getCharset(); + ByteBuffer result = charset.encode(strValue); + byteC.setBytes(result.array(), result.arrayOffset(), result.limit()); + } + + /** Convert to char[] and fill the CharChunk. + * XXX Not optimized - it converts to String first. + */ + public void toChars() { + if( ! charC.isNull() ) { + type=T_CHARS; + return; + } + // inefficient + toString(); + type=T_CHARS; + char cc[]=strValue.toCharArray(); + charC.setChars(cc, 0, cc.length); + } + + + /** + * Returns the length of the original buffer. + * Note that the length in bytes may be different from the length + * in chars. + */ + public int getLength() { + if(type==T_BYTES) { + return byteC.getLength(); + } + if(type==T_CHARS) { + return charC.getLength(); + } + if(type==T_STR) { + return strValue.length(); + } + toString(); + if( strValue==null ) { + return 0; + } + return strValue.length(); + } + + // -------------------- equals -------------------- + + /** + * Compares the message bytes to the specified String object. + * @param s the String to compare + * @return true if the comparison succeeded, false otherwise + */ + public boolean equals(String s) { + switch (type) { + case T_STR: + if (strValue == null) { + return s == null; + } + return strValue.equals( s ); + case T_CHARS: + return charC.equals( s ); + case T_BYTES: + return byteC.equals( s ); + default: + return false; + } + } + + /** + * Compares the message bytes to the specified String object. + * @param s the String to compare + * @return true if the comparison succeeded, false otherwise + */ + public boolean equalsIgnoreCase(String s) { + switch (type) { + case T_STR: + if (strValue == null) { + return s == null; + } + return strValue.equalsIgnoreCase( s ); + case T_CHARS: + return charC.equalsIgnoreCase( s ); + case T_BYTES: + return byteC.equalsIgnoreCase( s ); + default: + return false; + } + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof MessageBytes) { + return equals((MessageBytes) obj); + } + return false; + } + + public boolean equals(MessageBytes mb) { + switch (type) { + case T_STR: + return mb.equals( strValue ); + } + + if( mb.type != T_CHARS && + mb.type!= T_BYTES ) { + // it's a string or int/date string value + return equals( mb.toString() ); + } + + // mb is either CHARS or BYTES. + // this is either CHARS or BYTES + // Deal with the 4 cases ( in fact 3, one is symmetric) + + if( mb.type == T_CHARS && type==T_CHARS ) { + return charC.equals( mb.charC ); + } + if( mb.type==T_BYTES && type== T_BYTES ) { + return byteC.equals( mb.byteC ); + } + if( mb.type== T_CHARS && type== T_BYTES ) { + return byteC.equals( mb.charC ); + } + if( mb.type== T_BYTES && type== T_CHARS ) { + return mb.byteC.equals( charC ); + } + // can't happen + return true; + } + + + /** + * Returns true if the message bytes starts with the specified string. + * @param s the string + * @param pos The start position + */ + public boolean startsWithIgnoreCase(String s, int pos) { + switch (type) { + case T_STR: + if( strValue==null ) { + return false; + } + if( strValue.length() < pos + s.length() ) { + return false; + } + + for( int i=0; i<s.length(); i++ ) { + if( Ascii.toLower( s.charAt( i ) ) != + Ascii.toLower( strValue.charAt( pos + i ))) { + return false; + } + } + return true; + case T_CHARS: + return charC.startsWithIgnoreCase( s, pos ); + case T_BYTES: + return byteC.startsWithIgnoreCase( s, pos ); + default: + return false; + } + } + + + // -------------------- Hash code -------------------- + @Override + public int hashCode() { + if( hasHashCode ) { + return hashCode; + } + int code = 0; + + code=hash(); + hashCode=code; + hasHashCode=true; + return code; + } + + // normal hash. + private int hash() { + int code=0; + switch (type) { + case T_STR: + // We need to use the same hash function + for (int i = 0; i < strValue.length(); i++) { + code = code * 37 + strValue.charAt( i ); + } + return code; + case T_CHARS: + return charC.hash(); + case T_BYTES: + return byteC.hash(); + default: + return 0; + } + } + + // Inefficient initial implementation. Will be replaced on the next + // round of tune-up + public int indexOf(String s, int starting) { + toString(); + return strValue.indexOf( s, starting ); + } + + // Inefficient initial implementation. Will be replaced on the next + // round of tune-up + public int indexOf(String s) { + return indexOf( s, 0 ); + } + + public int indexOfIgnoreCase(String s, int starting) { + toString(); + String upper=strValue.toUpperCase(Locale.ENGLISH); + String sU=s.toUpperCase(Locale.ENGLISH); + return upper.indexOf( sU, starting ); + } + + /** Copy the src into this MessageBytes, allocating more space if + * needed + */ + public void duplicate( MessageBytes src ) throws IOException + { + switch( src.getType() ) { + case MessageBytes.T_BYTES: + type=T_BYTES; + ByteChunk bc=src.getByteChunk(); + byteC.allocate( 2 * bc.getLength(), -1 ); + byteC.append( bc ); + break; + case MessageBytes.T_CHARS: + type=T_CHARS; + CharChunk cc=src.getCharChunk(); + charC.allocate( 2 * cc.getLength(), -1 ); + charC.append( cc ); + break; + case MessageBytes.T_STR: + type=T_STR; + String sc=src.getString(); + this.setString( sc ); + break; + } + } + + // -------------------- Deprecated code -------------------- + // efficient long + // XXX used only for headers - shouldn't be stored here. + private long longValue; + private boolean hasLongValue=false; + + /** Set the buffer to the representation of an long + */ + public void setLong(long l) { + byteC.allocate(32, 64); + long current = l; + byte[] buf = byteC.getBuffer(); + int start = 0; + int end = 0; + if (l == 0) { + buf[end++] = (byte) '0'; + } + if (l < 0) { + current = -l; + buf[end++] = (byte) '-'; + } + while (current > 0) { + int digit = (int) (current % 10); + current = current / 10; + buf[end++] = HexUtils.getHex(digit); + } + byteC.setOffset(0); + byteC.setEnd(end); + // Inverting buffer + end--; + if (l < 0) { + start++; + } + while (end > start) { + byte temp = buf[start]; + buf[start] = buf[end]; + buf[end] = temp; + start++; + end--; + } + longValue=l; + hasStrValue=false; + hasHashCode=false; + hasLongValue=true; + type=T_BYTES; + } + + // Used for headers conversion + /** Convert the buffer to an long, cache the value + */ + public long getLong() { + if( hasLongValue ) { + return longValue; + } + + switch (type) { + case T_BYTES: + longValue=byteC.getLong(); + break; + default: + longValue= Long.parseLong(toString()); + } + + hasLongValue=true; + return longValue; + + } + + // -------------------- Future may be different -------------------- + + private static final MessageBytesFactory factory=new MessageBytesFactory(); + + private static class MessageBytesFactory { + protected MessageBytesFactory() { + } + public MessageBytes newInstance() { + return new MessageBytes(); + } + } +}
http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/main/java/org/apache/struts2/util/tomcat/buf/StringCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/struts2/util/tomcat/buf/StringCache.java b/core/src/main/java/org/apache/struts2/util/tomcat/buf/StringCache.java new file mode 100644 index 0000000..3a72d49 --- /dev/null +++ b/core/src/main/java/org/apache/struts2/util/tomcat/buf/StringCache.java @@ -0,0 +1,695 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.struts2.util.tomcat.buf; + +import com.opensymphony.xwork2.util.logging.Logger; +import com.opensymphony.xwork2.util.logging.LoggerFactory; + +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map.Entry; +import java.util.TreeMap; + +/** + * This class implements a String cache for ByteChunk and CharChunk. + * + * @author Remy Maucherat + */ +public class StringCache { + + + private static final Logger log = LoggerFactory.getLogger(StringCache.class); + + + // ------------------------------------------------------- Static Variables + + + /** + * Enabled ? + */ + protected static boolean byteEnabled = ("true".equals(System.getProperty( + "tomcat.util.buf.StringCache.byte.enabled", "false"))); + + + protected static boolean charEnabled = ("true".equals(System.getProperty( + "tomcat.util.buf.StringCache.char.enabled", "false"))); + + + protected static int trainThreshold = Integer.parseInt(System.getProperty( + "tomcat.util.buf.StringCache.trainThreshold", "20000")); + + + protected static int cacheSize = Integer.parseInt(System.getProperty( + "tomcat.util.buf.StringCache.cacheSize", "200")); + + + protected static final int maxStringSize = + Integer.parseInt(System.getProperty( + "tomcat.util.buf.StringCache.maxStringSize", "128")); + + + /** + * Statistics hash map for byte chunk. + */ + protected static final HashMap<ByteEntry,int[]> bcStats = + new HashMap<ByteEntry, int[]>(cacheSize); + + + /** + * toString count for byte chunk. + */ + protected static int bcCount = 0; + + + /** + * Cache for byte chunk. + */ + protected static ByteEntry[] bcCache = null; + + + /** + * Statistics hash map for char chunk. + */ + protected static final HashMap<CharEntry,int[]> ccStats = + new HashMap<CharEntry, int[]>(cacheSize); + + + /** + * toString count for char chunk. + */ + protected static int ccCount = 0; + + + /** + * Cache for char chunk. + */ + protected static CharEntry[] ccCache = null; + + + /** + * Access count. + */ + protected static int accessCount = 0; + + + /** + * Hit count. + */ + protected static int hitCount = 0; + + + // ------------------------------------------------------------ Properties + + + /** + * @return Returns the cacheSize. + */ + public int getCacheSize() { + return cacheSize; + } + + + /** + * @param cacheSize The cacheSize to set. + */ + public void setCacheSize(int cacheSize) { + StringCache.cacheSize = cacheSize; + } + + + /** + * @return Returns the enabled. + */ + public boolean getByteEnabled() { + return byteEnabled; + } + + + /** + * @param byteEnabled The enabled to set. + */ + public void setByteEnabled(boolean byteEnabled) { + StringCache.byteEnabled = byteEnabled; + } + + + /** + * @return Returns the enabled. + */ + public boolean getCharEnabled() { + return charEnabled; + } + + + /** + * @param charEnabled The enabled to set. + */ + public void setCharEnabled(boolean charEnabled) { + StringCache.charEnabled = charEnabled; + } + + + /** + * @return Returns the trainThreshold. + */ + public int getTrainThreshold() { + return trainThreshold; + } + + + /** + * @param trainThreshold The trainThreshold to set. + */ + public void setTrainThreshold(int trainThreshold) { + StringCache.trainThreshold = trainThreshold; + } + + + /** + * @return Returns the accessCount. + */ + public int getAccessCount() { + return accessCount; + } + + + /** + * @return Returns the hitCount. + */ + public int getHitCount() { + return hitCount; + } + + + // -------------------------------------------------- Public Static Methods + + + public void reset() { + hitCount = 0; + accessCount = 0; + synchronized (bcStats) { + bcCache = null; + bcCount = 0; + } + synchronized (ccStats) { + ccCache = null; + ccCount = 0; + } + } + + + public static String toString(ByteChunk bc) { + + // If the cache is null, then either caching is disabled, or we're + // still training + if (bcCache == null) { + String value = bc.toStringInternal(); + if (byteEnabled && (value.length() < maxStringSize)) { + // If training, everything is synced + synchronized (bcStats) { + // If the cache has been generated on a previous invocation + // while waiting for the lock, just return the toString + // value we just calculated + if (bcCache != null) { + return value; + } + // Two cases: either we just exceeded the train count, in + // which case the cache must be created, or we just update + // the count for the string + if (bcCount > trainThreshold) { + long t1 = System.currentTimeMillis(); + // Sort the entries according to occurrence + TreeMap<Integer,ArrayList<ByteEntry>> tempMap = + new TreeMap<Integer, ArrayList<ByteEntry>>(); + for (Entry<ByteEntry,int[]> item : bcStats.entrySet()) { + ByteEntry entry = item.getKey(); + int[] countA = item.getValue(); + Integer count = Integer.valueOf(countA[0]); + // Add to the list for that count + ArrayList<ByteEntry> list = tempMap.get(count); + if (list == null) { + // Create list + list = new ArrayList<ByteEntry>(); + tempMap.put(count, list); + } + list.add(entry); + } + // Allocate array of the right size + int size = bcStats.size(); + if (size > cacheSize) { + size = cacheSize; + } + ByteEntry[] tempbcCache = new ByteEntry[size]; + // Fill it up using an alphabetical order + // and a dumb insert sort + ByteChunk tempChunk = new ByteChunk(); + int n = 0; + while (n < size) { + Object key = tempMap.lastKey(); + ArrayList<ByteEntry> list = tempMap.get(key); + for (int i = 0; i < list.size() && n < size; i++) { + ByteEntry entry = list.get(i); + tempChunk.setBytes(entry.name, 0, + entry.name.length); + int insertPos = findClosest(tempChunk, + tempbcCache, n); + if (insertPos == n) { + tempbcCache[n + 1] = entry; + } else { + System.arraycopy(tempbcCache, insertPos + 1, + tempbcCache, insertPos + 2, + n - insertPos - 1); + tempbcCache[insertPos + 1] = entry; + } + n++; + } + tempMap.remove(key); + } + bcCount = 0; + bcStats.clear(); + bcCache = tempbcCache; + if (log.isDebugEnabled()) { + long t2 = System.currentTimeMillis(); + log.debug("ByteCache generation time: " + + (t2 - t1) + "ms"); + } + } else { + bcCount++; + // Allocate new ByteEntry for the lookup + ByteEntry entry = new ByteEntry(); + entry.value = value; + int[] count = bcStats.get(entry); + if (count == null) { + int end = bc.getEnd(); + int start = bc.getStart(); + // Create byte array and copy bytes + entry.name = new byte[bc.getLength()]; + System.arraycopy(bc.getBuffer(), start, entry.name, + 0, end - start); + // Set encoding + entry.charset = bc.getCharset(); + // Initialize occurrence count to one + count = new int[1]; + count[0] = 1; + // Set in the stats hash map + bcStats.put(entry, count); + } else { + count[0] = count[0] + 1; + } + } + } + } + return value; + } else { + accessCount++; + // Find the corresponding String + String result = find(bc); + if (result == null) { + return bc.toStringInternal(); + } + // Note: We don't care about safety for the stats + hitCount++; + return result; + } + + } + + + public static String toString(CharChunk cc) { + + // If the cache is null, then either caching is disabled, or we're + // still training + if (ccCache == null) { + String value = cc.toStringInternal(); + if (charEnabled && (value.length() < maxStringSize)) { + // If training, everything is synced + synchronized (ccStats) { + // If the cache has been generated on a previous invocation + // while waiting for the lock, just return the toString + // value we just calculated + if (ccCache != null) { + return value; + } + // Two cases: either we just exceeded the train count, in + // which case the cache must be created, or we just update + // the count for the string + if (ccCount > trainThreshold) { + long t1 = System.currentTimeMillis(); + // Sort the entries according to occurrence + TreeMap<Integer,ArrayList<CharEntry>> tempMap = + new TreeMap<Integer, ArrayList<CharEntry>>(); + for (Entry<CharEntry,int[]> item : ccStats.entrySet()) { + CharEntry entry = item.getKey(); + int[] countA = item.getValue(); + Integer count = Integer.valueOf(countA[0]); + // Add to the list for that count + ArrayList<CharEntry> list = tempMap.get(count); + if (list == null) { + // Create list + list = new ArrayList<CharEntry>(); + tempMap.put(count, list); + } + list.add(entry); + } + // Allocate array of the right size + int size = ccStats.size(); + if (size > cacheSize) { + size = cacheSize; + } + CharEntry[] tempccCache = new CharEntry[size]; + // Fill it up using an alphabetical order + // and a dumb insert sort + CharChunk tempChunk = new CharChunk(); + int n = 0; + while (n < size) { + Object key = tempMap.lastKey(); + ArrayList<CharEntry> list = tempMap.get(key); + for (int i = 0; i < list.size() && n < size; i++) { + CharEntry entry = list.get(i); + tempChunk.setChars(entry.name, 0, + entry.name.length); + int insertPos = findClosest(tempChunk, + tempccCache, n); + if (insertPos == n) { + tempccCache[n + 1] = entry; + } else { + System.arraycopy(tempccCache, insertPos + 1, + tempccCache, insertPos + 2, + n - insertPos - 1); + tempccCache[insertPos + 1] = entry; + } + n++; + } + tempMap.remove(key); + } + ccCount = 0; + ccStats.clear(); + ccCache = tempccCache; + if (log.isDebugEnabled()) { + long t2 = System.currentTimeMillis(); + log.debug("CharCache generation time: " + + (t2 - t1) + "ms"); + } + } else { + ccCount++; + // Allocate new CharEntry for the lookup + CharEntry entry = new CharEntry(); + entry.value = value; + int[] count = ccStats.get(entry); + if (count == null) { + int end = cc.getEnd(); + int start = cc.getStart(); + // Create char array and copy chars + entry.name = new char[cc.getLength()]; + System.arraycopy(cc.getBuffer(), start, entry.name, + 0, end - start); + // Initialize occurrence count to one + count = new int[1]; + count[0] = 1; + // Set in the stats hash map + ccStats.put(entry, count); + } else { + count[0] = count[0] + 1; + } + } + } + } + return value; + } else { + accessCount++; + // Find the corresponding String + String result = find(cc); + if (result == null) { + return cc.toStringInternal(); + } + // Note: We don't care about safety for the stats + hitCount++; + return result; + } + + } + + + // ----------------------------------------------------- Protected Methods + + + /** + * Compare given byte chunk with byte array. + * Return -1, 0 or +1 if inferior, equal, or superior to the String. + */ + protected static final int compare(ByteChunk name, byte[] compareTo) { + int result = 0; + + byte[] b = name.getBuffer(); + int start = name.getStart(); + int end = name.getEnd(); + int len = compareTo.length; + + if ((end - start) < len) { + len = end - start; + } + for (int i = 0; (i < len) && (result == 0); i++) { + if (b[i + start] > compareTo[i]) { + result = 1; + } else if (b[i + start] < compareTo[i]) { + result = -1; + } + } + if (result == 0) { + if (compareTo.length > (end - start)) { + result = -1; + } else if (compareTo.length < (end - start)) { + result = 1; + } + } + return result; + } + + + /** + * Find an entry given its name in the cache and return the associated + * String. + */ + protected static final String find(ByteChunk name) { + int pos = findClosest(name, bcCache, bcCache.length); + if ((pos < 0) || (compare(name, bcCache[pos].name) != 0) + || !(name.getCharset().equals(bcCache[pos].charset))) { + return null; + } else { + return bcCache[pos].value; + } + } + + + /** + * Find an entry given its name in a sorted array of map elements. + * This will return the index for the closest inferior or equal item in the + * given array. + */ + protected static final int findClosest(ByteChunk name, ByteEntry[] array, + int len) { + + int a = 0; + int b = len - 1; + + // Special cases: -1 and 0 + if (b == -1) { + return -1; + } + + if (compare(name, array[0].name) < 0) { + return -1; + } + if (b == 0) { + return 0; + } + + int i = 0; + while (true) { + i = (b + a) >>> 1; + int result = compare(name, array[i].name); + if (result == 1) { + a = i; + } else if (result == 0) { + return i; + } else { + b = i; + } + if ((b - a) == 1) { + int result2 = compare(name, array[b].name); + if (result2 < 0) { + return a; + } else { + return b; + } + } + } + + } + + + /** + * Compare given char chunk with char array. + * Return -1, 0 or +1 if inferior, equal, or superior to the String. + */ + protected static final int compare(CharChunk name, char[] compareTo) { + int result = 0; + + char[] c = name.getBuffer(); + int start = name.getStart(); + int end = name.getEnd(); + int len = compareTo.length; + + if ((end - start) < len) { + len = end - start; + } + for (int i = 0; (i < len) && (result == 0); i++) { + if (c[i + start] > compareTo[i]) { + result = 1; + } else if (c[i + start] < compareTo[i]) { + result = -1; + } + } + if (result == 0) { + if (compareTo.length > (end - start)) { + result = -1; + } else if (compareTo.length < (end - start)) { + result = 1; + } + } + return result; + } + + + /** + * Find an entry given its name in the cache and return the associated + * String. + */ + protected static final String find(CharChunk name) { + int pos = findClosest(name, ccCache, ccCache.length); + if ((pos < 0) || (compare(name, ccCache[pos].name) != 0)) { + return null; + } else { + return ccCache[pos].value; + } + } + + + /** + * Find an entry given its name in a sorted array of map elements. + * This will return the index for the closest inferior or equal item in the + * given array. + */ + protected static final int findClosest(CharChunk name, CharEntry[] array, + int len) { + + int a = 0; + int b = len - 1; + + // Special cases: -1 and 0 + if (b == -1) { + return -1; + } + + if (compare(name, array[0].name) < 0 ) { + return -1; + } + if (b == 0) { + return 0; + } + + int i = 0; + while (true) { + i = (b + a) >>> 1; + int result = compare(name, array[i].name); + if (result == 1) { + a = i; + } else if (result == 0) { + return i; + } else { + b = i; + } + if ((b - a) == 1) { + int result2 = compare(name, array[b].name); + if (result2 < 0) { + return a; + } else { + return b; + } + } + } + + } + + + // -------------------------------------------------- ByteEntry Inner Class + + + private static class ByteEntry { + + private byte[] name = null; + private Charset charset = null; + private String value = null; + + @Override + public String toString() { + return value; + } + @Override + public int hashCode() { + return value.hashCode(); + } + @Override + public boolean equals(Object obj) { + if (obj instanceof ByteEntry) { + return value.equals(((ByteEntry) obj).value); + } + return false; + } + + } + + + // -------------------------------------------------- CharEntry Inner Class + + + private static class CharEntry { + + private char[] name = null; + private String value = null; + + @Override + public String toString() { + return value; + } + @Override + public int hashCode() { + return value.hashCode(); + } + @Override + public boolean equals(Object obj) { + if (obj instanceof CharEntry) { + return value.equals(((CharEntry) obj).value); + } + return false; + } + + } + + +} http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/main/java/org/apache/struts2/util/tomcat/buf/UDecoder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/struts2/util/tomcat/buf/UDecoder.java b/core/src/main/java/org/apache/struts2/util/tomcat/buf/UDecoder.java new file mode 100644 index 0000000..b52cda7 --- /dev/null +++ b/core/src/main/java/org/apache/struts2/util/tomcat/buf/UDecoder.java @@ -0,0 +1,421 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.struts2.util.tomcat.buf; + +import com.opensymphony.xwork2.util.logging.LoggerFactory; + +import com.opensymphony.xwork2.util.logging.Logger; +import java.io.CharConversionException; +import java.io.IOException; +import java.io.UnsupportedEncodingException; + +/** + * All URL decoding happens here. This way we can reuse, review, optimize + * without adding complexity to the buffers. + * + * The conversion will modify the original buffer. + * + * @author Costin Manolache + */ +public final class UDecoder { + + private static final Logger log = LoggerFactory.getLogger(UDecoder.class); + + public static final boolean ALLOW_ENCODED_SLASH = + Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false")); + + private static class DecodeException extends CharConversionException { + private static final long serialVersionUID = 1L; + public DecodeException(String s) { + super(s); + } + + @Override + public synchronized Throwable fillInStackTrace() { + // This class does not provide a stack trace + return this; + } + } + + /** Unexpected end of data. */ + private static final IOException EXCEPTION_EOF = new DecodeException("EOF"); + + /** %xx with not-hex digit */ + private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException( + "isHexDigit"); + + /** %-encoded slash is forbidden in resource path */ + private static final IOException EXCEPTION_SLASH = new DecodeException( + "noSlash"); + + public UDecoder() + { + } + + /** URLDecode, will modify the source. + */ + public void convert( ByteChunk mb, boolean query ) + throws IOException + { + int start=mb.getOffset(); + byte buff[]=mb.getBytes(); + int end=mb.getEnd(); + + int idx= ByteChunk.findByte( buff, start, end, (byte) '%' ); + int idx2=-1; + if( query ) { + idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' ); + } + if( idx<0 && idx2<0 ) { + return; + } + + // idx will be the smallest positive index ( first % or + ) + if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { + idx=idx2; + } + + final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); + + for( int j=idx; j<end; j++, idx++ ) { + if( buff[ j ] == '+' && query) { + buff[idx]= (byte)' ' ; + } else if( buff[ j ] != '%' ) { + buff[idx]= buff[j]; + } else { + // read next 2 digits + if( j+2 >= end ) { + throw EXCEPTION_EOF; + } + byte b1= buff[j+1]; + byte b2=buff[j+2]; + if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { + throw EXCEPTION_NOT_HEX_DIGIT; + } + + j+=2; + int res=x2c( b1, b2 ); + if (noSlash && (res == '/')) { + throw EXCEPTION_SLASH; + } + buff[idx]=(byte)res; + } + } + + mb.setEnd( idx ); + + return; + } + + // -------------------- Additional methods -------------------- + // XXX What do we do about charset ???? + + /** In-buffer processing - the buffer will be modified + */ + public void convert( CharChunk mb, boolean query ) + throws IOException + { + // log( "Converting a char chunk "); + int start=mb.getOffset(); + char buff[]=mb.getBuffer(); + int cend=mb.getEnd(); + + int idx= CharChunk.indexOf( buff, start, cend, '%' ); + int idx2=-1; + if( query ) { + idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' ); + } + if( idx<0 && idx2<0 ) { + return; + } + + // idx will be the smallest positive index ( first % or + ) + if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { + idx=idx2; + } + + final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); + + for( int j=idx; j<cend; j++, idx++ ) { + if( buff[ j ] == '+' && query ) { + buff[idx]=( ' ' ); + } else if( buff[ j ] != '%' ) { + buff[idx]=buff[j]; + } else { + // read next 2 digits + if( j+2 >= cend ) { + // invalid + throw EXCEPTION_EOF; + } + char b1= buff[j+1]; + char b2=buff[j+2]; + if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { + throw EXCEPTION_NOT_HEX_DIGIT; + } + + j+=2; + int res=x2c( b1, b2 ); + if (noSlash && (res == '/')) { + throw EXCEPTION_SLASH; + } + buff[idx]=(char)res; + } + } + mb.setEnd( idx ); + } + + /** URLDecode, will modify the source + */ + public void convert(MessageBytes mb, boolean query) + throws IOException + { + + switch (mb.getType()) { + case MessageBytes.T_STR: + String strValue=mb.toString(); + if( strValue==null ) { + return; + } + try { + mb.setString( convert( strValue, query )); + } catch (RuntimeException ex) { + throw new DecodeException(ex.getMessage()); + } + break; + case MessageBytes.T_CHARS: + CharChunk charC=mb.getCharChunk(); + convert( charC, query ); + break; + case MessageBytes.T_BYTES: + ByteChunk bytesC=mb.getByteChunk(); + convert( bytesC, query ); + break; + } + } + + // XXX Old code, needs to be replaced !!!! + // + public final String convert(String str, boolean query) + { + if (str == null) { + return null; + } + + if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) { + return str; + } + + final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); + + StringBuilder dec = new StringBuilder(); // decoded string output + int strPos = 0; + int strLen = str.length(); + + dec.ensureCapacity(str.length()); + while (strPos < strLen) { + int laPos; // lookahead position + + // look ahead to next URLencoded metacharacter, if any + for (laPos = strPos; laPos < strLen; laPos++) { + char laChar = str.charAt(laPos); + if ((laChar == '+' && query) || (laChar == '%')) { + break; + } + } + + // if there were non-metacharacters, copy them all as a block + if (laPos > strPos) { + dec.append(str.substring(strPos,laPos)); + strPos = laPos; + } + + // shortcut out of here if we're at the end of the string + if (strPos >= strLen) { + break; + } + + // process next metacharacter + char metaChar = str.charAt(strPos); + if (metaChar == '+') { + dec.append(' '); + strPos++; + continue; + } else if (metaChar == '%') { + // We throw the original exception - the super will deal with + // it + // try { + char res = (char) Integer.parseInt( + str.substring(strPos + 1, strPos + 3), 16); + if (noSlash && (res == '/')) { + throw new IllegalArgumentException("noSlash"); + } + dec.append(res); + strPos += 3; + } + } + + return dec.toString(); + } + + + /** + * Decode and return the specified URL-encoded String. + * When the byte array is converted to a string, the system default + * character encoding is used... This may be different than some other + * servers. It is assumed the string is not a query string. + * + * @param str The url-encoded string + * + * @exception IllegalArgumentException if a '%' character is not followed + * by a valid 2-digit hexadecimal number + */ + public static String URLDecode(String str) { + return URLDecode(str, null); + } + + + /** + * Decode and return the specified URL-encoded String. It is assumed the + * string is not a query string. + * + * @param str The url-encoded string + * @param enc The encoding to use; if null, the default encoding is used. If + * an unsupported encoding is specified null will be returned + * @exception IllegalArgumentException if a '%' character is not followed + * by a valid 2-digit hexadecimal number + */ + public static String URLDecode(String str, String enc) { + return URLDecode(str, enc, false); + } + + + /** + * Decode and return the specified URL-encoded String. + * + * @param str The url-encoded string + * @param enc The encoding to use; if null, the default encoding is used. If + * an unsupported encoding is specified null will be returned + * @param isQuery Is this a query string being processed + * @exception IllegalArgumentException if a '%' character is not followed + * by a valid 2-digit hexadecimal number + */ + public static String URLDecode(String str, String enc, boolean isQuery) { + if (str == null) + return (null); + + // use the specified encoding to extract bytes out of the + // given string so that the encoding is not lost. If an + // encoding is not specified, use ISO-8859-1 + byte[] bytes = null; + try { + if (enc == null) { + bytes = str.getBytes("ISO-8859-1"); + } else { + bytes = str.getBytes(B2CConverter.getCharset(enc)); + } + } catch (UnsupportedEncodingException uee) { + if (log.isDebugEnabled()) { + log.debug("Unable to URL decode the specified input since the encoding "+ enc + " is not supported.", uee); + } + } + + return URLDecode(bytes, enc, isQuery); + + } + + + /** + * Decode and return the specified URL-encoded byte array. + * + * @param bytes The url-encoded byte array + * @param enc The encoding to use; if null, the default encoding is used. If + * an unsupported encoding is specified null will be returned + * @param isQuery Is this a query string being processed + * @exception IllegalArgumentException if a '%' character is not followed + * by a valid 2-digit hexadecimal number + */ + public static String URLDecode(byte[] bytes, String enc, boolean isQuery) { + + if (bytes == null) + return null; + + int len = bytes.length; + int ix = 0; + int ox = 0; + while (ix < len) { + byte b = bytes[ix++]; // Get byte to test + if (b == '+' && isQuery) { + b = (byte)' '; + } else if (b == '%') { + if (ix + 2 > len) { + throw new IllegalArgumentException( + "The % character must be followed by two hexademical digits"); + } + b = (byte) ((convertHexDigit(bytes[ix++]) << 4) + + convertHexDigit(bytes[ix++])); + } + bytes[ox++] = b; + } + if (enc != null) { + try { + return new String(bytes, 0, ox, B2CConverter.getCharset(enc)); + } catch (UnsupportedEncodingException uee) { + if (log.isDebugEnabled()) { + log.debug("Unable to URL decode the specified input since the encoding " + enc + " is not supported.", uee); + } + return null; + } + } + return new String(bytes, 0, ox); + + } + + + private static byte convertHexDigit( byte b ) { + if ((b >= '0') && (b <= '9')) return (byte)(b - '0'); + if ((b >= 'a') && (b <= 'f')) return (byte)(b - 'a' + 10); + if ((b >= 'A') && (b <= 'F')) return (byte)(b - 'A' + 10); + throw new IllegalArgumentException(((char) b) + " is not a hexadecimal digit"); + } + + + private static boolean isHexDigit( int c ) { + return ( ( c>='0' && c<='9' ) || + ( c>='a' && c<='f' ) || + ( c>='A' && c<='F' )); + } + + + private static int x2c( byte b1, byte b2 ) { + int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : + (b1 -'0'); + digit*=16; + digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : + (b2 -'0'); + return digit; + } + + + private static int x2c( char b1, char b2 ) { + int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : + (b1 -'0'); + digit*=16; + digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : + (b2 -'0'); + return digit; + } +} http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/main/java/org/apache/struts2/util/tomcat/buf/Utf8Decoder.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/struts2/util/tomcat/buf/Utf8Decoder.java b/core/src/main/java/org/apache/struts2/util/tomcat/buf/Utf8Decoder.java new file mode 100644 index 0000000..b08b236 --- /dev/null +++ b/core/src/main/java/org/apache/struts2/util/tomcat/buf/Utf8Decoder.java @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.struts2.util.tomcat.buf; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; + +/** + * Decodes bytes to UTF-8. Extracted from Apache Harmony and modified to reject + * code points from U+D800 to U+DFFF as per RFC3629. The standard Java decoder + * does not reject these. It has also been modified to reject code points + * greater than U+10FFFF which the standard Java decoder rejects but the harmony + * one does not. + */ +public class Utf8Decoder extends CharsetDecoder { + + // The next table contains information about UTF-8 charset and + // correspondence of 1st byte to the length of sequence + // For information please visit http://www.ietf.org/rfc/rfc3629.txt + // + // Please note, o means 0, actually. + // ------------------------------------------------------------------- + // 0 1 2 3 Value + // ------------------------------------------------------------------- + // oxxxxxxx 00000000 00000000 0xxxxxxx + // 11oyyyyy 1oxxxxxx 00000000 00000yyy yyxxxxxx + // 111ozzzz 1oyyyyyy 1oxxxxxx 00000000 zzzzyyyy yyxxxxxx + // 1111ouuu 1ouuzzzz 1oyyyyyy 1oxxxxxx 000uuuuu zzzzyyyy yyxxxxxx + private static final int remainingBytes[] = { + // 1owwwwww + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // 11oyyyyy + -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // 111ozzzz + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // 1111ouuu + 3, 3, 3, 3, 3, -1, -1, -1, + // > 11110111 + -1, -1, -1, -1, -1, -1, -1, -1}; + private static final int remainingNumbers[] = {0, // 0 1 2 3 + 4224, // (01o00000b << 6)+(1o000000b) + 401536, // (011o0000b << 12)+(1o000000b << 6)+(1o000000b) + 29892736 // (0111o000b << 18)+(1o000000b << 12)+(1o000000b << + // 6)+(1o000000b) + }; + private static final int lowerEncodingLimit[] = {-1, 0x80, 0x800, 0x10000}; + + + public Utf8Decoder() { + super(B2CConverter.UTF_8, 1.0f, 1.0f); + } + + + @Override + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + if (in.hasArray() && out.hasArray()) { + return decodeHasArray(in, out); + } + return decodeNotHasArray(in, out); + } + + + private CoderResult decodeNotHasArray(ByteBuffer in, CharBuffer out) { + int outRemaining = out.remaining(); + int pos = in.position(); + int limit = in.limit(); + try { + while (pos < limit) { + if (outRemaining == 0) { + return CoderResult.OVERFLOW; + } + int jchar = in.get(); + if (jchar < 0) { + jchar = jchar & 0x7F; + int tail = remainingBytes[jchar]; + if (tail == -1) { + return CoderResult.malformedForLength(1); + } + if (limit - pos < 1 + tail) { + // No early test for invalid sequences here as peeking + // at the next byte is harder + return CoderResult.UNDERFLOW; + } + int nextByte; + for (int i = 0; i < tail; i++) { + nextByte = in.get() & 0xFF; + if ((nextByte & 0xC0) != 0x80) { + return CoderResult.malformedForLength(1 + i); + } + jchar = (jchar << 6) + nextByte; + } + jchar -= remainingNumbers[tail]; + if (jchar < lowerEncodingLimit[tail]) { + // Should have been encoded in a fewer octets + return CoderResult.malformedForLength(1); + } + pos += tail; + } + // Apache Tomcat added test + if (jchar >= 0xD800 && jchar <= 0xDFFF) { + return CoderResult.unmappableForLength(3); + } + // Apache Tomcat added test + if (jchar > 0x10FFFF) { + return CoderResult.unmappableForLength(4); + } + if (jchar <= 0xffff) { + out.put((char) jchar); + outRemaining--; + } else { + if (outRemaining < 2) { + return CoderResult.OVERFLOW; + } + out.put((char) ((jchar >> 0xA) + 0xD7C0)); + out.put((char) ((jchar & 0x3FF) + 0xDC00)); + outRemaining -= 2; + } + pos++; + } + return CoderResult.UNDERFLOW; + } finally { + in.position(pos); + } + } + + + private CoderResult decodeHasArray(ByteBuffer in, CharBuffer out) { + int outRemaining = out.remaining(); + int pos = in.position(); + int limit = in.limit(); + final byte[] bArr = in.array(); + final char[] cArr = out.array(); + final int inIndexLimit = limit + in.arrayOffset(); + int inIndex = pos + in.arrayOffset(); + int outIndex = out.position() + out.arrayOffset(); + // if someone would change the limit in process, + // he would face consequences + for (; inIndex < inIndexLimit && outRemaining > 0; inIndex++) { + int jchar = bArr[inIndex]; + if (jchar < 0) { + jchar = jchar & 0x7F; + // If first byte is invalid, tail will be set to -1 + int tail = remainingBytes[jchar]; + if (tail == -1) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // Additional checks to detect invalid sequences ASAP + // Checks derived from Unicode 6.2, Chapter 3, Table 3-7 + // Check 2nd byte + int tailAvailable = inIndexLimit - inIndex - 1; + if (tailAvailable > 0) { + // First byte C2..DF, second byte 80..BF + if (jchar > 0x41 && jchar < 0x60 && + (bArr[inIndex + 1] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte E0, second byte A0..BF + if (jchar == 0x60 && (bArr[inIndex + 1] & 0xE0) != 0xA0) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte E1..EC, second byte 80..BF + if (jchar > 0x60 && jchar < 0x6D && + (bArr[inIndex + 1] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte ED, second byte 80..9F + if (jchar == 0x6D && (bArr[inIndex + 1] & 0xE0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte EE..EF, second byte 80..BF + if (jchar > 0x6D && jchar < 0x70 && + (bArr[inIndex + 1] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte F0, second byte 90..BF + if (jchar == 0x70 && + ((bArr[inIndex + 1] & 0xFF) < 0x90 || + (bArr[inIndex + 1] & 0xFF) > 0xBF)) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte F1..F3, second byte 80..BF + if (jchar > 0x70 && jchar < 0x74 && + (bArr[inIndex + 1] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + // First byte F4, second byte 80..8F + if (jchar == 0x74 && + (bArr[inIndex + 1] & 0xF0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + } + // Check third byte if present and expected + if (tailAvailable > 1 && tail > 1) { + if ((bArr[inIndex + 2] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(2); + } + } + // Check fourth byte if present and expected + if (tailAvailable > 2 && tail > 2) { + if ((bArr[inIndex + 3] & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(3); + } + } + if (tailAvailable < tail) { + break; + } + for (int i = 0; i < tail; i++) { + int nextByte = bArr[inIndex + i + 1] & 0xFF; + if ((nextByte & 0xC0) != 0x80) { + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1 + i); + } + jchar = (jchar << 6) + nextByte; + } + jchar -= remainingNumbers[tail]; + if (jchar < lowerEncodingLimit[tail]) { + // Should have been encoded in fewer octets + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return CoderResult.malformedForLength(1); + } + inIndex += tail; + } + // Apache Tomcat added test + if (jchar >= 0xD800 && jchar <= 0xDFFF) { + return CoderResult.unmappableForLength(3); + } + // Apache Tomcat added test + if (jchar > 0x10FFFF) { + return CoderResult.unmappableForLength(4); + } + if (jchar <= 0xffff) { + cArr[outIndex++] = (char) jchar; + outRemaining--; + } else { + if (outRemaining < 2) { + return CoderResult.OVERFLOW; + } + cArr[outIndex++] = (char) ((jchar >> 0xA) + 0xD7C0); + cArr[outIndex++] = (char) ((jchar & 0x3FF) + 0xDC00); + outRemaining -= 2; + } + } + in.position(inIndex - in.arrayOffset()); + out.position(outIndex - out.arrayOffset()); + return (outRemaining == 0 && inIndex < inIndexLimit) ? + CoderResult.OVERFLOW : + CoderResult.UNDERFLOW; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/main/java/org/apache/struts2/views/util/DefaultUrlHelper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/struts2/views/util/DefaultUrlHelper.java b/core/src/main/java/org/apache/struts2/views/util/DefaultUrlHelper.java index 4e62c21..9e8418b 100644 --- a/core/src/main/java/org/apache/struts2/views/util/DefaultUrlHelper.java +++ b/core/src/main/java/org/apache/struts2/views/util/DefaultUrlHelper.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.struts2.StrutsConstants; +import org.apache.struts2.util.URLDecoderUtil; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -267,15 +268,15 @@ public class DefaultUrlHelper implements UrlHelper { } /** - * Decodes the URL using {@link java.net.URLDecoder#decode(String, String)} with the encoding specified in the configuration. + * Decodes the URL using {@link URLDecoderUtil#decode(String, String)} with the encoding specified in the configuration. * * @param input the input to decode * @return the encoded string */ public String decode( String input ) { try { - return URLDecoder.decode(input, encoding); - } catch (UnsupportedEncodingException e) { + return URLDecoderUtil.decode(input, encoding); + } catch (Exception e) { LOG.warn("Could not decode URL parameter '{}', returning value un-decoded", input); return input; } http://git-wip-us.apache.org/repos/asf/struts/blob/76f18840/core/src/test/java/org/apache/struts2/util/URLDecoderUtilTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/struts2/util/URLDecoderUtilTest.java b/core/src/test/java/org/apache/struts2/util/URLDecoderUtilTest.java new file mode 100644 index 0000000..f21c08f --- /dev/null +++ b/core/src/test/java/org/apache/struts2/util/URLDecoderUtilTest.java @@ -0,0 +1,71 @@ +package org.apache.struts2.util; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class URLDecoderUtilTest { + + @Test + public void testURLDecodeStringInvalid() { + // %n rather than %nn should throw an IAE according to the Javadoc + Exception exception = null; + try { + URLDecoderUtil.decode("%5xxxxx", "ISO-8859-1"); + } catch (Exception e) { + exception = e; + } + assertTrue(exception instanceof IllegalArgumentException); + + // Edge case trying to trigger ArrayIndexOutOfBoundsException + exception = null; + try { + URLDecoderUtil.decode("%5", "ISO-8859-1"); + } catch (Exception e) { + exception = e; + } + assertTrue(exception instanceof IllegalArgumentException); + } + + @Test + public void testURLDecodeStringValidIso88591Start() { + + String result = URLDecoderUtil.decode("%41xxxx", "ISO-8859-1"); + assertEquals("Axxxx", result); + } + + @Test + public void testURLDecodeStringValidIso88591Middle() { + + String result = URLDecoderUtil.decode("xx%41xx", "ISO-8859-1"); + assertEquals("xxAxx", result); + } + + @Test + public void testURLDecodeStringValidIso88591End() { + + String result = URLDecoderUtil.decode("xxxx%41", "ISO-8859-1"); + assertEquals("xxxxA", result); + } + + @Test + public void testURLDecodeStringValidUtf8Start() { + String result = URLDecoderUtil.decode("%c3%aaxxxx", "UTF-8"); + assertEquals("\u00eaxxxx", result); + } + + @Test + public void testURLDecodeStringValidUtf8Middle() { + + String result = URLDecoderUtil.decode("xx%c3%aaxx", "UTF-8"); + assertEquals("xx\u00eaxx", result); + } + + @Test + public void testURLDecodeStringValidUtf8End() { + + String result = URLDecoderUtil.decode("xxxx%c3%aa", "UTF-8"); + assertEquals("xxxx\u00ea", result); + } + +} \ No newline at end of file