Author: damjan Date: Tue Oct 28 04:01:00 2014 New Revision: 1634775 URL: http://svn.apache.org/r1634775 Log: Optimize the LZW implementation, support reading bit patterns that are either big or little endian, and make LZWInputStream public.
Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java (with props) commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html (with props) commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java (with props) Removed: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/_internal_/ Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java?rev=1634775&r1=1634774&r2=1634775&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java Tue Oct 28 04:01:00 2014 @@ -20,21 +20,22 @@ package org.apache.commons.compress.arch import java.io.IOException; import java.io.InputStream; +import java.nio.ByteOrder; -import org.apache.commons.compress.compressors.z._internal_.InternalLZWInputStream; +import org.apache.commons.compress.compressors.lzw.LZWInputStream; /** * Input stream that decompresses ZIP method 1 (unshrinking). A variation of the LZW algorithm, with some twists. * @NotThreadSafe * @since 1.7 */ -class UnshrinkingInputStream extends InternalLZWInputStream { +class UnshrinkingInputStream extends LZWInputStream { private static final int MAX_CODE_SIZE = 13; private static final int MAX_TABLE_SIZE = 1 << MAX_CODE_SIZE; private final boolean[] isUsed; public UnshrinkingInputStream(InputStream inputStream) throws IOException { - super(inputStream); + super(inputStream, ByteOrder.LITTLE_ENDIAN); setClearCode(codeSize); initializeTables(MAX_CODE_SIZE); isUsed = new boolean[prefixes.length]; Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java?rev=1634775&view=auto ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java (added) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java Tue Oct 28 04:01:00 2014 @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.compressors.lzw; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.utils.BitInputStream; + +/** + * <p>Generic LZW implementation. It is used internally for + * the Z decompressor and the Unshrinking Zip file compression method, + * but may be useful for third-party projects in implementing their own LZW variations.</p> + * + * @NotThreadSafe + * @since 1.10 + */ +public abstract class LZWInputStream extends CompressorInputStream { + private final byte[] oneByte = new byte[1]; + + protected final BitInputStream in; + protected int clearCode = -1; + protected int codeSize = 9; + protected byte previousCodeFirstChar; + protected int previousCode = -1; + protected int tableSize = 0; + protected int[] prefixes; + protected byte[] characters; + private byte[] outputStack; + private int outputStackLocation; + + protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) { + this.in = new BitInputStream(inputStream, byteOrder); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public int read() throws IOException { + int ret = read(oneByte); + if (ret < 0) { + return ret; + } + return 0xff & oneByte[0]; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int bytesRead = readFromStack(b, off, len); + while (len - bytesRead > 0) { + int result = decompressNextSymbol(); + if (result < 0) { + if (bytesRead > 0) { + count(bytesRead); + return bytesRead; + } + return result; + } + bytesRead += readFromStack(b, off + bytesRead, len - bytesRead); + } + count(bytesRead); + return bytesRead; + } + + /** + * Read the next code and expand it. + */ + protected abstract int decompressNextSymbol() throws IOException; + + /** + * Add a new entry to the dictionary. + */ + protected abstract int addEntry(int previousCode, byte character) + throws IOException; + + /** + * Sets the clear code based on the code size. + */ + protected void setClearCode(int codeSize) { + clearCode = (1 << (codeSize - 1)); + } + + /** + * Initializes the arrays based on the maximum code size. + */ + protected void initializeTables(int maxCodeSize) { + final int maxTableSize = 1 << maxCodeSize; + prefixes = new int[maxTableSize]; + characters = new byte[maxTableSize]; + outputStack = new byte[maxTableSize]; + outputStackLocation = maxTableSize; + final int max = 1 << 8; + for (int i = 0; i < max; i++) { + prefixes[i] = -1; + characters[i] = (byte) i; + } + } + + /** + * Reads the next code from the stream. + */ + protected int readNextCode() throws IOException { + return in.readBits(codeSize); + } + + /** + * Adds a new entry if the maximum table size hasn't been exceeded + * and returns the new index. + */ + protected int addEntry(int previousCode, byte character, int maxTableSize) { + if (tableSize < maxTableSize) { + prefixes[tableSize] = previousCode; + characters[tableSize] = character; + return tableSize++; + } + return -1; + } + + /** + * Add entry for repeat of previousCode we haven't added, yet. + */ + protected int addRepeatOfPreviousCode() throws IOException { + if (previousCode == -1) { + // can't have a repeat for the very first code + throw new IOException("The first code can't be a reference to its preceding code"); + } + return addEntry(previousCode, previousCodeFirstChar); + } + + /** + * Expands the entry with index code to the output stack and may + * create a new entry + */ + protected int expandCodeToOutputStack(int code, boolean addedUnfinishedEntry) + throws IOException { + for (int entry = code; entry >= 0; entry = prefixes[entry]) { + outputStack[--outputStackLocation] = characters[entry]; + } + if (previousCode != -1 && !addedUnfinishedEntry) { + addEntry(previousCode, outputStack[outputStackLocation]); + } + previousCode = code; + previousCodeFirstChar = outputStack[outputStackLocation]; + return outputStackLocation; + } + + private int readFromStack(byte[] b, int off, int len) { + int remainingInStack = outputStack.length - outputStackLocation; + if (remainingInStack > 0) { + int maxLength = Math.min(remainingInStack, len); + System.arraycopy(outputStack, outputStackLocation, b, off, maxLength); + outputStackLocation += maxLength; + return maxLength; + } + return 0; + } +} Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java ------------------------------------------------------------------------------ svn:eol-style = native Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html?rev=1634775&view=auto ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html (added) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html Tue Oct 28 04:01:00 2014 @@ -0,0 +1,23 @@ +<html> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + <body> + <p>Generic LZW implementation.</p> + </body> +</html> Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html ------------------------------------------------------------------------------ svn:eol-style = native Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java?rev=1634775&r1=1634774&r2=1634775&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java Tue Oct 28 04:01:00 2014 @@ -20,14 +20,16 @@ package org.apache.commons.compress.comp import java.io.IOException; import java.io.InputStream; -import org.apache.commons.compress.compressors.z._internal_.InternalLZWInputStream; +import java.nio.ByteOrder; + +import org.apache.commons.compress.compressors.lzw.LZWInputStream; /** * Input stream that decompresses .Z files. * @NotThreadSafe * @since 1.7 */ -public class ZCompressorInputStream extends InternalLZWInputStream { +public class ZCompressorInputStream extends LZWInputStream { private static final int MAGIC_1 = 0x1f; private static final int MAGIC_2 = 0x9d; private static final int BLOCK_MODE_MASK = 0x80; @@ -37,10 +39,10 @@ public class ZCompressorInputStream exte private long totalCodesRead = 0; public ZCompressorInputStream(InputStream inputStream) throws IOException { - super(inputStream); - int firstByte = in.read(); - int secondByte = in.read(); - int thirdByte = in.read(); + super(inputStream, ByteOrder.LITTLE_ENDIAN); + int firstByte = in.readBits(8); + int secondByte = in.readBits(8); + int thirdByte = in.readBits(8); if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { throw new IOException("Input is not in .Z format"); } @@ -87,8 +89,7 @@ public class ZCompressorInputStream exte for (long i = 0; i < codeReadsToThrowAway; i++) { readNextCode(); } - bitsCached = 0; - bitsCachedSize = 0; + in.clearBitCache(); } /** Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java?rev=1634775&view=auto ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java (added) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java Tue Oct 28 04:01:00 2014 @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +/** + * Reads bits from an InputStream. + * @since 1.10 + * @NotThreadSafe + */ +public class BitInputStream implements Closeable { + private final InputStream in; + private final ByteOrder byteOrder; + private int bitsCached = 0; + private int bitsCachedSize = 0; + + /** + * Constructor taking an InputStream and its bit arrangement. + * @param in the InputStream + * @param byteOrder the bit arrangement across byte boundaries, + * either BIG_ENDIAN (aaaaabbb bb000000) or LITTLE_ENDIAN (bbbaaaaa 000000bb) + */ + public BitInputStream(final InputStream in, final ByteOrder byteOrder) { + this.in = in; + this.byteOrder = byteOrder; + } + + public void close() throws IOException { + in.close(); + } + + public void clearBitCache() { + bitsCached = 0; + bitsCachedSize = 0; + } + + public int readBits(final int count) throws IOException { + while (bitsCachedSize < count) { + final int nextByte = in.read(); + if (nextByte < 0) { + return nextByte; + } + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsCached |= (nextByte << bitsCachedSize); + } else { + bitsCached <<= 8; + bitsCached |= nextByte; + } + bitsCachedSize += 8; + } + + final int mask = (1 << count) - 1; + final int bitsOut; + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsOut = (bitsCached & mask); + bitsCached >>>= count; + } else { + bitsOut = (bitsCached >> (bitsCachedSize - count)) & mask; + } + bitsCachedSize -= count; + return bitsOut; + } +} Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java ------------------------------------------------------------------------------ svn:eol-style = native