add lazy matching to LZ77 compressors

Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/f7e0a4a0
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/f7e0a4a0
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/f7e0a4a0

Branch: refs/heads/master
Commit: f7e0a4a0212424edd1765d1abe66772af069dafb
Parents: 86e2bd0
Author: Stefan Bodewig <bode...@apache.org>
Authored: Sun Apr 2 13:09:54 2017 +0200
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Sun Apr 2 13:09:54 2017 +0200

----------------------------------------------------------------------
 .../compressors/lz77support/LZ77Compressor.java | 32 +++++++++++
 .../compressors/lz77support/Parameters.java     | 58 ++++++++++++++++++--
 2 files changed, 85 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/f7e0a4a0/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
index c284f4c..5379259 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
@@ -394,6 +394,8 @@ public class LZ77Compressor {
 
     private void compress() throws IOException {
         final int minMatch = params.getMinBackReferenceLength();
+        final boolean lazy = params.getLazyMatching();
+        final int lazyThreshold = params.getLazyMatchingThreshold();
 
         while (lookahead >= minMatch) {
             catchUpMissedInserts();
@@ -402,6 +404,11 @@ public class LZ77Compressor {
             if (hashHead != NO_MATCH && hashHead - currentPosition <= 
params.getMaxOffset()) {
                 // sets matchStart as a side effect
                 matchLength = longestMatch(hashHead);
+
+                if (lazy && matchLength <= lazyThreshold && lookahead > 
minMatch) {
+                    // try to find a longer match using the next position
+                    matchLength = longestMatchForNextPosition(matchLength);
+                }
             }
             if (matchLength >= minMatch) {
                 if (blockStart != currentPosition) {
@@ -441,6 +448,31 @@ public class LZ77Compressor {
         return hashHead;
     }
 
+    private int longestMatchForNextPosition(final int prevMatchLength) {
+        // save a bunch of values to restore them if the next match isn't 
better than the current one
+        final int prevMatchStart = matchStart;
+        final int prevInsertHash = insertHash;
+
+        lookahead--;
+        currentPosition++;
+        int hashHead = insertString(currentPosition);
+        final int prevHashHead = prev[currentPosition & wMask];
+        int matchLength = longestMatch(hashHead);
+
+        if (matchLength <= prevMatchLength) {
+            // use the first match, as the next one isn't any better
+            matchLength = prevMatchLength;
+            matchStart = prevMatchStart;
+
+            // restore modified values
+            head[insertHash] = prevHashHead;
+            insertHash = prevInsertHash;
+            currentPosition--;
+            lookahead++;
+        }
+        return matchLength;
+    }
+
     private void insertStringsInMatch(int matchLength) {
         // inserts strings contained in current match
         // insertString inserts the byte 2 bytes after position, which may not 
yet be available -> missedInserts

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/f7e0a4a0/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
index e4e9e47..5cdbae9 100644
--- 
a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
+++ 
b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
@@ -52,7 +52,8 @@ public final class Parameters {
     public static class Builder {
         private final int windowSize;
         private int minBackReferenceLength, maxBackReferenceLength, maxOffset, 
maxLiteralLength;
-        private Integer niceBackReferenceLength, maxCandidates;
+        private Integer niceBackReferenceLength, maxCandidates, lazyThreshold;
+        private Boolean lazyMatches;
 
         private Builder(int windowSize) {
             if (windowSize < 2 || !isPowerOfTwo(windowSize)) {
@@ -173,6 +174,30 @@ public final class Parameters {
         }
 
         /**
+         * Sets whether lazy matching should be performed.
+         *
+         * <p>Lazy matching means that after a back-reference for a certain 
position has been found the compressor will
+         * try to find a longer match for the next position.</p>
+         *
+         * <p>Lazy matching is enabled by default and disabled when tuning for 
speed.</p>
+         */
+        public Builder withLazyMatching(boolean lazy) {
+            lazyMatches = lazy;
+            return this;
+        }
+
+        /**
+         * Sets the threshold for lazy matching.
+         *
+         * <p>Even if lazy matching is enabled it will not be performed if the 
length of the back-reference found for
+         * the current position is longer than this value.</p>
+         */
+        public Builder withLazyThreshold(int threshold) {
+            lazyThreshold = threshold;
+            return this;
+        }
+
+        /**
          * Changes the default setting for "nice back-reference length" and 
"maximum number of candidates" for improved
          * compression speed at the cost of compression ratio.
          *
@@ -181,6 +206,8 @@ public final class Parameters {
         public Builder tunedForSpeed() {
             niceBackReferenceLength = Math.max(minBackReferenceLength, 
maxBackReferenceLength / 8);
             maxCandidates = Math.max(32, windowSize / 1024);
+            lazyMatches = false;
+            lazyThreshold = minBackReferenceLength;
             return this;
         }
 
@@ -191,8 +218,9 @@ public final class Parameters {
          * <p>Use this method after configuring "maximum back-reference 
length".</p>
          */
         public Builder tunedForCompressionRatio() {
-            niceBackReferenceLength = maxBackReferenceLength;
+            niceBackReferenceLength = lazyThreshold = maxBackReferenceLength;
             maxCandidates = Math.max(32, windowSize / 16);
+            lazyMatches = true;
             return this;
         }
 
@@ -205,17 +233,21 @@ public final class Parameters {
             int niceLen = niceBackReferenceLength != null ? 
niceBackReferenceLength
                 : Math.max(minBackReferenceLength, maxBackReferenceLength / 2);
             int candidates = maxCandidates != null ? maxCandidates : 
Math.max(256, windowSize / 128);
+            boolean lazy = lazyMatches != null ? lazyMatches : true;
+            int threshold = lazy ? (lazyThreshold != null ? lazyThreshold : 
niceLen) : minBackReferenceLength;
 
             return new Parameters(windowSize, minBackReferenceLength, 
maxBackReferenceLength,
-                maxOffset, maxLiteralLength, niceLen, candidates);
+                maxOffset, maxLiteralLength, niceLen, candidates, lazy, 
threshold);
         }
     }
 
     private final int windowSize, minBackReferenceLength, 
maxBackReferenceLength, maxOffset, maxLiteralLength,
-        niceBackReferenceLength, maxCandidates;
+        niceBackReferenceLength, maxCandidates, lazyThreshold;
+    private final boolean lazyMatching;
 
     private Parameters(int windowSize, int minBackReferenceLength, int 
maxBackReferenceLength, int maxOffset,
-        int maxLiteralLength, int niceBackReferenceLength, int maxCandidates) {
+            int maxLiteralLength, int niceBackReferenceLength, int 
maxCandidates, boolean lazyMatching,
+            int lazyThreshold) {
         this.windowSize = windowSize;
         this.minBackReferenceLength = minBackReferenceLength;
         this.maxBackReferenceLength = maxBackReferenceLength;
@@ -223,6 +255,8 @@ public final class Parameters {
         this.maxLiteralLength = maxLiteralLength;
         this.niceBackReferenceLength = niceBackReferenceLength;
         this.maxCandidates = maxCandidates;
+        this.lazyMatching = lazyMatching;
+        this.lazyThreshold = lazyThreshold;
     }
 
     /**
@@ -276,6 +310,20 @@ public final class Parameters {
         return maxCandidates;
     }
 
+    /**
+     * Gets whether to perform lazy matching.
+     */
+    public boolean getLazyMatching() {
+        return lazyMatching;
+    }
+
+    /**
+     * Gets the threshold for lazy matching.
+     */
+    public int getLazyMatchingThreshold() {
+        return lazyThreshold;
+    }
+
     private static final boolean isPowerOfTwo(int x) {
         // pre-condition: x > 0
         return (x & (x - 1)) == 0;

Reply via email to