dungba88 commented on code in PR #12738:
URL: https://github.com/apache/lucene/pull/12738#discussion_r1379960873


##########
lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java:
##########
@@ -328,7 +298,100 @@ private void rehash(long lastNodeAddress) throws 
IOException {
       }
 
       mask = newMask;
-      entries = newEntries;
+      fstNodeAddress = newEntries;
+      copiedNodeAddress = newCopiedOffsets;
+    }
+
+    // hash code for a frozen node.  this must precisely match the hash 
computation of an unfrozen
+    // node!
+    private long hash(long node, long pos) throws IOException {
+      FST.BytesReader in = getBytesReader(node, pos);
+
+      final int PRIME = 31;
+
+      long h = 0;
+      fstCompiler.fst.readFirstRealTargetArc(node, scratchArc, in);
+      while (true) {
+        h = PRIME * h + scratchArc.label();
+        h = PRIME * h + (int) (scratchArc.target() ^ (scratchArc.target() >> 
32));
+        h = PRIME * h + scratchArc.output().hashCode();
+        h = PRIME * h + scratchArc.nextFinalOutput().hashCode();
+        if (scratchArc.isFinal()) {
+          h += 17;
+        }
+        if (scratchArc.isLast()) {
+          break;
+        }
+        fstCompiler.fst.readNextRealArc(scratchArc, in);
+      }
+
+      return h;
+    }
+
+    /**
+     * Compares an unfrozen node (UnCompiledNode) with a frozen node at byte 
location address
+     * (long), returning the node length if the two nodes are matched, or -1 
otherwise
+     */
+    private int getMatchedNodeLength(FSTCompiler.UnCompiledNode<T> node, long 
address, long pos)
+        throws IOException {
+      FST.BytesReader in = getBytesReader(address, pos);
+      fstCompiler.fst.readFirstRealTargetArc(address, scratchArc, in);
+
+      // fail fast for a node with fixed length arcs
+      if (scratchArc.bytesPerArc() != 0) {
+        assert node.numArcs > 0;
+        // the frozen node uses fixed-width arc encoding (same number of bytes 
per arc), but may be
+        // sparse or dense
+        switch (scratchArc.nodeFlags()) {
+          case FST.ARCS_FOR_BINARY_SEARCH:
+            // sparse
+            if (node.numArcs != scratchArc.numArcs()) {
+              return -1;
+            }
+            break;
+          case FST.ARCS_FOR_DIRECT_ADDRESSING:
+            // dense -- compare both the number of labels allocated in the 
array (some of which may
+            // not actually be arcs), and the number of arcs
+            if ((node.arcs[node.numArcs - 1].label - node.arcs[0].label + 1) 
!= scratchArc.numArcs()
+                || node.numArcs != FST.Arc.BitTable.countBits(scratchArc, in)) 
{
+              return -1;
+            }
+            break;
+          default:
+            throw new AssertionError("unhandled scratchArc.nodeFlag() " + 
scratchArc.nodeFlags());
+        }
+      }
+
+      // compare arc by arc to see if there is a difference
+      for (int arcUpto = 0; arcUpto < node.numArcs; arcUpto++) {
+        final FSTCompiler.Arc<T> arc = node.arcs[arcUpto];
+        if (arc.label != scratchArc.label()
+            || arc.output.equals(scratchArc.output()) == false
+            || ((FSTCompiler.CompiledNode) arc.target).node != 
scratchArc.target()
+            || arc.nextFinalOutput.equals(scratchArc.nextFinalOutput()) == 
false
+            || arc.isFinal != scratchArc.isFinal()) {
+          return -1;
+        }
+
+        if (scratchArc.isLast()) {
+          if (arcUpto == node.numArcs - 1) {
+            return Math.toIntExact(address - in.getPosition() + 1);
+          } else {
+            return -1;
+          }
+        }
+
+        fstCompiler.fst.readNextRealArc(scratchArc, in);
+      }
+
+      // unfrozen node has fewer arcs than frozen node
+
+      return -1;
+    }
+
+    private FST.BytesReader getBytesReader(long address, long pos) {
+      long localAddress = copiedNodeAddress.get(pos);
+      return new ByteBlockPoolReverseBytesReader(copiedNodes, address - 
localAddress);

Review Comment:
   I made it a property of PagedGrowableHash, with a setter for 
`posDelta`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to