mikemccand commented on issue #12714: URL: https://github.com/apache/lucene/issues/12714#issuecomment-1780282563
I made a quick hackity change, just to measure the number of additional bytes we'd "typically" have to copy in order to duplicate suffix bytes from the growing (forced append-only) FST, and the results are kinda depressing :)

```
on fallback: 41394190 bytes would be copied; ramBytesUsed=16777218
on fallback: 37687027 bytes would be copied; ramBytesUsed=16777216
on fallback: 23689680 bytes would be copied; ramBytesUsed=16777216
on fallback: 29950030 bytes would be copied; ramBytesUsed=16777216
on fallback: 30583201 bytes would be copied; ramBytesUsed=16777218
on fallback: 27922501 bytes would be copied; ramBytesUsed=16777218
on fallback: 27712324 bytes would be copied; ramBytesUsed=16777218
on fallback: 29407790 bytes would be copied; ramBytesUsed=16777218
on fallback: 31510848 bytes would be copied; ramBytesUsed=16777218
on fallback: 31530558 bytes would be copied; ramBytesUsed=16777217
on fallback: 30333984 bytes would be copied; ramBytesUsed=16777217
on fallback: 33169962 bytes would be copied; ramBytesUsed=16777217
on fallback: 29209248 bytes would be copied; ramBytesUsed=16777217
```

This is just printing the number of bytes we'd have to duplicate out of the FST's growing `byte[]` each time one half of the double-barrel LRU cache is full (on fallback). Net/net it'd result in about 3X more RAM usage in the `NodeHash` to achieve the same minimal FST as today, sigh.

But a more glass-half-full outlook is that it would allow us to directly append the growing FST bytes to disk, instead of holding them in RAM as they grow indefinitely, so the two changes combined would mean less RAM used overall to build a given FST. We are just shifting the RAM budget away from holding the full FST to holding only the necessary suffix node bytes, and that is tunable by the provided RAM budget. Net/net it's still a win.
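To make that glass-half-full part concrete, here is a rough sketch of what the append-to-disk side could look like. This is not the real `FSTCompiler`/`BytesStore` API -- the class and method names below are made up -- but it shows the idea: once the FST bytes are strictly append-only, each frozen node can be pushed straight to an `IndexOutput`, and only the current in-flight block stays on the heap. The catch is that flushed bytes can never be read back, which is exactly why the `NodeHash` would need to keep its own copies of the suffix nodes.

```
// Hypothetical sketch only -- not Lucene's actual FST/BytesStore API.
// Shows the idea of streaming the append-only FST bytes to disk as nodes are
// frozen, so the full FST never has to be held on the heap during construction.
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

final class AppendOnlyFSTBytes {
  private final IndexOutput out;              // where the frozen FST bytes land
  private final byte[] block = new byte[1 << 15];
  private int upto;                           // write offset inside the current block
  private long flushedBytes;                  // bytes already appended to disk

  AppendOnlyFSTBytes(IndexOutput out) {
    this.out = out;
  }

  /** Absolute address (disk + in-heap tail) of the next byte to be written. */
  long position() {
    return flushedBytes + upto;
  }

  /** Appends one frozen node's bytes; spills full blocks to disk and never re-reads them. */
  void append(byte[] nodeBytes, int offset, int len) throws IOException {
    int copied = 0;
    while (copied < len) {
      int chunk = Math.min(len - copied, block.length - upto);
      System.arraycopy(nodeBytes, offset + copied, block, upto, chunk);
      upto += chunk;
      copied += chunk;
      if (upto == block.length) {
        out.writeBytes(block, 0, upto);       // append-only: this block is now immutable
        flushedBytes += upto;
        upto = 0;
      }
    }
  }
}
```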
Here's the change I tested -- only to measure the added RAM needed, not actually implementing the copying yet:

```
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
index 04c1be414c2..762be55bf10 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
@@ -19,6 +19,7 @@ package org.apache.lucene.util.fst;
 import java.io.IOException;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PagedGrowableWriter;
+import org.apache.lucene.util.ByteBlockPool;
 
 // TODO: any way to make a reverse suffix lookup (msokolov's idea) instead of more costly hash?
 // hmmm, though, hash is not so wasteful
@@ -51,6 +52,8 @@ final class NodeHash<T> {
   private final FST.Arc<T> scratchArc = new FST.Arc<>();
   private final FST.BytesReader in;
 
+  private int lastNodeNumBytes;
+
   /**
    * ramLimitMB is the max RAM we can use for recording suffixes. If we hit this limit, the least
    * recently used suffixes are discarded, and the FST is no longer minimal. Still, larger
@@ -110,13 +113,22 @@ final class NodeHash<T> {
       node = getFallback(nodeIn, hash);
       if (node != 0) {
         // it was already in fallback -- promote to primary
-        primaryTable.set(pos, node);
+        primaryTable.set(pos, node, lastNodeNumBytes);
       } else {
         // not in fallback either -- freeze & add the incoming node
+        long posStart = fstCompiler.bytes.getPosition();
+        // freeze & add
         node = fstCompiler.addNode(nodeIn);
+        long posEnd = fstCompiler.bytes.getPosition();
+
+        // make sure i get the order right:
+        assert posEnd > posStart;
+
+        int numBytesToCopy = Math.toIntExact(posEnd - posStart);
+
         // we use 0 as empty marker in hash table, so it better be impossible to get a frozen node
         // at 0:
         assert node != 0;
@@ -124,7 +136,7 @@ final class NodeHash<T> {
         // confirm frozen hash and unfrozen hash are the same
         assert hash(node) == hash : "mismatch frozenHash=" + hash(node) + " vs hash=" + hash;
 
-        primaryTable.set(pos, node);
+        primaryTable.set(pos, node, numBytesToCopy);
       }
 
       // how many bytes would be used if we had "perfect" hashing:
@@ -144,6 +156,7 @@ final class NodeHash<T> {
       // time to fallback -- fallback is now used read-only to promote a node (suffix) to
       // primary if we encounter it again
       fallbackTable = primaryTable;
+      System.out.println("on fallback: " + fallbackTable.numBytesCopied + " bytes would be copied; ramBytesUsed=" + ramBytesUsed);
       // size primary table the same size to reduce rehash cost
       // TODO: we could clear & reuse the previous fallbackTable, instead of allocating a new
       // to reduce GC load
@@ -210,6 +223,8 @@ final class NodeHash<T> {
     return h;
   }
 
+  // nocommit explain side effect setting lastNodeNumBytes
+
   /**
    * Compares an unfrozen node (UnCompiledNode) with a frozen node at byte location address (long),
    * returning true if they are equal.
@@ -255,6 +270,7 @@ final class NodeHash<T> {
       if (scratchArc.isLast()) {
         if (arcUpto == node.numArcs - 1) {
+          lastNodeNumBytes = Math.toIntExact(address - in.getPosition());
           return true;
         } else {
           return false;
         }
       }
@@ -274,6 +290,8 @@ final class NodeHash<T> {
     private PagedGrowableWriter entries;
     private long count;
     private long mask;
+    private long numBytesCopied;
+    private ByteBlockPool copiedNodes;
 
     // 256K blocks, but note that the final block is sized only as needed so it won't use the full
     // block size when just a few elements were written to it
@@ -282,6 +300,7 @@ final class NodeHash<T> {
     public PagedGrowableHash() {
       entries = new PagedGrowableWriter(16, BLOCK_SIZE_BYTES, 8, PackedInts.COMPACT);
       mask = 15;
+      copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
     }
 
     public PagedGrowableHash(long lastNodeAddress, long size) {
@@ -290,16 +309,25 @@ final class NodeHash<T> {
           size, BLOCK_SIZE_BYTES, PackedInts.bitsRequired(lastNodeAddress), PackedInts.COMPACT);
       mask = size - 1;
       assert (mask & size) == 0 : "size must be a power-of-2; got size=" + size + " mask=" + mask;
+      copiedNodes = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
     }
 
     public long get(long index) {
       return entries.get(index);
     }
 
+    /*
     public void set(long index, long pointer) throws IOException {
       entries.set(index, pointer);
       count++;
     }
+    */
+
+    public void set(long index, long pointer, int numBytesToCopy) throws IOException {
+      entries.set(index, pointer);
+      count++;
+      numBytesCopied += numBytesToCopy;
+    }
 
     private void rehash(long lastNodeAddress) throws IOException {
       // double hash table size on each rehash
```
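For the part this hack deliberately skips -- actually copying the suffix bytes -- each `PagedGrowableHash` could hold its own copy of every frozen node it records, in the `copiedNodes` pool above, so lookups and fallback promotions never have to read back from the (possibly already flushed to disk) FST bytes. Something like the rough sketch below; the class and method names are made up, and it assumes `ByteBlockPool.append`/`readBytes` behave as they do today:

```
// Hypothetical sketch only -- not what the diff above implements.
// A small side pool the hash could own, holding its private copy of each frozen
// node's bytes, addressed by a local offset stored next to the FST node address.
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;

final class CopiedNodePool {
  private final ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
  private long nextOffset;     // next free address inside the pool
  long numBytesCopied;         // same counter the instrumentation above prints on fallback

  CopiedNodePool() {
    pool.nextBuffer();         // make the first block available for writing
  }

  /** Copies one frozen node's bytes and returns its local address in the pool. */
  long copy(BytesRef nodeBytes) {
    long start = nextOffset;
    pool.append(nodeBytes);    // ByteBlockPool handles crossing block boundaries
    nextOffset += nodeBytes.length;
    numBytesCopied += nodeBytes.length;
    return start;
  }

  /** Reads a previously copied node back out, e.g. to compare it against an incoming
   *  UnCompiledNode or to promote a suffix from the fallback table to the primary table. */
  void read(long localAddress, byte[] scratch, int numBytes) {
    pool.readBytes(localAddress, scratch, 0, numBytes);
  }
}
```

That per-table copy is the ~25-40 MB per fallback that the println above is reporting.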