msokolov commented on a change in pull request #157: URL: https://github.com/apache/lucene/pull/157#discussion_r643925065
########## File path: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java ########## @@ -273,6 +260,32 @@ private boolean releaseBufferedToken() { return false; } + /** + * Free output nodes before the given outputs. Free inputs nodes before the minimum input node for + * this output. + * + * @param output target output node + */ + private void freeBefore(OutputNode output) { + // We've released all of the tokens that end at the current output, + // so free all output nodes before this. Input nodes are more complex. + // The second shingled tokens with alternate paths can appear later in the output graph than Review comment: "than than" ########## File path: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java ########## @@ -273,6 +260,32 @@ private boolean releaseBufferedToken() { return false; } + /** + * Free output nodes before the given outputs. Free inputs nodes before the minimum input node for Review comment: "Free input nodes" I think ########## File path: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java ########## @@ -310,8 +323,11 @@ public boolean incrementToken() throws IOException { int outMax = outputNodes.getMaxPos(); // If positionIncrement > 1 this node should be at the end of the flattened graph if (positionIncrement > 1 && src.outputNode < outMax) { - // We crossed a gap that we need to account for. This node exists from a length >1 path - // jumping to get here. + // If there was a hole at the end of an alternate path then the input and output nodes Review comment: minor, but: if you use block comments, then our autoformatter won't apply its annoying line-break rules :) ########## File path: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java ########## @@ -273,6 +260,32 @@ private boolean releaseBufferedToken() { return false; } + /** + * Free output nodes before the given outputs. 
Free inputs nodes before the minimum input node for + * this output. + * + * @param output target output node + */ + private void freeBefore(OutputNode output) { + // We've released all of the tokens that end at the current output, + // so free all output nodes before this. Input nodes are more complex. + // The second shingled tokens with alternate paths can appear later in the output graph than + // than some of their alternate path tokens. + // Because of this case we can only free from the minimum because the minimum node will have + // come from before the second shingled token. + // This means we have to hold onto input nodes who's tokens get stacked on previous nodes until Review comment: "whose" ########## File path: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java ########## @@ -392,12 +408,20 @@ private OutputNode recoverFromHole(InputNode src, int startOffset) { int maxOutIndex = outputNodes.getMaxPos(); OutputNode outSrc = outputNodes.get(maxOutIndex); // There are two types of holes, neighbor holes and consumed holes. A neighbor hole is between - // two tokens. A consumed hole is - // between the start a long token and the next token that is "under" the path of the long token. - // A consumed hole should have the outputsrc node of the short token be the out dest + // two tokens, it looks like a->*hole*->b. + // A consumed hole is between the start a long token and the next token that is "under" the path + // of the long token. + // It looks like : ___abc__ + // | | Review comment: Have you run the formatter? I think it might mess these pictures up unless you use block comments -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org