dsmiley commented on a change in pull request #1037: LUCENE-9062: QueryVisitor.consumeTermsMatching URL: https://github.com/apache/lucene-solr/pull/1037#discussion_r350726970
########## File path: lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java ########## @@ -47,44 +47,39 @@ public boolean run(byte[] s, int offset, int length) { return accept[p]; } - public CharArrayMatcher asCharArrayMatcher(String label) { - return new CharArrayMatcher() { - @Override - public boolean run(char[] chars, int offset, int length) { - int state = 0; - final int maxIdx = offset + length; - for (int i = offset; i < maxIdx; i++) { - final int code = chars[i]; - int b; - // UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 ) - if (code < 0x80) { - state = step(state, code); - if (state == -1) return false; - } else if (code < 0x800) { - b = (0xC0 | (code >> 6)); - state = step(state, b); - if (state == -1) return false; - b = (0x80 | (code & 0x3F)); - state = step(state, b); + /** + * Returns a representation of the automaton that matches char[] instead of byte[] + */ + public CharArrayMatcher asCharArrayMatcher() { + return (chars, offset, length) -> { + int state = 0; + final int maxIdx = offset + length; + for (int i = offset; i < maxIdx; i++) { + final int code = chars[i]; + int b; + // UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 ) + if (code < 0x80) { + state = step(state, code); + if (state == -1) return false; + } else if (code < 0x800) { + b = (0xC0 | (code >> 6)); + state = step(state, b); + if (state == -1) return false; + b = (0x80 | (code & 0x3F)); + state = step(state, b); + if (state == -1) return false; + } else { + // more complex + byte[] utf8Bytes = new byte[4 * (maxIdx - i)]; Review comment: We should probably allocate this utf8Bytes buffer once and reuse it, rather than allocating a new one for each such char? I know it was like this before, so you needn't touch it. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org