rmuir commented on code in PR #14381: URL: https://github.com/apache/lucene/pull/14381#discussion_r2007499003
########## lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java: ########## @@ -778,6 +786,53 @@ private int[] toCaseInsensitiveChar(int codepoint) { } } + /** + * Expands range to include case-insensitive matches. + * + * <p>This is expensive: case-insensitive range involves iterating over the range space, adding + * alternatives. Jump on the grenade here, contain CPU and memory explosion just to this method + * activated by optional flag. + */ + private void expandCaseInsensitiveRange( + int start, int end, List<Integer> rangeStarts, List<Integer> rangeEnds) { + if (start > end) + throw new IllegalArgumentException( + "invalid range: from (" + start + ") cannot be > to (" + end + ")"); + + // contain the explosion of transitions by using a throwaway state + Automaton scratch = new Automaton(); + int state = scratch.createState(); + + // iterate over range, adding codepoint and any alternatives as transitions + for (int i = start; i <= end; i++) { + scratch.addTransition(state, state, i); + int[] altCodePoints = CaseFolding.lookupAlternates(i); + if (altCodePoints != null) { + for (int alt : altCodePoints) { + scratch.addTransition(state, state, alt); + } + } else { + int altCase = + Character.isLowerCase(i) ? Character.toUpperCase(i) : Character.toLowerCase(i); + if (altCase != i) { + scratch.addTransition(state, state, altCase); + } + } + } Review Comment: This one is best as a separate PR. I will work on it today. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org