gf2121 commented on code in PR #12825: URL: https://github.com/apache/lucene/pull/12825#discussion_r1398917695
##########
lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java:
##########
@@ -1087,42 +1087,7 @@ String toTitleCase(String word) {
   private String sortWordsOffline(
       Directory tempDir, String tempFileNamePrefix, IndexOutput unsorted) throws IOException {
-    OfflineSorter sorter =
-        new OfflineSorter(
-            tempDir,
-            tempFileNamePrefix,
-            new Comparator<>() {
-              final BytesRef scratch1 = new BytesRef();
-              final BytesRef scratch2 = new BytesRef();
-
-              private void initScratch(BytesRef o, BytesRef scratch) {
-                scratch.bytes = o.bytes;
-                scratch.offset = o.offset;
-                scratch.length = o.length;
-
-                for (int i = scratch.length - 1; i >= 0; i--) {
-                  if (scratch.bytes[scratch.offset + i] == FLAG_SEPARATOR
-                      || scratch.bytes[scratch.offset + i] == MORPH_SEPARATOR) {
-                    scratch.length = i;
-                    break;
-                  }
-                }
-              }
-
-              @Override
-              public int compare(BytesRef o1, BytesRef o2) {
-                initScratch(o1, scratch1);
-                initScratch(o2, scratch2);
-
-                int cmp = scratch1.compareTo(scratch2);
-                if (cmp == 0) {
-                  // tie break on whole row
-                  return o1.compareTo(o2);
-                } else {
-                  return cmp;
-                }
-              }
-            });
+    var sorter = new OfflineSorter(tempDir, tempFileNamePrefix, Comparator.naturalOrder());

Review Comment:
   Should not block this great optimization, but maybe `BytesRefComparator#NATURAL` will help the performance here as it can take advantage of the radix sorter :)

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org