gautamworah96 commented on a change in pull request #2247: URL: https://github.com/apache/lucene-solr/pull/2247#discussion_r572447037
########## File path: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java ########## @@ -353,12 +349,104 @@ public FacetLabel getPath(int ordinal) throws IOException { } synchronized (categoryCache) { - categoryCache.put(catIDInteger, ret); + categoryCache.put(ordinal, ret); } return ret; } + private FacetLabel getPathFromCache(int ordinal) { + // TODO: can we use an int-based hash impl, such as IntToObjectMap, + // wrapped as LRU? + synchronized (categoryCache) { + return categoryCache.get(ordinal); + } + } + + private void isOrdinalInIndexReaderRange(int ordinal, int indexReaderMaxDoc) + throws IllegalArgumentException { + if (ordinal < 0 || ordinal >= indexReaderMaxDoc) { + throw new IllegalArgumentException( + "ordinal " + + ordinal + + " is out of the range of the indexReader " + + indexReader.toString()); + } + } + + /** + * Returns an array of FacetLabels for a given array of ordinals. + * + * <p>This API is generally faster than iteratively calling {@link + * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader#getPath} over an array of + * ordinals. + * + * <p>This API is only available for Lucene indexes created with 8.7+ codec because it uses + * BinaryDocValues instead of StoredFields. Use the {@link + * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader#getPath} method for indices + * created with codec version older than 8.7. + * + * @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy + * index + * @throws IOException if the taxonomy index is created using the older StoredFields based codec. + */ + public FacetLabel[] getBulkPath(int... ordinals) throws IOException { + ensureOpen(); + + FacetLabel[] bulkPath = new FacetLabel[ordinals.length]; + // remember the original positions of ordinals before they are sorted + IntIntScatterMap originalPosition = new IntIntScatterMap(); + int indexReaderMaxDoc = indexReader.maxDoc(); + for (int i = 0; i < ordinals.length; i++) { + // check whether the ordinal is valid before accessing the cache + isOrdinalInIndexReaderRange(ordinals[i], indexReaderMaxDoc); + // check the cache before trying to find it in the index + FacetLabel ordinalPath = getPathFromCache(ordinals[i]); + if (ordinalPath != null) { + bulkPath[i] = ordinalPath; + } + originalPosition.put(ordinals[i], i); + } + + Arrays.sort(ordinals); + int readerIndex; + int leafReaderMaxDoc = 0; + int leafReaderDocBase = 0; + LeafReader leafReader; + LeafReaderContext leafReaderContext = null; + BinaryDocValues values = null; + + for (int ord : ordinals) { + if (bulkPath[originalPosition.get(ord)] == null) { + if (values == null || ord > leafReaderMaxDoc) { + + readerIndex = ReaderUtil.subIndex(ord, indexReader.leaves()); + leafReaderContext = indexReader.leaves().get(readerIndex); + leafReader = leafReaderContext.reader(); + leafReaderMaxDoc = leafReader.maxDoc(); + leafReaderDocBase = leafReaderContext.docBase; + values = leafReader.getBinaryDocValues(Consts.FULL); + + // this check is only needed once to confirm that the index uses BinaryDocValues + boolean success = values.advanceExact(ord - leafReaderDocBase); + if (success == false) { + throw new IOException( Review comment: Done. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org