epotyom commented on code in PR #13568: URL: https://github.com/apache/lucene/pull/13568#discussion_r1711353146
########## lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/SandboxFacetTestCase.java: ########## @@ -0,0 +1,407 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.sandbox.facet; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.LabelAndValue; +import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.TaxonomyFacetLabels; +import org.apache.lucene.facet.taxonomy.TaxonomyFacetLabels.FacetLabelReader; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.sandbox.facet.abstracts.OrdLabelBiMap; +import org.apache.lucene.sandbox.facet.abstracts.OrdToComparable; +import org.apache.lucene.sandbox.facet.abstracts.OrdinalIterator; +import org.apache.lucene.sandbox.facet.ordinal_iterators.TopnOrdinalIterator; +import 
org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder; +import org.apache.lucene.sandbox.facet.taxonomy.TaxonomyChildrenOrdinalIterator; +import org.apache.lucene.sandbox.facet.taxonomy.TaxonomyOrdLabelBiMap; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BytesRef; + +public abstract class SandboxFacetTestCase extends LuceneTestCase { + // we don't have access to overall count for all facets from count recorder, + // and we can't compute it as a SUM of values for each facet ID because we need to respect cases + // where + // the same doc belongs to multiple facets (e.g. overlapping ranges and + // multi value fields). We can add an extra range that includes everything, + // or consider supporting overall count in CountFacetRecorder. But it is not exactly the value + // we can get now, as this value wouldn't respect top-n cutoff. Is this value a must have facets + // feature? + static final int VALUE_CANT_BE_COMPUTED = -5; + + /** + * Utility method that uses {@link FacetLabelReader} to get facet labels for each hit in {@link + * MatchingDocs}. The method returns {@code List<List<FacetLabel>>} where outer list has one entry + * per document and inner list has all {@link FacetLabel} entries that belong to a document. The + * inner list may be empty if no {@link FacetLabel} are found for a hit. + * + * @param taxoReader {@link TaxonomyReader} used to read taxonomy during search. This instance is + * expected to be open for reading. + * @param fc {@link FacetsCollector} A collector with matching hits. + * @param dimension facet dimension for which labels are requested. A null value fetches labels + * for all dimensions. + * @return {@code List<List<FacetLabel>>} where outer list has one non-null entry per document, and + * inner list contains all {@link FacetLabel} entries that belong to a document. 
+ * @throws IOException when a low-level IO issue occurs. + */ + public List<List<FacetLabel>> getAllTaxonomyFacetLabels( + String dimension, TaxonomyReader taxoReader, FacetsCollector fc) throws IOException { + List<List<FacetLabel>> actualLabels = new ArrayList<>(); + TaxonomyFacetLabels taxoLabels = + new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); + for (MatchingDocs m : fc.getMatchingDocs()) { + FacetLabelReader facetLabelReader = taxoLabels.getFacetLabelReader(m.context); + DocIdSetIterator disi = m.bits.iterator(); + while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + actualLabels.add(allFacetLabels(disi.docID(), dimension, facetLabelReader)); + } + } + return actualLabels; + } + + /** + * Utility method to get all facet labels for an input docId and dimension using the supplied + * {@link FacetLabelReader}. + * + * @param docId docId for which facet labels are needed. + * @param dimension Retain facet labels for supplied dimension only. A null value fetches all + * facet labels. + * @param facetLabelReader {@link FacetLabelReader} instance used to get facet labels for input docId. + * @return {@code List<FacetLabel>} containing matching facet labels. + * @throws IOException when a low-level IO issue occurs while reading facet labels. 
+ */ + List<FacetLabel> allFacetLabels(int docId, String dimension, FacetLabelReader facetLabelReader) + throws IOException { + List<FacetLabel> facetLabels = new ArrayList<>(); + FacetLabel facetLabel; + if (dimension != null) { + for (facetLabel = facetLabelReader.nextFacetLabel(docId, dimension); facetLabel != null; ) { + facetLabels.add(facetLabel); + facetLabel = facetLabelReader.nextFacetLabel(docId, dimension); + } + } else { + for (facetLabel = facetLabelReader.nextFacetLabel(docId); facetLabel != null; ) { + facetLabels.add(facetLabel); + facetLabel = facetLabelReader.nextFacetLabel(docId); + } + } + return facetLabels; + } + + protected String[] getRandomTokens(int count) { + String[] tokens = new String[count]; + for (int i = 0; i < tokens.length; i++) { + tokens[i] = TestUtil.randomRealisticUnicodeString(random(), 1, 10); + // tokens[i] = _TestUtil.randomSimpleString(random(), 1, 10); + } + return tokens; + } + + protected String pickToken(String[] tokens) { Review Comment: I removed the method - thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org