gsmiller commented on code in PR #13568: URL: https://github.com/apache/lucene/pull/13568#discussion_r1693661698
########## lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java: ########## @@ -398,130 +490,56 @@ private <R> ConcurrentDrillSidewaysResult<R> searchSequentially( } Query[] drillDownQueries = query.getDrillDownQueries(); - int numDims = drillDownDims.size(); - - FacetsCollectorManager drillDownCollectorManager = createDrillDownFacetsCollectorManager(); - - FacetsCollectorManager[] drillSidewaysFacetsCollectorManagers = - new FacetsCollectorManager[numDims]; - for (int i = 0; i < numDims; i++) { - drillSidewaysFacetsCollectorManagers[i] = createDrillSidewaysFacetsCollectorManager(); - } - DrillSidewaysQuery dsq = new DrillSidewaysQuery( baseQuery, - drillDownCollectorManager, - drillSidewaysFacetsCollectorManagers, + // drillDownCollectorOwner, + // Don't pass drill down collector because drill down is collected by IndexSearcher + // itself. + // TODO: deprecate drillDown collection in DrillSidewaysQuery? + null, + drillSidewaysCollectorOwners, drillDownQueries, scoreSubDocsAtOnce()); - R collectorResult = searcher.search(dsq, hitCollectorManager); - - FacetsCollector drillDownCollector; - if (drillDownCollectorManager != null) { - drillDownCollector = drillDownCollectorManager.reduce(dsq.managedDrillDownCollectors); - } else { - drillDownCollector = null; - } - - FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[numDims]; - int numSlices = dsq.managedDrillSidewaysCollectors.size(); - - for (int dim = 0; dim < numDims; dim++) { - List<FacetsCollector> facetsCollectorsForDim = new ArrayList<>(numSlices); - - for (int slice = 0; slice < numSlices; slice++) { - facetsCollectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice)[dim]); - } - - drillSidewaysCollectors[dim] = - drillSidewaysFacetsCollectorManagers[dim].reduce(facetsCollectorsForDim); - } - - String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]); - - return new ConcurrentDrillSidewaysResult<>( - buildFacetsResult(drillDownCollector, 
drillSidewaysCollectors, drillSidewaysDims), - null, - collectorResult, - drillDownCollector, - drillSidewaysCollectors, - drillSidewaysDims); + searcher.search(dsq, drillDownCollectorOwner); } - @SuppressWarnings("unchecked") - private <R> ConcurrentDrillSidewaysResult<R> searchConcurrently( - final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) + private void searchConcurrently( + final DrillDownQuery query, + final CollectorOwner<?, ?> drillDownCollectorOwner, + final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners) throws IOException { final Map<String, Integer> drillDownDims = query.getDims(); final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1); - // Add the main DrillDownQuery - FacetsCollectorManager drillDownFacetsCollectorManager = - createDrillDownFacetsCollectorManager(); - CollectorManager<?, ?> mainCollectorManager; - if (drillDownFacetsCollectorManager != null) { - // Make sure we populate a facet collector corresponding to the base query if desired: - mainCollectorManager = - new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager); - } else { - mainCollectorManager = hitCollectorManager; - } - callableCollectors.add(new CallableCollector(-1, searcher, query, mainCollectorManager)); + callableCollectors.add(new CallableCollector(-1, searcher, query, drillDownCollectorOwner)); int i = 0; final Query[] filters = query.getDrillDownQueries(); - for (String dim : drillDownDims.keySet()) + for (String dim : drillDownDims.keySet()) { callableCollectors.add( new CallableCollector( - i++, + i, searcher, getDrillDownQuery(query, filters, dim), - createDrillSidewaysFacetsCollectorManager())); - - final FacetsCollector mainFacetsCollector; - final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()]; - final R collectorResult; + drillSidewaysCollectorOwners.get(i))); + i++; + } try { // Run the query pool - final List<Future<CallableResult>> 
futures = executor.invokeAll(callableCollectors); - - // Extract the results - if (drillDownFacetsCollectorManager != null) { - // If we populated a facets collector for the main query, make sure to unpack it properly - final Object[] mainResults = (Object[]) futures.get(0).get().result; - mainFacetsCollector = (FacetsCollector) mainResults[0]; - collectorResult = (R) mainResults[1]; - } else { - mainFacetsCollector = null; - collectorResult = (R) futures.get(0).get().result; - } - for (i = 1; i < futures.size(); i++) { - final CallableResult result = futures.get(i).get(); - facetsCollectors[result.pos] = (FacetsCollector) result.result; - } - // Fill the null results with the mainFacetsCollector - for (i = 0; i < facetsCollectors.length; i++) - if (facetsCollectors[i] == null) facetsCollectors[i] = mainFacetsCollector; + final List<Future<Void>> futures = executor.invokeAll(callableCollectors); + // Wait for results. We don't read the results as they are collected by CollectorOwners + for (i = 0; i < futures.size(); i++) { + futures.get(0).get(); Review Comment: Should this be `futures.get(i)` rather than `futures.get(0)`? As written, only the first future is ever checked on each iteration, so a failure in any of the other tasks would go unnoticed. 
########## lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java: ########## @@ -349,45 +344,142 @@ private DrillDownQuery getDrillDownQuery( public <R> ConcurrentDrillSidewaysResult<R> search( final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) throws IOException { + // Main query + FacetsCollectorManager drillDownFacetsCollectorManager = + createDrillDownFacetsCollectorManager(); + final CollectorOwner<?, ?> mainCollectorOwner; + if (drillDownFacetsCollectorManager != null) { + // Make sure we populate a facet collector corresponding to the base query if desired: + mainCollectorOwner = + new CollectorOwner<>( + new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager)); + } else { + mainCollectorOwner = new CollectorOwner<>(hitCollectorManager); + } + // Drill sideways dimensions + final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners; + if (query.getDims().isEmpty() == false) { + drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size()); + for (int i = 0; i < query.getDims().size(); i++) { + drillSidewaysCollectorOwners.add( + new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager())); + } + } else { + drillSidewaysCollectorOwners = null; + } + // Execute query if (executor != null) { - return searchConcurrently(query, hitCollectorManager); + searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners); } else { - return searchSequentially(query, hitCollectorManager); + searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners); } + + // Collect results + final FacetsCollector facetsCollectorResult; + final R hitCollectorResult; + if (drillDownFacetsCollectorManager != null) { + // drill down collected using MultiCollector + // Extract the results: + Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult(); + facetsCollectorResult = (FacetsCollector) drillDownResult[0]; + hitCollectorResult = (R) drillDownResult[1]; + } else { + 
facetsCollectorResult = null; + hitCollectorResult = (R) mainCollectorOwner.getResult(); + } + + // Getting results for drill sideways dimensions (if any) + final String[] drillSidewaysDims; + final FacetsCollector[] drillSidewaysCollectors; + if (query.getDims().isEmpty() == false) { + drillSidewaysDims = query.getDims().keySet().toArray(new String[0]); + int numDims = query.getDims().size(); + assert drillSidewaysCollectorOwners != null; + assert drillSidewaysCollectorOwners.size() == numDims; + drillSidewaysCollectors = new FacetsCollector[numDims]; + for (int dim = 0; dim < numDims; dim++) { + drillSidewaysCollectors[dim] = + (FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult(); + } + } else { + drillSidewaysDims = null; + drillSidewaysCollectors = null; + } + + return new ConcurrentDrillSidewaysResult<>( + buildFacetsResult(facetsCollectorResult, drillSidewaysCollectors, drillSidewaysDims), + null, + hitCollectorResult, + facetsCollectorResult, + drillSidewaysCollectors, + drillSidewaysDims); } - @SuppressWarnings("unchecked") - private <R> ConcurrentDrillSidewaysResult<R> searchSequentially( - final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) + /** + * Search using DrillDownQuery with custom collectors. This method can be used with any {@link + * CollectorOwner}s. It doesn't return anything because it is expected that you read results from + * provided {@link CollectorOwner}s. + * + * <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill + * sideways dimensions. + * + * <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code + * drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here. + * + * <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C, + * because we want each dimensions to be able to use their own types. 
Alternatively, we can use + * typesafe heterogeneous container and provide CollectorManager type for each dimension to this + * method? I do like CollectorOwner approach as it seems more intuitive? + */ + public void search( + final DrillDownQuery query, + CollectorOwner<?, ?> drillDownCollectorOwner, Review Comment: So if users want to collect hits _and_ do faceting on the results that match the entire query they would use a `MultiCollectorManager` under the hood of this collector owner right? I think that makes sense for this more advanced functionality, but it might be worth making a note of that in the javadoc. ########## lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java: ########## @@ -349,45 +344,142 @@ private DrillDownQuery getDrillDownQuery( public <R> ConcurrentDrillSidewaysResult<R> search( final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) throws IOException { + // Main query + FacetsCollectorManager drillDownFacetsCollectorManager = + createDrillDownFacetsCollectorManager(); + final CollectorOwner<?, ?> mainCollectorOwner; + if (drillDownFacetsCollectorManager != null) { + // Make sure we populate a facet collector corresponding to the base query if desired: + mainCollectorOwner = + new CollectorOwner<>( + new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager)); + } else { + mainCollectorOwner = new CollectorOwner<>(hitCollectorManager); + } + // Drill sideways dimensions + final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners; + if (query.getDims().isEmpty() == false) { + drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size()); + for (int i = 0; i < query.getDims().size(); i++) { + drillSidewaysCollectorOwners.add( + new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager())); + } + } else { + drillSidewaysCollectorOwners = null; + } + // Execute query if (executor != null) { - return searchConcurrently(query, hitCollectorManager); + 
searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners); } else { - return searchSequentially(query, hitCollectorManager); + searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners); } + + // Collect results + final FacetsCollector facetsCollectorResult; + final R hitCollectorResult; + if (drillDownFacetsCollectorManager != null) { + // drill down collected using MultiCollector + // Extract the results: + Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult(); + facetsCollectorResult = (FacetsCollector) drillDownResult[0]; + hitCollectorResult = (R) drillDownResult[1]; + } else { + facetsCollectorResult = null; + hitCollectorResult = (R) mainCollectorOwner.getResult(); + } + + // Getting results for drill sideways dimensions (if any) + final String[] drillSidewaysDims; + final FacetsCollector[] drillSidewaysCollectors; + if (query.getDims().isEmpty() == false) { + drillSidewaysDims = query.getDims().keySet().toArray(new String[0]); + int numDims = query.getDims().size(); + assert drillSidewaysCollectorOwners != null; + assert drillSidewaysCollectorOwners.size() == numDims; + drillSidewaysCollectors = new FacetsCollector[numDims]; + for (int dim = 0; dim < numDims; dim++) { + drillSidewaysCollectors[dim] = + (FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult(); + } + } else { + drillSidewaysDims = null; + drillSidewaysCollectors = null; + } + + return new ConcurrentDrillSidewaysResult<>( + buildFacetsResult(facetsCollectorResult, drillSidewaysCollectors, drillSidewaysDims), + null, + hitCollectorResult, + facetsCollectorResult, + drillSidewaysCollectors, + drillSidewaysDims); } - @SuppressWarnings("unchecked") - private <R> ConcurrentDrillSidewaysResult<R> searchSequentially( - final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) + /** + * Search using DrillDownQuery with custom collectors. This method can be used with any {@link + * CollectorOwner}s. 
It doesn't return anything because it is expected that you read results from + * provided {@link CollectorOwner}s. + * + * <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill + * sideways dimensions. + * + * <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code + * drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here. + * + * <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C, + * because we want each dimensions to be able to use their own types. Alternatively, we can use + * typesafe heterogeneous container and provide CollectorManager type for each dimension to this + * method? I do like CollectorOwner approach as it seems more intuitive? + */ + public void search( + final DrillDownQuery query, + CollectorOwner<?, ?> drillDownCollectorOwner, + List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners) + throws IOException { + if (drillDownCollectorOwner == null) { + throw new IllegalArgumentException( + "This search method requires client to provide drill down collector manager"); + } + if (drillSidewaysCollectorOwners == null) { + if (query.getDims().isEmpty() == false) { + throw new IllegalArgumentException( + "The query requires not null drillSidewaysCollectorOwners"); + } + } else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) { + throw new IllegalArgumentException( + "drillSidewaysCollectorOwners size must be equal to number of dimensions in the query."); + } + if (executor != null) { + searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); + } else { + searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners); + } + + // This method doesn't return results as each dimension might have its own result type. + // But we call getResult to trigger results reducing, so that users don't have to worry about + // it. 
+ // TODO: do we want to run reduce in parallel if executor is provided? Review Comment: Let's add a note that `CollectorOwner#getResult` is not currently written to be particularly thread-safe, so we'd need to address that if we tackle this TODO (I think I left a comment about this elsewhere). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org