[ 
https://issues.apache.org/jira/browse/LUCENE-9335?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17319882#comment-17319882
 ] 

Zach Chen commented on LUCENE-9335:
-----------------------------------

I made the following changes, but still saw varying benchmark results 
across runs (randomized queries?). The changes and results are listed below:

Changes in Boolean2ScorerSupplier.java (use DisjunctionSumScorer instead of 
WANDScorer for pure term disjunctions whose clauses have similar costs)

 
{code:java}
diff --git 
a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java 
b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java
index bdf085d4669..10478ab45bf 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Boolean2ScorerSupplier.java
@@ -238,11 +238,34 @@ final class Boolean2ScorerSupplier extends ScorerSupplier 
{
       //
       // However, as WANDScorer uses more complex algorithm and data 
structure, we would like to
       // still use DisjunctionSumScorer to handle exhaustive pure 
disjunctions, which may be faster
-      if (scoreMode == ScoreMode.TOP_SCORES || minShouldMatch > 1) {
+      boolean isPureDisjunction =
+              subs.get(Occur.FILTER).isEmpty()
+                      && subs.get(Occur.MUST).isEmpty()
+                      && subs.get(Occur.MUST_NOT).isEmpty();
+      // top-level boolean term query
+      boolean allTermScorers =
+              optionalScorers.stream().allMatch(scorer -> scorer instanceof 
TermScorer);
+
+      if (isPureDisjunction && allTermScorers && 
isSimilarCost(optionalScorers) && minShouldMatch <= 1) {
+        return new DisjunctionSumScorer(weight, optionalScorers, scoreMode);
+      } else if (scoreMode == ScoreMode.TOP_SCORES || minShouldMatch > 1) {
         return new WANDScorer(weight, optionalScorers, minShouldMatch, 
scoreMode);
       } else {
         return new DisjunctionSumScorer(weight, optionalScorers, scoreMode);
       }
     }
   }
+
+  private boolean isSimilarCost(List<Scorer> optionalScorers) {
+    long minCost = Long.MAX_VALUE;
+    long maxCost = Long.MIN_VALUE;
+    for (Scorer scorer : optionalScorers) {
+      long cost = scorer.iterator().cost();
+      minCost = Math.min(minCost, cost);
+      maxCost = Math.max(maxCost, cost);
+    }
+
+    // TODO heuristic based cost-similarity threshold
+    return maxCost / minCost < 2;
+  }
 }
{code}
 

 

Changes in benchUtil.py to skip hit-count verification:
{code:java}
diff --git a/src/python/benchUtil.py b/src/python/benchUtil.py
index fb50033..3579f45 100644
--- a/src/python/benchUtil.py
+++ b/src/python/benchUtil.py
@@ -1203,7 +1203,7 @@ class RunAlgs:
     cmpRawResults, heapCmp = parseResults(cmpLogFiles)
 
     # make sure they got identical results
-    cmpDiffs = compareHits(baseRawResults, cmpRawResults, self.verifyScores, 
self.verifyCounts)
+    cmpDiffs = compareHits(baseRawResults, cmpRawResults, self.verifyScores, 
False)
 
     baseResults = collateResults(baseRawResults)
     cmpResults = collateResults(cmpRawResults)
{code}
 

Benchmark result 1 with source wikimedium5m:
{code:java}
  TaskQPS baseline      StdDevQPS my_modified_version      StdDev               
 Pct diff p-value
              OrHighHigh       87.27      (4.9%)       51.12      (1.7%)  
-41.4% ( -45% -  -36%) 0.000
               OrHighLow      624.55      (7.6%)      589.16      (8.1%)   
-5.7% ( -19% -   10%) 0.022
               OrHighMed      135.02      (3.7%)      129.51      (6.6%)   
-4.1% ( -13% -    6%) 0.016
                Wildcard      214.30      (3.3%)      209.33      (2.8%)   
-2.3% (  -8% -    3%) 0.017
           OrNotHighHigh      728.60      (8.5%)      713.53      (6.3%)   
-2.1% ( -15% -   13%) 0.383
                HighTerm     1195.98      (6.0%)     1174.51      (4.2%)   
-1.8% ( -11% -    8%) 0.273
                 LowTerm     1757.60      (6.0%)     1728.64      (4.8%)   
-1.6% ( -11% -    9%) 0.336
              AndHighMed      231.78      (4.0%)      227.96      (3.7%)   
-1.6% (  -8% -    6%) 0.175
                 Prefix3      196.03      (3.4%)      193.19      (3.5%)   
-1.4% (  -8% -    5%) 0.180
                 Respell       59.52      (2.8%)       59.05      (2.7%)   
-0.8% (  -6% -    4%) 0.362
                 MedTerm     1507.60      (5.3%)     1495.89      (3.3%)   
-0.8% (  -8% -    8%) 0.580
    BrowseDateTaxoFacets       11.04      (3.3%)       10.97      (2.8%)   
-0.7% (  -6% -    5%) 0.462
   BrowseMonthTaxoFacets       13.21      (3.4%)       13.12      (3.9%)   
-0.7% (  -7% -    6%) 0.542
         MedSloppyPhrase       67.05      (3.4%)       66.58      (4.0%)   
-0.7% (  -7% -    6%) 0.544
                  IntNRQ      215.89      (4.2%)      214.39      (2.9%)   
-0.7% (  -7% -    6%) 0.543
BrowseDayOfYearTaxoFacets       11.02      (3.2%)       10.96      (2.7%)   
-0.6% (  -6% -    5%) 0.546
               LowPhrase      193.14      (4.0%)      192.05      (4.5%)   
-0.6% (  -8% -    8%) 0.678
BrowseDayOfYearSSDVFacets       27.80      (5.2%)       27.67      (5.5%)   
-0.5% ( -10% -   10%) 0.781
            OrHighNotLow      823.92      (6.1%)      820.15      (4.8%)   
-0.5% ( -10% -   11%) 0.790
                PKLookup      215.92      (3.9%)      215.02      (3.8%)   
-0.4% (  -7% -    7%) 0.734
                  Fuzzy1       65.82      (7.8%)       65.58     (11.0%)   
-0.4% ( -17% -   20%) 0.904
        HighSloppyPhrase       42.05      (3.9%)       41.91      (3.3%)   
-0.3% (  -7% -    7%) 0.771
             MedSpanNear      155.25      (3.5%)      154.78      (3.3%)   
-0.3% (  -6% -    6%) 0.779
             AndHighHigh       84.97      (4.4%)       84.79      (3.0%)   
-0.2% (  -7% -    7%) 0.857
         LowSloppyPhrase      100.76      (3.6%)      100.55      (3.6%)   
-0.2% (  -7% -    7%) 0.857
    HighIntervalsOrdered       42.39      (3.4%)       42.34      (3.7%)   
-0.1% (  -6% -    7%) 0.921
   HighTermDayOfYearSort      210.65     (15.0%)      210.79     (11.5%)    
0.1% ( -23% -   31%) 0.987
              HighPhrase      468.21      (4.5%)      468.66      (4.0%)    
0.1% (  -7% -    8%) 0.943
            HighSpanNear      148.68      (3.6%)      148.94      (3.6%)    
0.2% (  -6% -    7%) 0.880
            OrNotHighMed      682.83      (7.2%)      684.51      (4.4%)    
0.2% ( -10% -   12%) 0.896
            OrHighNotMed      733.07      (7.3%)      736.52      (4.9%)    
0.5% ( -10% -   13%) 0.811
           OrHighNotHigh      638.40      (7.2%)      642.31      (4.5%)    
0.6% ( -10% -   13%) 0.747
   BrowseMonthSSDVFacets       31.42      (5.2%)       31.65      (2.6%)    
0.7% (  -6% -    8%) 0.577
              AndHighLow      923.45      (5.9%)      933.64      (5.3%)    
1.1% (  -9% -   13%) 0.534
               MedPhrase      347.33      (5.7%)      351.57      (3.3%)    
1.2% (  -7% -   10%) 0.404
             LowSpanNear      311.32      (6.2%)      315.13      (4.2%)    
1.2% (  -8% -   12%) 0.466
                  Fuzzy2       59.05     (12.3%)       60.19     (10.6%)    
1.9% ( -18% -   28%) 0.594
            OrNotHighLow      851.87      (6.4%)      869.95      (5.6%)    
2.1% (  -9% -   15%) 0.263
       HighTermMonthSort       99.63     (12.7%)      102.07     (15.1%)    
2.5% ( -22% -   34%) 0.578
    HighTermTitleBDVSort      161.36     (16.9%)      165.53     (18.3%)    
2.6% ( -27% -   45%) 0.643
              TermDTSort      195.70     (11.5%)      201.16     (10.4%)    
2.8% ( -17% -   27%) 0.420
WARNING: cat=OrHighHigh: hit counts differ: 13070+ vs 357939+


{code}
 

Benchmark result 2 with source wikimedium5m:
{code:java}
        TaskQPS baseline      StdDevQPS my_modified_version      StdDev         
       Pct diff p-value
                  Fuzzy1       69.00     (13.7%)       64.57     (14.5%)   
-6.4% ( -30% -   25%) 0.150
                  Fuzzy2       50.00     (15.2%)       47.36     (17.9%)   
-5.3% ( -33% -   32%) 0.313
            OrHighNotLow      780.22      (5.3%)      764.22      (3.9%)   
-2.1% ( -10% -    7%) 0.165
               OrHighLow      235.87      (3.5%)      232.57      (3.2%)   
-1.4% (  -7% -    5%) 0.187
            OrHighNotMed      741.20      (4.0%)      733.20      (4.9%)   
-1.1% (  -9% -    8%) 0.450
           OrNotHighHigh      850.77      (5.9%)      842.95      (5.3%)   
-0.9% ( -11% -   10%) 0.606
                  IntNRQ      159.89      (1.5%)      158.54      (3.1%)   
-0.8% (  -5% -    3%) 0.270
               OrHighMed      161.49      (5.0%)      160.24      (6.0%)   
-0.8% ( -11% -   10%) 0.659
                 MedTerm     1534.83      (4.9%)     1524.53      (5.7%)   
-0.7% ( -10% -   10%) 0.691
                 LowTerm     1854.25      (6.1%)     1842.02      (5.7%)   
-0.7% ( -11% -   11%) 0.725
                 Respell       67.40      (1.8%)       66.96      (2.5%)   
-0.7% (  -4% -    3%) 0.346
                Wildcard      222.44      (2.5%)      221.67      (2.3%)   
-0.3% (  -5% -    4%) 0.645
                 Prefix3      213.45      (3.6%)      212.73      (3.9%)   
-0.3% (  -7% -    7%) 0.776
              OrHighHigh       62.29      (2.8%)       62.08      (2.5%)   
-0.3% (  -5% -    5%) 0.700
BrowseDayOfYearSSDVFacets       27.76      (7.0%)       27.69      (6.9%)   
-0.3% ( -13% -   14%) 0.897
            HighSpanNear      108.87      (2.3%)      108.63      (3.8%)   
-0.2% (  -6% -    6%) 0.820
   BrowseMonthTaxoFacets       13.29      (2.3%)       13.30      (1.9%)    
0.0% (  -4% -    4%) 0.940
         LowSloppyPhrase      171.64      (2.8%)      171.88      (2.9%)    
0.1% (  -5% -    6%) 0.875
                PKLookup      217.98      (3.9%)      218.52      (3.6%)    
0.2% (  -7% -    8%) 0.836
                HighTerm     1392.99      (4.5%)     1396.79      (4.5%)    
0.3% (  -8% -    9%) 0.847
    HighIntervalsOrdered       29.54      (2.7%)       29.63      (2.6%)    
0.3% (  -4% -    5%) 0.732
              AndHighLow      797.99      (4.0%)      800.55      (3.7%)    
0.3% (  -7% -    8%) 0.792
            OrNotHighLow      885.90      (4.3%)      888.91      (5.4%)    
0.3% (  -8% -   10%) 0.826
         MedSloppyPhrase       65.81      (2.9%)       66.06      (3.1%)    
0.4% (  -5% -    6%) 0.689
             LowSpanNear       59.31      (2.6%)       59.55      (2.5%)    
0.4% (  -4% -    5%) 0.609
    BrowseDateTaxoFacets       11.19      (2.8%)       11.25      (2.8%)    
0.5% (  -5% -    6%) 0.542
               LowPhrase      170.61      (2.6%)      171.55      (2.6%)    
0.6% (  -4% -    5%) 0.502
             MedSpanNear      192.13      (3.2%)      193.32      (2.2%)    
0.6% (  -4% -    6%) 0.469
BrowseDayOfYearTaxoFacets       11.20      (2.9%)       11.28      (2.9%)    
0.7% (  -4% -    6%) 0.460
        HighSloppyPhrase       82.88      (5.4%)       83.47      (5.5%)    
0.7% (  -9% -   12%) 0.681
   BrowseMonthSSDVFacets       31.59      (4.7%)       31.91      (2.0%)    
1.0% (  -5% -    8%) 0.387
               MedPhrase      138.53      (2.1%)      140.00      (2.7%)    
1.1% (  -3% -    5%) 0.164
              HighPhrase      294.46      (2.7%)      297.99      (2.3%)    
1.2% (  -3% -    6%) 0.135
           OrHighNotHigh      654.84      (5.4%)      663.25      (4.8%)    
1.3% (  -8% -   12%) 0.427
       HighTermMonthSort      175.68     (10.6%)      178.02     (11.3%)    
1.3% ( -18% -   25%) 0.700
   HighTermDayOfYearSort      301.64     (16.6%)      306.26     (17.4%)    
1.5% ( -27% -   42%) 0.776
            OrNotHighMed      694.25      (6.0%)      705.04      (5.6%)    
1.6% (  -9% -   13%) 0.396
             AndHighHigh      105.76      (2.4%)      107.44      (3.4%)    
1.6% (  -4% -    7%) 0.087
              TermDTSort      227.39     (12.5%)      232.52     (11.9%)    
2.3% ( -19% -   30%) 0.559
              AndHighMed      241.36      (2.9%)      246.82      (3.5%)    
2.3% (  -4% -    8%) 0.026
    HighTermTitleBDVSort      345.18     (13.3%)      361.51     (14.7%)    
4.7% ( -20% -   37%) 0.286


{code}
 

> Add a bulk scorer for disjunctions that does dynamic pruning
> ------------------------------------------------------------
>
>                 Key: LUCENE-9335
>                 URL: https://issues.apache.org/jira/browse/LUCENE-9335
>             Project: Lucene - Core
>          Issue Type: Improvement
>            Reporter: Adrien Grand
>            Priority: Minor
>
> Lucene often gets benchmarked against other engines, e.g. against Tantivy and 
> PISA at [https://tantivy-search.github.io/bench/] or against research 
> prototypes in Table 1 of 
> [https://cs.uwaterloo.ca/~jimmylin/publications/Grand_etal_ECIR2020_preprint.pdf].
>  Given that top-level disjunctions of term queries are commonly used for 
> benchmarking, it would be nice to optimize this case a bit more. I suspect 
> that we could make fewer per-document decisions by implementing a BulkScorer 
> instead of a Scorer.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to