madrob commented on a change in pull request #1042: LUCENE-9068: Build FuzzyQuery automata up-front
URL: https://github.com/apache/lucene-solr/pull/1042#discussion_r362871453
 
 

 ##########
 File path: lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
 ##########
 @@ -92,76 +105,62 @@
    * 
    * @param terms Delivers terms.
    * @param atts {@link AttributeSource} created by the rewrite method of 
{@link MultiTermQuery}
-   * thats contains information about competitive boosts during rewrite. It is 
also used
-   * to cache DFAs between segment transitions.
+   *              that contains information about competitive boosts during 
rewrite
    * @param term Pattern term.
    * @param maxEdits Maximum edit distance.
-   * @param prefixLength Length of required common prefix. Default value is 0.
+   * @param automata An array of levenshtein automata to match against terms,
+   *                 see {@link #buildAutomata(String, int[], int, boolean, 
int)}
    * @throws IOException if there is a low-level IO error
    */
-  public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
-      final int maxEdits, final int prefixLength, boolean transpositions) 
throws IOException {
-    if (maxEdits < 0 || maxEdits > 
LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
-      throw new IllegalArgumentException("max edits must be 0.." + 
LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got: " + 
maxEdits);
-    }
-    if (prefixLength < 0) {
-      throw new IllegalArgumentException("prefixLength cannot be less than 0");
-    }
+  public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, int 
termLength,
+      final int maxEdits, CompiledAutomaton[] automata) throws IOException {
+
     this.maxEdits = maxEdits;
     this.terms = terms;
     this.term = term;
-    
-    // convert the string into a utf32 int[] representation for fast 
comparisons
-    this.termText = stringToUTF32(term.text());
-    this.termLength = termText.length;
+    this.atts = atts;
+    this.termLength = termLength;
 
-    this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);
     this.maxBoostAtt = 
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
+    this.boostAtt = atts.addAttribute(BoostAttribute.class);
 
-    // NOTE: boostAtt must pulled from attributes() not from atts!  This is 
because TopTermsRewrite looks for boostAtt from this TermsEnum's
-    // private attributes() and not the global atts passed to us from 
MultiTermQuery:
-    this.boostAtt = attributes().addAttribute(BoostAttribute.class);
-
-    //The prefix could be longer than the word.
-    //It's kind of silly though.  It means we must match the entire word.
-    this.realPrefixLength = prefixLength > termLength ? termLength : 
prefixLength;
-    this.transpositions = transpositions;
-
-    CompiledAutomaton[] prevAutomata = dfaAtt.automata();
-    if (prevAutomata == null) {
-      prevAutomata = new CompiledAutomaton[maxEdits+1];
-      Automaton[] automata = buildAutomata(termText, prefixLength, 
transpositions, maxEdits);
-      for (int i = 0; i <= maxEdits; i++) {
-        try {
-          prevAutomata[i] = new CompiledAutomaton(automata[i], true, false);
-        } catch (TooComplexToDeterminizeException e) {
-          throw new FuzzyTermsException(term.text(), e);
-        }
-      }
-      // first segment computes the automata, and we share with subsequent 
segments via this Attribute:
-      dfaAtt.setAutomata(prevAutomata);
-    }
+    this.automata = automata;
 
-    this.automata = prevAutomata;
     bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
     bottomTerm = maxBoostAtt.getCompetitiveTerm();
     bottomChanged(null);
   }
 
   /**
-   * Builds a binary Automaton to match a fuzzy term
-   * @param text            the term to match
-   * @param prefixLength    length of a required common prefix
-   * @param transpositions  {@code true} if transpositions should count as a 
single edit
-   * @param maxEdits        the maximum edit distance of matching terms
+   * Sets the maximum non-competitive boost, which may allow switching to a
+   * lower max-edit automaton at run time
+   */
+  public void setMaxNonCompetitiveBoost(float boost) {
+    this.maxBoostAtt.setMaxNonCompetitiveBoost(boost);
 
 Review comment:
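  Does this need to call `bottomChanged`? The constructor calls
  `bottomChanged(null)` after reading the boost from `maxBoostAtt`, whereas
  this setter, as far as the hunk above shows, only updates the attribute.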

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to