npawar commented on code in PR #16832:
URL: https://github.com/apache/pinot/pull/16832#discussion_r2360440498


##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java:
##########
@@ -48,9 +52,41 @@ private RegexpLikePredicateEvaluatorFactory() {
    * @return Dictionary based REGEXP_LIKE predicate evaluator
    */
   public static BaseDictionaryBasedPredicateEvaluator 
newDictionaryBasedEvaluator(
-      RegexpLikePredicate regexpLikePredicate, Dictionary dictionary, DataType 
dataType) {
+      RegexpLikePredicate regexpLikePredicate, Dictionary dictionary, DataType 
dataType,
+      @Nullable QueryContext queryContext) {
     Preconditions.checkArgument(dataType.getStoredType() == DataType.STRING, 
"Unsupported data type: " + dataType);
-    if (dictionary.length() < DICTIONARY_CARDINALITY_THRESHOLD_FOR_SCAN) {
+
+    // 1. If useDictForRegexpLikePredicate is set to true, always use 
dictionary
+    if (queryContext != null && queryContext.getQueryOptions() != null) {
+      String useDictOption =
+          
queryContext.getQueryOptions().get(QueryOptionKey.USE_DICT_FOR_REGEXP_LIKE_PREDICATE_OPTION);
+      if ("true".equalsIgnoreCase(useDictOption)) {
+        return new 
DictIdBasedRegexpLikePredicateEvaluator(regexpLikePredicate, dictionary);
+      }
+    }
+
+    // 2. Otherwise, get the threshold number from 
regexpDictCardinalityThreshold (default 10K)
+    int cardinalityThreshold = 
QueryOptionKey.DEFAULT_DICTIONARY_CARDINALITY_THRESHOLD;

Review Comment:
   Can we make the default max(10K, 10% of total docs)?



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java:
##########
@@ -48,9 +52,41 @@ private RegexpLikePredicateEvaluatorFactory() {
    * @return Dictionary based REGEXP_LIKE predicate evaluator
    */
   public static BaseDictionaryBasedPredicateEvaluator 
newDictionaryBasedEvaluator(
-      RegexpLikePredicate regexpLikePredicate, Dictionary dictionary, DataType 
dataType) {
+      RegexpLikePredicate regexpLikePredicate, Dictionary dictionary, DataType 
dataType,
+      @Nullable QueryContext queryContext) {
     Preconditions.checkArgument(dataType.getStoredType() == DataType.STRING, 
"Unsupported data type: " + dataType);
-    if (dictionary.length() < DICTIONARY_CARDINALITY_THRESHOLD_FOR_SCAN) {
+
+    // 1. If useDictForRegexpLikePredicate is set to true, always use 
dictionary
+    if (queryContext != null && queryContext.getQueryOptions() != null) {
+      String useDictOption =
+          
queryContext.getQueryOptions().get(QueryOptionKey.USE_DICT_FOR_REGEXP_LIKE_PREDICATE_OPTION);
+      if ("true".equalsIgnoreCase(useDictOption)) {
+        return new 
DictIdBasedRegexpLikePredicateEvaluator(regexpLikePredicate, dictionary);
+      }
+    }
+
+    // 2. Otherwise, get the threshold number from 
regexpDictCardinalityThreshold (default 10K)
+    int cardinalityThreshold = 
QueryOptionKey.DEFAULT_DICTIONARY_CARDINALITY_THRESHOLD;
+    if (queryContext != null && queryContext.getQueryOptions() != null) {
+      String queryOptionValue =
+          
queryContext.getQueryOptions().get(QueryOptionKey.REGEXP_DICTIONARY_CARDINALITY_THRESHOLD_OPTION);

Review Comment:
   Would a percentage-based threshold be better than a fixed value?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to