Jackie-Jiang commented on code in PR #10254: URL: https://github.com/apache/pinot/pull/10254#discussion_r1107837904
########## pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java: ########## @@ -157,9 +160,11 @@ public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDic int[] _matchingDictIds; int[] _nonMatchingDictIds; - DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType) { + DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType, + @Nullable QueryContext queryContext) { super(notInPredicate); - _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType); + _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType, + queryContext); Review Comment: (nit) Should be one line ########## pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java: ########## @@ -139,11 +144,22 @@ public static IntSet getDictIdSet(BaseInPredicate inPredicate, Dictionary dictio } break; case STRING: - for (String value : values) { - int dictId = dictionary.indexOf(value); - if (dictId >= 0) { - dictIdSet.add(dictId); + if (queryContext == null || values.size() < Integer.parseInt(queryContext.getQueryOptions() + .getOrDefault(CommonConstants.Broker.Request.QueryOptionKey.IN_PREDICATE_SORT_THRESHOLD, + CommonConstants.Broker.Request.QueryOptionValue.DEFAULT_IN_PREDICATE_SORT_THRESHOLD))) { + for (String value : values) { + int dictId = dictionary.indexOf(value); + if (dictId >= 0) { + dictIdSet.add(dictId); + } } + } else { + List<String> sortedValues = + queryContext.getOrComputeSharedValue(List.class, Equivalence.identity().wrap(inPredicate), v -> { Review Comment: (minor) ```suggestion queryContext.getOrComputeSharedValue(List.class, Equivalence.identity().wrap(inPredicate), k -> { ``` ########## pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java: ########## @@ -139,11 +144,22 @@ public static IntSet getDictIdSet(BaseInPredicate inPredicate, Dictionary dictio } break; case STRING: - for (String value : values) { - int dictId = dictionary.indexOf(value); - if (dictId >= 0) { - dictIdSet.add(dictId); + if (queryContext == null || values.size() < Integer.parseInt(queryContext.getQueryOptions() + .getOrDefault(CommonConstants.Broker.Request.QueryOptionKey.IN_PREDICATE_SORT_THRESHOLD, + CommonConstants.Broker.Request.QueryOptionValue.DEFAULT_IN_PREDICATE_SORT_THRESHOLD))) { + for (String value : values) { + int dictId = dictionary.indexOf(value); + if (dictId >= 0) { + dictIdSet.add(dictId); + } } + } else { + List<String> sortedValues = + queryContext.getOrComputeSharedValue(List.class, Equivalence.identity().wrap(inPredicate), v -> { + values.sort(String::compareTo); Review Comment: This can potentially cause problem as `values` is shared across segments, and this method is modifying it. We might want to make a clone of the list and sort on the clone. (minor) This is equivalent to `values.sort(null)` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java: ########## @@ -279,4 +281,34 @@ protected byte[] getBytes(int dictId) { protected byte[] getBuffer() { return new byte[_numBytesPerValue]; } + + /** + * Returns the dictionary id for the given sorted values. + * @param sortedValues + * @param dictIds + */ + public void getDictIds(List<String> sortedValues, IntSet dictIds) { Review Comment: (minor) Annotate with `@Override` ########## pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java: ########## @@ -157,9 +160,11 @@ public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDic int[] _matchingDictIds; int[] _nonMatchingDictIds; - DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType) { + DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType, + QueryContext queryContext) { Review Comment: Annotate it as `Nullable` ########## pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java: ########## @@ -139,11 +144,22 @@ public static IntSet getDictIdSet(BaseInPredicate inPredicate, Dictionary dictio } break; case STRING: - for (String value : values) { - int dictId = dictionary.indexOf(value); - if (dictId >= 0) { - dictIdSet.add(dictId); + if (queryContext == null || values.size() < Integer.parseInt(queryContext.getQueryOptions() Review Comment: (minor) Suggest changing to `<=` for consistency, we usually treat threshold as exclusive ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BaseImmutableDictionary.java: ########## @@ -279,4 +281,34 @@ protected byte[] getBytes(int dictId) { protected byte[] getBuffer() { return new byte[_numBytesPerValue]; } + + /** + * Returns the dictionary id for the given sorted values. + * @param sortedValues + * @param dictIds + */ + public void getDictIds(List<String> sortedValues, IntSet dictIds) { + int valueIdx = 0; + int dictIdx = 0; + byte[] utf8 = null; + boolean needNewUtf8 = true; + while (valueIdx < sortedValues.size() && dictIdx < length()) { Review Comment: Consider caching the size/length to local variable -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org