Copilot commented on code in PR #61999:
URL: https://github.com/apache/doris/pull/61999#discussion_r3025552081


##########
be/src/exprs/function/in.h:
##########
@@ -113,7 +113,7 @@ class FunctionIn : public IFunction {
                    context->get_arg_type(0)->get_primitive_type() == 
PrimitiveType::TYPE_VARCHAR ||
                    context->get_arg_type(0)->get_primitive_type() == 
PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can 
use StringValueSet here directly
-            
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
+            
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context),
 true));
         } else {
             
state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),

Review Comment:
   This change fixes more than a perf issue: previously 
`create_string_value_set(get_size_with_out_null(context))` could call the `bool 
null_aware` overload via `size_t -> bool`, which makes `null_aware=false` when 
the IN-list contains only NULLs (size==0). That changes query semantics (NULL 
propagation) for string IN/NOT IN with all-NULL constant lists. The PR 
description/checklist currently says no behavior change; it should be updated 
to reflect this bug fix.



##########
be/src/exprs/function/in.h:
##########
@@ -113,7 +113,7 @@ class FunctionIn : public IFunction {
                    context->get_arg_type(0)->get_primitive_type() == 
PrimitiveType::TYPE_VARCHAR ||
                    context->get_arg_type(0)->get_primitive_type() == 
PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can 
use StringValueSet here directly
-            
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
+            
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context),
 true));
         } else {
             
state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),

Review Comment:
   The overload-resolution fix in `open()` is subtle and affects correctness 
for cases like `col IN (NULL)` / `col NOT IN (NULL)` on string types 
(null-aware behavior when the non-null list size is 0). There are existing BE 
tests around `FunctionIn` (e.g. `be/test/testutil/mock/mock_in_expr.cpp`), but 
none appear to cover the all-NULL constant list case for string types. Adding a 
unit/regression test for this scenario would prevent regressions.



##########
be/src/storage/predicate/predicate_creator_in_list_not_in.cpp:
##########
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
 static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
         const uint32_t cid, const std::string col_name, const 
std::shared_ptr<HybridSetBase>& set,
         bool is_opposite, size_t char_length = 0) {
-    auto set_size = set->size();
-    if (set_size == 1) {
-        return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 2) {
-        return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 3) {
-        return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 4) {
-        return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 5) {
-        return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 6) {
-        return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 7) {
-        return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
-        return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else {
+    // Only string types construct their own HybridSetType in the constructor 
(to convert
+    // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch 
is only needed
+    // for them. All other types directly share the caller's hybrid_set.
+    if constexpr (!is_string_type(TYPE)) {
         return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE + 
1>::create_shared(
                 cid, col_name, set, is_opposite, char_length);
+    } else {
+        auto set_size = set->size();
+        if (set_size == 1) {
+            return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 2) {
+            return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 3) {
+            return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 4) {
+            return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 5) {
+            return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 6) {
+            return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 7) {
+            return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+            return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);

Review Comment:
   The N-dispatch still hard-codes the max fixed-container specialization as 
template parameter `8` when `set_size == FIXED_CONTAINER_MAX_SIZE`. This 
becomes incorrect if `FIXED_CONTAINER_MAX_SIZE` changes. Prefer using 
`InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE>::create_shared(...)` 
(and keep the condition in sync) to avoid magic numbers.
   ```suggestion
               return InListPredicateBase<TYPE, PT, 
FIXED_CONTAINER_MAX_SIZE>::create_shared(
                       cid, col_name, set, is_opposite, char_length);
   ```



##########
be/src/storage/predicate/predicate_creator_in_list_in.cpp:
##########
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
 static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
         const uint32_t cid, const std::string col_name, const 
std::shared_ptr<HybridSetBase>& set,
         bool is_opposite, size_t char_length = 0) {
-    auto set_size = set->size();
-    if (set_size == 1) {
-        return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 2) {
-        return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 3) {
-        return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 4) {
-        return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 5) {
-        return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 6) {
-        return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 7) {
-        return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
-        return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else {
+    // Only string types construct their own HybridSetType in the constructor 
(to convert
+    // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch 
is only needed
+    // for them. All other types directly share the caller's hybrid_set.
+    if constexpr (!is_string_type(TYPE)) {
         return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE + 
1>::create_shared(
                 cid, col_name, set, is_opposite, char_length);
+    } else {
+        auto set_size = set->size();
+        if (set_size == 1) {
+            return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 2) {
+            return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 3) {
+            return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 4) {
+            return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 5) {
+            return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 6) {
+            return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == 7) {
+            return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);
+        } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+            return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, 
col_name, set, is_opposite,
+                                                                   
char_length);

Review Comment:
   The N-dispatch still hard-codes the max fixed-container specialization as 
template parameter `8` when `set_size == FIXED_CONTAINER_MAX_SIZE`. This 
becomes incorrect if `FIXED_CONTAINER_MAX_SIZE` changes. Prefer using 
`InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE>::create_shared(...)` 
(and keep the condition in sync) to avoid magic numbers.
   ```suggestion
               return InListPredicateBase<TYPE, PT, 
FIXED_CONTAINER_MAX_SIZE>::create_shared(
                       cid, col_name, set, is_opposite, char_length);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to