Copilot commented on code in PR #61999:
URL: https://github.com/apache/doris/pull/61999#discussion_r3025552081
##########
be/src/exprs/function/in.h:
##########
@@ -113,7 +113,7 @@ class FunctionIn : public IFunction {
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_VARCHAR ||
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_STRING) {
// the StringValue's memory is held by FunctionContext, so we can
use StringValueSet here directly
-
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
+
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context),
true));
} else {
state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
Review Comment:
This change fixes more than a perf issue: previously
`create_string_value_set(get_size_with_out_null(context))` could call the `bool
null_aware` overload via `size_t -> bool`, which makes `null_aware=false` when
the IN-list contains only NULLs (size==0). That changes query semantics (NULL
propagation) for string IN/NOT IN with all-NULL constant lists. The PR
description/checklist currently says no behavior change; it should be updated
to reflect this bug fix.
##########
be/src/exprs/function/in.h:
##########
@@ -113,7 +113,7 @@ class FunctionIn : public IFunction {
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_VARCHAR ||
context->get_arg_type(0)->get_primitive_type() ==
PrimitiveType::TYPE_STRING) {
// the StringValue's memory is held by FunctionContext, so we can
use StringValueSet here directly
-
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
+
state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context),
true));
} else {
state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
Review Comment:
The overload-resolution fix in `open()` is subtle and affects correctness
for cases like `col IN (NULL)` / `col NOT IN (NULL)` on string types
(null-aware behavior when the non-null list size is 0). There are existing BE
tests around `FunctionIn` (e.g. `be/test/testutil/mock/mock_in_expr.cpp`), but
none appear to cover the all-NULL constant list case for string types. Adding a
unit/regression test for this scenario would prevent regressions.
##########
be/src/storage/predicate/predicate_creator_in_list_not_in.cpp:
##########
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
bool is_opposite, size_t char_length = 0) {
- auto set_size = set->size();
- if (set_size == 1) {
- return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 2) {
- return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 3) {
- return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 4) {
- return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 5) {
- return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 6) {
- return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 7) {
- return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
- return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else {
+ // Only string types construct their own HybridSetType in the constructor
(to convert
+ // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch
is only needed
+ // for them. All other types directly share the caller's hybrid_set.
+ if constexpr (!is_string_type(TYPE)) {
return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
cid, col_name, set, is_opposite, char_length);
+ } else {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
Review Comment:
The N-dispatch still hard-codes the max fixed-container specialization as
template parameter `8` when `set_size == FIXED_CONTAINER_MAX_SIZE`. This
becomes incorrect if `FIXED_CONTAINER_MAX_SIZE` changes. Prefer using
`InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE>::create_shared(...)`
(and keep the condition in sync) to avoid magic numbers.
```suggestion
return InListPredicateBase<TYPE, PT,
FIXED_CONTAINER_MAX_SIZE>::create_shared(
cid, col_name, set, is_opposite, char_length);
```
##########
be/src/storage/predicate/predicate_creator_in_list_in.cpp:
##########
@@ -27,34 +27,42 @@ template <PrimitiveType TYPE, PredicateType PT>
static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
bool is_opposite, size_t char_length = 0) {
- auto set_size = set->size();
- if (set_size == 1) {
- return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 2) {
- return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 3) {
- return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 4) {
- return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 5) {
- return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 6) {
- return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 7) {
- return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
- return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else {
+ // Only string types construct their own HybridSetType in the constructor
(to convert
+ // from DynamicContainer to FixedContainer<std::string, N>), so N dispatch
is only needed
+ // for them. All other types directly share the caller's hybrid_set.
+ if constexpr (!is_string_type(TYPE)) {
return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
cid, col_name, set, is_opposite, char_length);
+ } else {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid,
col_name, set, is_opposite,
+
char_length);
Review Comment:
The N-dispatch still hard-codes the max fixed-container specialization as
template parameter `8` when `set_size == FIXED_CONTAINER_MAX_SIZE`. This
becomes incorrect if `FIXED_CONTAINER_MAX_SIZE` changes. Prefer using
`InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE>::create_shared(...)`
(and keep the condition in sync) to avoid magic numbers.
```suggestion
return InListPredicateBase<TYPE, PT,
FIXED_CONTAINER_MAX_SIZE>::create_shared(
cid, col_name, set, is_opposite, char_length);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]