This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1a1b62aad8f [env](compiler) Reduce template instantiations in
predicate_creator.h for faster compilation (#61858)
1a1b62aad8f is described below
commit 1a1b62aad8f16abf58b2bfca9e9cf8c0c1c6c99d
Author: Mryange <[email protected]>
AuthorDate: Tue Mar 31 15:14:11 2026 +0800
[env](compiler) Reduce template instantiations in predicate_creator.h for
faster compilation (#61858)
`predicate_creator.h` was a header-only file containing two heavy
template functions: `create_in_list_predicate<PT>` and
`create_comparison_predicate<PT>`. Every `.cpp` file that included this
header had to independently instantiate massive template class
hierarchies:
- **InListPredicateBase<TYPE, PT, N>**: 23 types × 2 PT × 9 N = **414
class instantiations**, each with ~34 member functions (676 LOC class)
- **ComparisonPredicateBase<TYPE, PT>**: 22 types × 6 PT = **132 class
instantiations** (`TYPE_VARCHAR` shares the `TYPE_STRING` instantiation),
each with ~40 member functions (720 LOC class)
This resulted in ~19,000 function instantiations **per consumer file**.
The top 2 slowest files in the entire BE codebase (`scan_operator.cpp`
at 143.5s and `delete_handler.cpp` at 141.0s) both included this header
— confirmed via `-ftime-trace` profiling.
### What this PR does
1. **Move template definitions from header to `.cpp`**: Replace the full
template function bodies in `predicate_creator.h` with declarations
only. Define them in `.cpp` files — via explicit instantiations for
`create_comparison_predicate` and explicit specializations for
`create_in_list_predicate` — so the templates are compiled once and linked.
2. **Prune heavy includes from the header**: Remove
`in_list_predicate.h` (676 LOC), `comparison_predicate.h` (720 LOC),
`bloom_filter_predicate.h`, `null_predicate.h`, and other transitive
includes that are no longer needed in the header. Add forward
declarations for `BloomFilterFuncBase` and `BitmapFilterFuncBase`.
3. **Split into 4 `.cpp` files for parallel compilation**: Concentrating
all template instantiations in a single `.cpp` would create a new 224s
bottleneck, so they are split by template family to enable parallel builds:
- `predicate_creator.cpp` — bloom_filter + bitmap_filter (lightweight,
~28s)
- `predicate_creator_in_list_in.cpp` —
`create_in_list_predicate<IN_LIST>` (~61s)
- `predicate_creator_in_list_not_in.cpp` —
`create_in_list_predicate<NOT_IN_LIST>` (~60s)
- `predicate_creator_comparison.cpp` —
`create_comparison_predicate<EQ/NE/LT/GT/LE/GE>` (~46s)
4. **Fix broken transitive includes**: Add `#include
"storage/predicate/null_predicate.h"` to `delete_handler.cpp`, which
previously got it transitively through the old header.
### Compilation time results (ASAN, single-threaded measurement)
| File | Before (s) | After (s) | Change |
|------|--------:|--------:|--------|
| scan_operator.cpp | 143.50 | 45.96 | **-68.0%** |
| delete_handler.cpp | 141.03 | 29.62 | **-79.0%** |
| predicate_creator.cpp | 33.05 | 28.13 | -14.9% |
| predicate_creator_in_list_in.cpp | — | 60.85 | new |
| predicate_creator_in_list_not_in.cpp | — | 59.84 | new |
| predicate_creator_comparison.cpp | — | 46.23 | new |
**Parallel build critical path: 143.5s → 60.9s (-57.6%)**
---
be/src/storage/delete/delete_handler.cpp | 1 +
be/src/storage/predicate/predicate_creator.cpp | 5 +
be/src/storage/predicate/predicate_creator.h | 252 +--------------------
.../predicate/predicate_creator_comparison.cpp | 153 +++++++++++++
.../predicate/predicate_creator_in_list_in.cpp | 166 ++++++++++++++
.../predicate/predicate_creator_in_list_not_in.cpp | 166 ++++++++++++++
6 files changed, 499 insertions(+), 244 deletions(-)
diff --git a/be/src/storage/delete/delete_handler.cpp
b/be/src/storage/delete/delete_handler.cpp
index c13c17700a5..8aab3c42296 100644
--- a/be/src/storage/delete/delete_handler.cpp
+++ b/be/src/storage/delete/delete_handler.cpp
@@ -30,6 +30,7 @@
#include "core/data_type_serde/data_type_serde.h"
#include "storage/olap_common.h"
#include "storage/predicate/block_column_predicate.h"
+#include "storage/predicate/null_predicate.h"
#include "storage/predicate/predicate_creator.h"
#include "storage/tablet/tablet_schema.h"
#include "storage/utils.h"
diff --git a/be/src/storage/predicate/predicate_creator.cpp
b/be/src/storage/predicate/predicate_creator.cpp
index 1c57a754e8a..6bfa938a4d2 100644
--- a/be/src/storage/predicate/predicate_creator.cpp
+++ b/be/src/storage/predicate/predicate_creator.cpp
@@ -17,6 +17,11 @@
#include "storage/predicate/predicate_creator.h"
+#include "common/exception.h"
+#include "exprs/create_predicate_function.h"
+#include "storage/predicate/bitmap_filter_predicate.h"
+#include "storage/predicate/bloom_filter_predicate.h"
+
namespace doris {
std::shared_ptr<ColumnPredicate> create_bloom_filter_predicate(
diff --git a/be/src/storage/predicate/predicate_creator.h
b/be/src/storage/predicate/predicate_creator.h
index 3fd5e99b1ec..54f18838359 100644
--- a/be/src/storage/predicate/predicate_creator.h
+++ b/be/src/storage/predicate/predicate_creator.h
@@ -17,271 +17,35 @@
#pragma once
-#include <fast_float/fast_float.h>
-
-#include <charconv>
-#include <stdexcept>
+#include <memory>
#include <string>
-#include <type_traits>
-#include "common/exception.h"
-#include "common/status.h"
#include "core/data_type/data_type.h"
-#include "core/data_type/define_primitive_type.h"
#include "core/data_type/primitive_type.h"
-#include "core/string_ref.h"
-#include "exprs/create_predicate_function.h"
-#include "exprs/function/cast/cast_parameters.h"
-#include "exprs/function/cast/cast_to_basic_number_common.h"
+#include "core/field.h"
#include "exprs/hybrid_set.h"
-#include "storage/olap_utils.h"
-#include "storage/predicate/bloom_filter_predicate.h"
#include "storage/predicate/column_predicate.h"
-#include "storage/predicate/comparison_predicate.h"
-#include "storage/predicate/in_list_predicate.h"
-#include "storage/predicate/null_predicate.h"
-#include "storage/tablet/tablet_schema.h"
-#include "util/date_func.h"
-#include "util/string_util.h"
namespace doris {
#include "common/compile_check_begin.h"
-template <PrimitiveType TYPE, PredicateType PT>
-std::shared_ptr<ColumnPredicate> create_in_list_predicate(const uint32_t cid,
- const std::string
col_name,
- const
std::shared_ptr<HybridSetBase>& set,
- bool is_opposite,
- size_t char_length =
0) {
- auto set_size = set->size();
- if (set_size == 1) {
- return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 2) {
- return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 3) {
- return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 4) {
- return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 5) {
- return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 6) {
- return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == 7) {
- return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
- return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
- char_length);
- } else {
- return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
- cid, col_name, set, is_opposite, char_length);
- }
-}
+class BloomFilterFuncBase;
+class BitmapFilterFuncBase;
+// Explicitly specialized in predicate_creator_in_list_{in,not_in}.cpp.
template <PredicateType PT>
std::shared_ptr<ColumnPredicate> create_in_list_predicate(const uint32_t cid,
const std::string
col_name,
const DataTypePtr&
data_type,
const
std::shared_ptr<HybridSetBase> set,
- bool is_opposite) {
- switch (data_type->get_primitive_type()) {
- case TYPE_TINYINT: {
- return create_in_list_predicate<TYPE_TINYINT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_SMALLINT: {
- return create_in_list_predicate<TYPE_SMALLINT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_INT: {
- return create_in_list_predicate<TYPE_INT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_BIGINT: {
- return create_in_list_predicate<TYPE_BIGINT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_LARGEINT: {
- return create_in_list_predicate<TYPE_LARGEINT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_FLOAT: {
- return create_in_list_predicate<TYPE_FLOAT, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DOUBLE: {
- return create_in_list_predicate<TYPE_DOUBLE, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DECIMALV2: {
- return create_in_list_predicate<TYPE_DECIMALV2, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_DECIMAL32: {
- return create_in_list_predicate<TYPE_DECIMAL32, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_DECIMAL64: {
- return create_in_list_predicate<TYPE_DECIMAL64, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_DECIMAL128I: {
- return create_in_list_predicate<TYPE_DECIMAL128I, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_DECIMAL256: {
- return create_in_list_predicate<TYPE_DECIMAL256, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_CHAR: {
- return create_in_list_predicate<TYPE_CHAR, PT>(
- cid, col_name, set, is_opposite,
- assert_cast<const
DataTypeString*>(remove_nullable(data_type).get())->len());
- }
- case TYPE_VARCHAR: {
- return create_in_list_predicate<TYPE_VARCHAR, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_STRING: {
- return create_in_list_predicate<TYPE_STRING, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DATE: {
- return create_in_list_predicate<TYPE_DATE, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DATEV2: {
- return create_in_list_predicate<TYPE_DATEV2, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DATETIME: {
- return create_in_list_predicate<TYPE_DATETIME, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_DATETIMEV2: {
- return create_in_list_predicate<TYPE_DATETIMEV2, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_TIMESTAMPTZ: {
- return create_in_list_predicate<TYPE_TIMESTAMPTZ, PT>(cid, col_name,
set, is_opposite);
- }
- case TYPE_BOOLEAN: {
- return create_in_list_predicate<TYPE_BOOLEAN, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_IPV4: {
- return create_in_list_predicate<TYPE_IPV4, PT>(cid, col_name, set,
is_opposite);
- }
- case TYPE_IPV6: {
- return create_in_list_predicate<TYPE_IPV6, PT>(cid, col_name, set,
is_opposite);
- }
- default:
- throw Exception(Status::InternalError("Unsupported type {} for
in_predicate",
-
type_to_string(data_type->get_primitive_type())));
- return nullptr;
- }
-}
+ bool is_opposite);
+// Defined and explicitly instantiated in predicate_creator_comparison.cpp.
template <PredicateType PT>
std::shared_ptr<ColumnPredicate> create_comparison_predicate(const uint32_t
cid,
const std::string
col_name,
const
DataTypePtr& data_type,
- const Field&
value, bool opposite) {
- switch (data_type->get_primitive_type()) {
- case TYPE_TINYINT: {
- return ComparisonPredicateBase<TYPE_TINYINT, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_SMALLINT: {
- return ComparisonPredicateBase<TYPE_SMALLINT, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_INT: {
- return ComparisonPredicateBase<TYPE_INT, PT>::create_shared(cid,
col_name, value, opposite);
- }
- case TYPE_BIGINT: {
- return ComparisonPredicateBase<TYPE_BIGINT, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_LARGEINT: {
- return ComparisonPredicateBase<TYPE_LARGEINT, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_FLOAT: {
- return ComparisonPredicateBase<TYPE_FLOAT, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DOUBLE: {
- return ComparisonPredicateBase<TYPE_DOUBLE, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DECIMALV2: {
- return ComparisonPredicateBase<TYPE_DECIMALV2, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DECIMAL32: {
- return ComparisonPredicateBase<TYPE_DECIMAL32, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DECIMAL64: {
- return ComparisonPredicateBase<TYPE_DECIMAL64, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DECIMAL128I: {
- return ComparisonPredicateBase<TYPE_DECIMAL128I,
PT>::create_shared(cid, col_name, value,
-
opposite);
- }
- case TYPE_DECIMAL256: {
- return ComparisonPredicateBase<TYPE_DECIMAL256,
PT>::create_shared(cid, col_name, value,
-
opposite);
- }
- case TYPE_CHAR: {
- auto target = std::max(cast_set<size_t>(assert_cast<const
DataTypeString*>(
-
remove_nullable(data_type).get())
- ->len()),
- value.template get<TYPE_CHAR>().size());
- if (target > value.template get<TYPE_CHAR>().size()) {
- std::string tmp(target, '\0');
- memcpy(tmp.data(), value.template get<TYPE_CHAR>().data(),
- value.template get<TYPE_CHAR>().size());
- return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
- cid, col_name,
Field::create_field<TYPE_CHAR>(std::move(tmp)), opposite);
- } else {
- return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
- cid, col_name,
Field::create_field<TYPE_CHAR>(value.template get<TYPE_CHAR>()),
- opposite);
- }
- }
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- return ComparisonPredicateBase<TYPE_STRING, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DATE: {
- return ComparisonPredicateBase<TYPE_DATE, PT>::create_shared(cid,
col_name, value,
- opposite);
- }
- case TYPE_DATEV2: {
- return ComparisonPredicateBase<TYPE_DATEV2, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DATETIME: {
- return ComparisonPredicateBase<TYPE_DATETIME, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_DATETIMEV2: {
- return ComparisonPredicateBase<TYPE_DATETIMEV2,
PT>::create_shared(cid, col_name, value,
-
opposite);
- }
- case TYPE_TIMESTAMPTZ: {
- return ComparisonPredicateBase<TYPE_TIMESTAMPTZ,
PT>::create_shared(cid, col_name, value,
-
opposite);
- }
- case TYPE_BOOLEAN: {
- return ComparisonPredicateBase<TYPE_BOOLEAN, PT>::create_shared(cid,
col_name, value,
-
opposite);
- }
- case TYPE_IPV4: {
- return ComparisonPredicateBase<TYPE_IPV4, PT>::create_shared(cid,
col_name, value,
- opposite);
- }
- case TYPE_IPV6: {
- return ComparisonPredicateBase<TYPE_IPV6, PT>::create_shared(cid,
col_name, value,
- opposite);
- }
- default:
- throw Exception(Status::InternalError("Unsupported type {} for
comparison_predicate",
-
type_to_string(data_type->get_primitive_type())));
- return nullptr;
- }
-}
+ const Field&
value, bool opposite);
template <PrimitiveType TYPE>
std::shared_ptr<HybridSetBase> build_set() {
diff --git a/be/src/storage/predicate/predicate_creator_comparison.cpp
b/be/src/storage/predicate/predicate_creator_comparison.cpp
new file mode 100644
index 00000000000..bfec1262cfc
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_comparison.cpp
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/comparison_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PredicateType PT>
+std::shared_ptr<ColumnPredicate> create_comparison_predicate(const uint32_t
cid,
+ const std::string
col_name,
+ const
DataTypePtr& data_type,
+ const Field&
value, bool opposite) {
+ switch (data_type->get_primitive_type()) {
+ case TYPE_TINYINT: {
+ return ComparisonPredicateBase<TYPE_TINYINT, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_SMALLINT: {
+ return ComparisonPredicateBase<TYPE_SMALLINT, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_INT: {
+ return ComparisonPredicateBase<TYPE_INT, PT>::create_shared(cid,
col_name, value, opposite);
+ }
+ case TYPE_BIGINT: {
+ return ComparisonPredicateBase<TYPE_BIGINT, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_LARGEINT: {
+ return ComparisonPredicateBase<TYPE_LARGEINT, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_FLOAT: {
+ return ComparisonPredicateBase<TYPE_FLOAT, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DOUBLE: {
+ return ComparisonPredicateBase<TYPE_DOUBLE, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DECIMALV2: {
+ return ComparisonPredicateBase<TYPE_DECIMALV2, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DECIMAL32: {
+ return ComparisonPredicateBase<TYPE_DECIMAL32, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DECIMAL64: {
+ return ComparisonPredicateBase<TYPE_DECIMAL64, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DECIMAL128I: {
+ return ComparisonPredicateBase<TYPE_DECIMAL128I,
PT>::create_shared(cid, col_name, value,
+
opposite);
+ }
+ case TYPE_DECIMAL256: {
+ return ComparisonPredicateBase<TYPE_DECIMAL256,
PT>::create_shared(cid, col_name, value,
+
opposite);
+ }
+ case TYPE_CHAR: {
+ auto target = std::max(cast_set<size_t>(assert_cast<const
DataTypeString*>(
+
remove_nullable(data_type).get())
+ ->len()),
+ value.template get<TYPE_CHAR>().size());
+ if (target > value.template get<TYPE_CHAR>().size()) {
+ std::string tmp(target, '\0');
+ memcpy(tmp.data(), value.template get<TYPE_CHAR>().data(),
+ value.template get<TYPE_CHAR>().size());
+ return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
+ cid, col_name,
Field::create_field<TYPE_CHAR>(std::move(tmp)), opposite);
+ } else {
+ return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
+ cid, col_name,
Field::create_field<TYPE_CHAR>(value.template get<TYPE_CHAR>()),
+ opposite);
+ }
+ }
+ case TYPE_VARCHAR:
+ case TYPE_STRING: {
+ return ComparisonPredicateBase<TYPE_STRING, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DATE: {
+ return ComparisonPredicateBase<TYPE_DATE, PT>::create_shared(cid,
col_name, value,
+ opposite);
+ }
+ case TYPE_DATEV2: {
+ return ComparisonPredicateBase<TYPE_DATEV2, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DATETIME: {
+ return ComparisonPredicateBase<TYPE_DATETIME, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_DATETIMEV2: {
+ return ComparisonPredicateBase<TYPE_DATETIMEV2,
PT>::create_shared(cid, col_name, value,
+
opposite);
+ }
+ case TYPE_TIMESTAMPTZ: {
+ return ComparisonPredicateBase<TYPE_TIMESTAMPTZ,
PT>::create_shared(cid, col_name, value,
+
opposite);
+ }
+ case TYPE_BOOLEAN: {
+ return ComparisonPredicateBase<TYPE_BOOLEAN, PT>::create_shared(cid,
col_name, value,
+
opposite);
+ }
+ case TYPE_IPV4: {
+ return ComparisonPredicateBase<TYPE_IPV4, PT>::create_shared(cid,
col_name, value,
+ opposite);
+ }
+ case TYPE_IPV6: {
+ return ComparisonPredicateBase<TYPE_IPV6, PT>::create_shared(cid,
col_name, value,
+ opposite);
+ }
+ default:
+ throw Exception(Status::InternalError("Unsupported type {} for
comparison_predicate",
+
type_to_string(data_type->get_primitive_type())));
+ return nullptr;
+ }
+}
+
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::EQ>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::NE>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::LT>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::GT>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::LE>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+template std::shared_ptr<ColumnPredicate>
create_comparison_predicate<PredicateType::GE>(
+ const uint32_t, const std::string, const DataTypePtr&, const Field&,
bool);
+
+} // namespace doris
diff --git a/be/src/storage/predicate/predicate_creator_in_list_in.cpp
b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
new file mode 100644
index 00000000000..7b40a9adef6
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/in_list_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PrimitiveType TYPE, PredicateType PT>
+static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
+ const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
+ bool is_opposite, size_t char_length = 0) {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else {
+ return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
+ cid, col_name, set, is_opposite, char_length);
+ }
+}
+
+template <>
+std::shared_ptr<ColumnPredicate>
create_in_list_predicate<PredicateType::IN_LIST>(
+ const uint32_t cid, const std::string col_name, const DataTypePtr&
data_type,
+ const std::shared_ptr<HybridSetBase> set, bool is_opposite) {
+ switch (data_type->get_primitive_type()) {
+ case TYPE_TINYINT: {
+ return create_in_list_predicate_impl<TYPE_TINYINT,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_SMALLINT: {
+ return create_in_list_predicate_impl<TYPE_SMALLINT,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_INT: {
+ return create_in_list_predicate_impl<TYPE_INT,
PredicateType::IN_LIST>(cid, col_name, set,
+
is_opposite);
+ }
+ case TYPE_BIGINT: {
+ return create_in_list_predicate_impl<TYPE_BIGINT,
PredicateType::IN_LIST>(cid, col_name,
+
set, is_opposite);
+ }
+ case TYPE_LARGEINT: {
+ return create_in_list_predicate_impl<TYPE_LARGEINT,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_FLOAT: {
+ return create_in_list_predicate_impl<TYPE_FLOAT,
PredicateType::IN_LIST>(cid, col_name, set,
+
is_opposite);
+ }
+ case TYPE_DOUBLE: {
+ return create_in_list_predicate_impl<TYPE_DOUBLE,
PredicateType::IN_LIST>(cid, col_name,
+
set, is_opposite);
+ }
+ case TYPE_DECIMALV2: {
+ return create_in_list_predicate_impl<TYPE_DECIMALV2,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL32: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL32,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL64: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL64,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL128I: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL128I,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL256: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL256,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_CHAR: {
+ return create_in_list_predicate_impl<TYPE_CHAR,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite,
+ assert_cast<const
DataTypeString*>(remove_nullable(data_type).get())->len());
+ }
+ case TYPE_VARCHAR: {
+ return create_in_list_predicate_impl<TYPE_VARCHAR,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_STRING: {
+ return create_in_list_predicate_impl<TYPE_STRING,
PredicateType::IN_LIST>(cid, col_name,
+
set, is_opposite);
+ }
+ case TYPE_DATE: {
+ return create_in_list_predicate_impl<TYPE_DATE,
PredicateType::IN_LIST>(cid, col_name, set,
+
is_opposite);
+ }
+ case TYPE_DATEV2: {
+ return create_in_list_predicate_impl<TYPE_DATEV2,
PredicateType::IN_LIST>(cid, col_name,
+
set, is_opposite);
+ }
+ case TYPE_DATETIME: {
+ return create_in_list_predicate_impl<TYPE_DATETIME,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DATETIMEV2: {
+ return create_in_list_predicate_impl<TYPE_DATETIMEV2,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_TIMESTAMPTZ: {
+ return create_in_list_predicate_impl<TYPE_TIMESTAMPTZ,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_BOOLEAN: {
+ return create_in_list_predicate_impl<TYPE_BOOLEAN,
PredicateType::IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_IPV4: {
+ return create_in_list_predicate_impl<TYPE_IPV4,
PredicateType::IN_LIST>(cid, col_name, set,
+
is_opposite);
+ }
+ case TYPE_IPV6: {
+ return create_in_list_predicate_impl<TYPE_IPV6,
PredicateType::IN_LIST>(cid, col_name, set,
+
is_opposite);
+ }
+ default:
+ throw Exception(Status::InternalError("Unsupported type {} for
in_predicate",
+
type_to_string(data_type->get_primitive_type())));
+ return nullptr;
+ }
+}
+
+} // namespace doris
diff --git a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
new file mode 100644
index 00000000000..be28f2fabc1
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/in_list_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PrimitiveType TYPE, PredicateType PT>
+static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
+ const uint32_t cid, const std::string col_name, const
std::shared_ptr<HybridSetBase>& set,
+ bool is_opposite, size_t char_length = 0) {
+ auto set_size = set->size();
+ if (set_size == 1) {
+ return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 2) {
+ return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 3) {
+ return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 4) {
+ return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 5) {
+ return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 6) {
+ return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == 7) {
+ return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+ return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name,
set, is_opposite,
+ char_length);
+ } else {
+ return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE +
1>::create_shared(
+ cid, col_name, set, is_opposite, char_length);
+ }
+}
+
+template <>
+std::shared_ptr<ColumnPredicate>
create_in_list_predicate<PredicateType::NOT_IN_LIST>(
+ const uint32_t cid, const std::string col_name, const DataTypePtr&
data_type,
+ const std::shared_ptr<HybridSetBase> set, bool is_opposite) {
+ switch (data_type->get_primitive_type()) {
+ case TYPE_TINYINT: {
+ return create_in_list_predicate_impl<TYPE_TINYINT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_SMALLINT: {
+ return create_in_list_predicate_impl<TYPE_SMALLINT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_INT: {
+ return create_in_list_predicate_impl<TYPE_INT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_BIGINT: {
+ return create_in_list_predicate_impl<TYPE_BIGINT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_LARGEINT: {
+ return create_in_list_predicate_impl<TYPE_LARGEINT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_FLOAT: {
+ return create_in_list_predicate_impl<TYPE_FLOAT,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DOUBLE: {
+ return create_in_list_predicate_impl<TYPE_DOUBLE,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMALV2: {
+ return create_in_list_predicate_impl<TYPE_DECIMALV2,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL32: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL32,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL64: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL64,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL128I: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL128I,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DECIMAL256: {
+ return create_in_list_predicate_impl<TYPE_DECIMAL256,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_CHAR: {
+ return create_in_list_predicate_impl<TYPE_CHAR,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite,
+ assert_cast<const
DataTypeString*>(remove_nullable(data_type).get())->len());
+ }
+ case TYPE_VARCHAR: {
+ return create_in_list_predicate_impl<TYPE_VARCHAR,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_STRING: {
+ return create_in_list_predicate_impl<TYPE_STRING,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DATE: {
+ return create_in_list_predicate_impl<TYPE_DATE,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DATEV2: {
+ return create_in_list_predicate_impl<TYPE_DATEV2,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DATETIME: {
+ return create_in_list_predicate_impl<TYPE_DATETIME,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_DATETIMEV2: {
+ return create_in_list_predicate_impl<TYPE_DATETIMEV2,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_TIMESTAMPTZ: {
+ return create_in_list_predicate_impl<TYPE_TIMESTAMPTZ,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_BOOLEAN: {
+ return create_in_list_predicate_impl<TYPE_BOOLEAN,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_IPV4: {
+ return create_in_list_predicate_impl<TYPE_IPV4,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ case TYPE_IPV6: {
+ return create_in_list_predicate_impl<TYPE_IPV6,
PredicateType::NOT_IN_LIST>(
+ cid, col_name, set, is_opposite);
+ }
+ default:
+ throw Exception(Status::InternalError("Unsupported type {} for
in_predicate",
+
type_to_string(data_type->get_primitive_type())));
+ return nullptr;
+ }
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]