HappenLee commented on code in PR #62589: URL: https://github.com/apache/doris/pull/62589#discussion_r3330500997
########## be/src/exec/runtime_filter/runtime_filter_partition_pruner.cpp: ########## @@ -0,0 +1,866 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/runtime_filter/runtime_filter_partition_pruner.h" + +#include <gen_cpp/PlanNodes_types.h> + +#include <optional> +#include <unordered_set> +#include <utility> + +#include "core/block/block.h" +#include "core/column/column.h" +#include "core/column/column_nullable.h" +#include "core/data_type/data_type_nullable.h" +#include "core/field.h" +#include "exprs/bloom_filter_func.h" +#include "exprs/hybrid_set.h" +#include "exprs/runtime_filter_expr.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" +#include "exprs/vliteral.h" +#include "exprs/vslot_ref.h" +#include "runtime/descriptors.h" + +namespace doris { + +// NOLINTBEGIN(readability-function-cognitive-complexity,readability-function-size) +// Complexity is inflated by macro expansion for each PrimitiveType case. +Status ParsedPartitionBoundaries::parse( + const std::vector<TPartitionBoundary>& boundaries, + const phmap::flat_hash_map<int, SlotDescriptor*>& slot_descs) { + for (const auto& tb : boundaries) { + DORIS_CHECK(tb.__isset.partition_id); + DORIS_CHECK(tb.__isset.slot_id); + SlotId slot_id = tb.slot_id; + + auto slot_it = slot_descs.find(slot_id); + DORIS_CHECK(slot_it != slot_descs.end()); + SlotDescriptor* slot = slot_it->second; + // Reuse the slot's pre-built DataType: walking through VLiteral here + // would cost a `DataTypeFactory::create_data_type(node)` heap allocation + // and a one-row `ColumnConst` allocation per boundary endpoint. With + // thousands of partitions that dominates BuildTasksTime. + const DataTypePtr& slot_type = slot->type(); + PrimitiveType ptype = slot_type->get_primitive_type(); + int precision = cast_set<int>(slot_type->get_precision()); + int scale = cast_set<int>(slot_type->get_scale()); + bool is_nullable = slot->is_nullable(); + + // Store slot data type for potential projection use + _slot_data_types[slot_id] = slot_type; + + ParsedBoundary boundary; + boundary.partition_id = tb.partition_id; + boundary.slot_id = slot_id; + boundary.is_nullable = is_nullable; + + bool parsed_ok = false; + +#define BUILD_BOUNDARY_CVR(NAME) \ + case TYPE_##NAME: { \ + using CppType = typename PrimitiveTypeTraits<TYPE_##NAME>::CppType; \ + bool is_list = tb.__isset.list_values && !tb.list_values.empty(); \ + bool is_range = tb.__isset.range_start || tb.__isset.range_end; \ + DORIS_CHECK(is_list || is_range); \ + ColumnValueRange<TYPE_##NAME> cvr(slot->col_name(), is_nullable, precision, scale); \ + /* Returns nullopt if `node` is a NULL literal; the caller then sets contain_null */ \ + /* on the CVR instead of trying to extract a typed value (which would dereference */ \ + /* a null data pointer for the non-string branch). */ \ + auto parse_texpr_node = [&](const TExprNode& node) -> std::optional<CppType> { \ + if (node.node_type == TExprNodeType::NULL_LITERAL) { \ + return std::nullopt; \ + } \ + /* `Field` value is copied into the CVR by `add_fixed_value` / */ \ + /* `add_range` (both take CppType by const-ref / by value), so the */ \ + /* temporary `Field`'s lifetime ending at this expression's full-statement */ \ + /* boundary is safe -- including for `String` payloads. */ \ + Field field = slot_type->get_field(node); \ + return std::make_optional<CppType>(field.get<TYPE_##NAME>()); \ + }; \ + if (is_list) { \ + auto empty_cvr = ColumnValueRange<TYPE_##NAME>::create_empty_column_value_range( \ + is_nullable, precision, scale); \ + bool list_has_null = false; \ + bool list_has_value = false; \ + for (const auto& node : tb.list_values) { \ + auto parsed = parse_texpr_node(node); \ + if (!parsed) { \ + list_has_null = true; \ + continue; \ + } \ + static_cast<void>(empty_cvr.add_fixed_value(*parsed)); \ + list_has_value = true; \ + } \ + if (list_has_value) { \ + cvr.intersection(empty_cvr); \ + } \ + if (list_has_null && is_nullable) { \ Review Comment: 如果不是nullable是不是规划错误要报错呢? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
