github-actions[bot] commented on code in PR #62589: URL: https://github.com/apache/doris/pull/62589#discussion_r3310374444
########## be/src/exec/runtime_filter/runtime_filter_partition_pruner.h: ########## @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <mutex> +#include <shared_mutex> +#include <unordered_map> +#include <vector> + +#include "common/global_types.h" +#include "common/status.h" +#include "core/data_type/data_type.h" +#include "exec/common/hash_table/phmap_fwd_decl.h" +#include "exprs/vexpr_fwd.h" +#include "storage/olap_scan_common.h" + +namespace doris { + +class SlotDescriptor; +class VExprContext; +struct TPartitionBoundary; +struct TRuntimeFilterDesc; +struct TTargetExprMonotonicity; + +// Parsed representation of one partition boundary for one slot column. +struct ParsedBoundary { + int64_t partition_id = 0; + SlotId slot_id = 0; + bool is_nullable = false; + ColumnValueRangeType boundary_cvr; + // True if the partition's value set is exactly {NULL} (e.g. LIST + // partition whose only key is NULL). The CVR alone cannot encode + // "only NULL" -- it stays as the whole range with contain_null=true + // -- so we track it explicitly to enable accurate pruning. + bool only_null = false; + // True if the partition's value set includes NULL (covers both + // only-NULL and mixed LIST partitions). Tracked separately because + // ColumnValueRange::set_contain_null(true) destructively clears the + // fixed-value set, so we cannot stash the NULL flag inside the CVR + // alongside the concrete values. + bool contains_null = false; +}; + +// Immutable, fragment-shared parse result of TPartitionBoundary list. +// +// Parsing is expensive (constructs VLiteral per literal, materializes a +// ColumnPtr, builds ColumnValueRange) and depends only on plan-time data. +// All pipeline instances of the same fragment share one parse, performed +// in OperatorX::prepare() which is single-threaded fragment setup. +class ParsedPartitionBoundaries { +public: + ParsedPartitionBoundaries() = default; + + // Build the parse result from the thrift `boundaries` list. Caller must + // ensure this is invoked at most once per instance (OperatorX::prepare() + // is the natural call site). + Status parse(const std::vector<TPartitionBoundary>& boundaries, + const phmap::flat_hash_map<int, SlotDescriptor*>& slot_descs); + + bool empty() const { return _slot_to_boundaries.empty(); } + int64_t total_partitions() const { return _total_partition_count; } + + const std::unordered_map<SlotId, std::vector<ParsedBoundary>>& slot_to_boundaries() const { + return _slot_to_boundaries; + } + + // Lazily compute target-domain boundaries for a monotonic RF target. + // `target_expr` is `impl->children()[0]` of the RF wrapper (a sub-tree of + // the conjunct). `leaf_slot_id` is the unique VSlotRef leaf inside it + // (FE asserted target_expr has exactly one input slot). `leaf_column_id` + // is that slot ref's `column_id()` -- the position in the runtime block. + // Only partitions present in `partition_directions` are projected and each + // partition uses its own FE-proven local direction. + // `ctx` is the conjunct's VExprContext (used to execute the sub-expression). + // + // Direct SlotRef targets reuse the parsed partition boundaries. Expression + // targets project finite RANGE endpoints by executing `target_expr`. + // + // Returns an empty vector when no selected boundary can be projected (e.g. + // every candidate contains NULL). Unexpected FE/BE metadata mismatches + // return an error instead of disabling pruning silently. + // The shared_ptr keeps the computed vector alive even if another pipeline + // instance inserts into the shared map and triggers unordered_map rehash. + // + // Direction: + // MONOTONIC_INCREASING: projected lo and hi keep their roles + // MONOTONIC_DECREASING: swap (projected lo, hi) -> (hi, lo) + // + // Open endpoints (MINVALUE/MAXVALUE) stay open after projection and swap + // sides for monotonic decreasing targets. Boundaries containing NULL + // partition values, or finite endpoints that project to NULL, are omitted + // from the result so this RF conservatively leaves them unpruned. + Status get_or_compute_projected_boundaries( + int filter_id, const VExprSPtr& target_expr, SlotId leaf_slot_id, int leaf_column_id, + const std::unordered_map<int64_t, TTargetExprMonotonicity::type>& partition_directions, Review Comment: This header only forward-declares `struct TTargetExprMonotonicity` above, but this public signature uses the nested type `TTargetExprMonotonicity::type`. A nested type cannot be named through an incomplete forward declaration, and `runtime_filter_partition_pruner.cpp` includes this header before including `gen_cpp/PlanNodes_types.h`, so the new file can fail to compile as soon as the header is parsed. Please include the generated thrift definition in the header (or avoid exposing the nested thrift type in the header signature) instead of relying on a forward declaration. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
