This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 021678c7c30 [fix](window_funnel) fix wrong result of window_funnel #38954 (#39270) 021678c7c30 is described below commit 021678c7c302abab23a3705f5a114b6f9ca7f5e6 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Fri Aug 16 09:59:31 2024 +0800 [fix](window_funnel) fix wrong result of window_funnel #38954 (#39270) ## Proposed changes BP #38954 --- be/src/agent/be_exec_version_manager.h | 2 +- be/src/util/simd/bits.h | 16 + .../aggregate_function_simple_factory.cpp | 2 + .../aggregate_function_window_funnel.cpp | 37 +- .../aggregate_function_window_funnel.h | 387 ++++++++++++++++++++- .../aggregate_functions/vec_window_funnel_test.cpp | 9 +- .../main/java/org/apache/doris/common/Config.java | 2 +- .../data/nereids_p0/aggregate/window_funnel.out | 99 ++++++ .../window_functions/window_funnel.out | 5 + .../nereids_p0/aggregate/window_funnel.groovy | 361 +++++++++++++++++++ .../window_functions/window_funnel.sql | 71 ++-- 11 files changed, 928 insertions(+), 63 deletions(-) diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index 248bcfc7e1a..4cd1a85622c 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -71,7 +71,7 @@ private: * g. do local merge of remote runtime filter * h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS */ -constexpr inline int BeExecVersionManager::max_be_exec_version = 4; +constexpr inline int BeExecVersionManager::max_be_exec_version = 5; constexpr inline int BeExecVersionManager::min_be_exec_version = 0; /// functional diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index 45f82b23ac9..a36a95b6eef 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -136,6 +136,18 @@ static size_t find_byte(const std::vector<T>& vec, size_t start, T byte) { return (T*)p - vec.data(); } +template <class T> +static size_t find_byte(const T* data, size_t start, size_t end, T byte) { + if (start >= end) { + return start; + } + const void* p = std::memchr((const void*)(data + start), byte, end - start); + if (p == nullptr) { + return end; + } + return (T*)p - data; +} + template <typename T> bool contain_byte(const T* __restrict data, const size_t length, const signed char byte) { return nullptr != std::memchr(reinterpret_cast<const void*>(data), byte, length); @@ -145,6 +157,10 @@ inline size_t find_one(const std::vector<uint8_t>& vec, size_t start) { return find_byte<uint8_t>(vec, start, 1); } +inline size_t find_one(const uint8_t* data, size_t start, size_t end) { + return find_byte<uint8_t>(data, start, end, 1); +} + inline size_t find_zero(const std::vector<uint8_t>& vec, size_t start) { return find_byte<uint8_t>(vec, start, 0); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index cbae8cd28fe..f91311cdf92 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -54,6 +54,7 @@ void register_aggregate_function_group_array_intersect(AggregateFunctionSimpleFa void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_percentile(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory); +void register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_retention(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_percentile_approx(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory); @@ -96,6 +97,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_percentile(instance); register_aggregate_function_percentile_approx(instance); register_aggregate_function_window_funnel(instance); + register_aggregate_function_window_funnel_old(instance); register_aggregate_function_retention(instance); register_aggregate_function_orthogonal_bitmap(instance); register_aggregate_function_collect_list(instance); diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp index 6d9ca27fe10..afd6ceb569c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp @@ -24,11 +24,33 @@ #include "common/logging.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/helpers.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { +AggregateFunctionPtr create_aggregate_function_window_funnel_old(const std::string& name, + const DataTypes& argument_types, + const bool result_is_nullable) { + if (argument_types.size() < 3) { + LOG(WARNING) << "window_funnel's argument less than 3."; + return nullptr; + } + if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) { + return creator_without_type::create< + AggregateFunctionWindowFunnelOld<DateV2Value<DateTimeV2ValueType>, UInt64>>( + argument_types, result_is_nullable); + } else if (WhichDataType(remove_nullable(argument_types[2])).is_date_time()) { + return creator_without_type::create< + AggregateFunctionWindowFunnelOld<VecDateTimeValue, Int64>>(argument_types, + result_is_nullable); + } else { + LOG(WARNING) << "Only support DateTime type as window argument!"; + return nullptr; + } +} + AggregateFunctionPtr create_aggregate_function_window_funnel(const std::string& name, const DataTypes& argument_types, const bool result_is_nullable) { @@ -38,11 +60,12 @@ AggregateFunctionPtr create_aggregate_function_window_funnel(const std::string& } if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) { return creator_without_type::create< - AggregateFunctionWindowFunnel<DateV2Value<DateTimeV2ValueType>, UInt64>>( - argument_types, result_is_nullable); + AggregateFunctionWindowFunnel<TypeIndex::DateTimeV2, UInt64>>(argument_types, + result_is_nullable); } else if (WhichDataType(remove_nullable(argument_types[2])).is_date_time()) { - return creator_without_type::create<AggregateFunctionWindowFunnel<VecDateTimeValue, Int64>>( - argument_types, result_is_nullable); + return creator_without_type::create< + AggregateFunctionWindowFunnel<TypeIndex::DateTime, Int64>>(argument_types, + result_is_nullable); } else { LOG(WARNING) << "Only support DateTime type as window argument!"; return nullptr; @@ -52,4 +75,10 @@ AggregateFunctionPtr create_aggregate_function_window_funnel(const std::string& void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory) { factory.register_function_both("window_funnel", create_aggregate_function_window_funnel); } +void register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory& factory) { + factory.register_alternative_function("window_funnel", + create_aggregate_function_window_funnel_old, true); + factory.register_alternative_function("window_funnel", + create_aggregate_function_window_funnel_old, false); +} } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 9356cfd4b68..3ab93646775 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -21,26 +21,29 @@ #pragma once -#include <stddef.h> -#include <stdint.h> +#include <gen_cpp/data.pb.h> #include <algorithm> #include <boost/iterator/iterator_facade.hpp> #include <iterator> #include <memory> #include <optional> +#include <type_traits> #include <utility> -#include <vector> #include "agent/be_exec_version_manager.h" #include "common/compiler_util.h" +#include "common/exception.h" #include "util/binary_cast.hpp" +#include "util/simd/bits.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column_string.h" -#include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_factory.hpp" #include "vec/data_types/data_type_number.h" #include "vec/io/var_int.h" #include "vec/runtime/vdatetime_value.h" @@ -72,8 +75,359 @@ WindowFunnelMode string_to_window_funnel_mode(const String& string) { } } -template <typename DateValueType, typename NativeType> +template <TypeIndex TYPE_INDEX, typename NativeType> struct WindowFunnelState { + using DateValueType = std::conditional_t<TYPE_INDEX == TypeIndex::DateTimeV2, + DateV2Value<DateTimeV2ValueType>, VecDateTimeValue>; + int event_count = 0; + int64_t window; + bool enable_mode; + WindowFunnelMode window_funnel_mode; + mutable vectorized::MutableBlock mutable_block; + ColumnVector<NativeType>::Container* timestamp_column_data; + std::vector<ColumnVector<UInt8>::Container*> event_columns_datas; + SortDescription sort_description {1}; + bool sorted; + + WindowFunnelState() { + event_count = 0; + window = 0; + window_funnel_mode = WindowFunnelMode::INVALID; + + sort_description[0].column_number = 0; + sort_description[0].direction = 1; + sort_description[0].nulls_direction = -1; + sorted = false; + } + WindowFunnelState(int arg_event_count) : WindowFunnelState() { + event_count = arg_event_count; + auto timestamp_column = ColumnVector<NativeType>::create(); + timestamp_column_data = + &assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data(); + + MutableColumns event_columns; + for (int i = 0; i < event_count; i++) { + auto event_column = ColumnVector<UInt8>::create(); + event_columns_datas.emplace_back( + &assert_cast<ColumnVector<UInt8>&>(*event_column).get_data()); + event_columns.emplace_back(std::move(event_column)); + } + Block tmp_block; + tmp_block.insert({std::move(timestamp_column), + DataTypeFactory::instance().create_data_type(TYPE_INDEX), "timestamp"}); + for (int i = 0; i < event_count; i++) { + tmp_block.insert({std::move(event_columns[i]), + DataTypeFactory::instance().create_data_type(TypeIndex::UInt8), + "event_" + std::to_string(i)}); + } + + mutable_block = MutableBlock(std::move(tmp_block)); + } + + void reset() { + window = 0; + mutable_block.clear(); + timestamp_column_data = nullptr; + event_columns_datas.clear(); + sorted = false; + } + + void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, WindowFunnelMode mode) { + window = win; + window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT; + + timestamp_column_data->push_back( + assert_cast<const ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]); + for (int i = 0; i < event_count; i++) { + event_columns_datas[i]->push_back( + assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 + i]) + .get_data()[row_num]); + } + } + + void sort() { + if (sorted) { + return; + } + + Block tmp_block = mutable_block.to_block(); + auto block = tmp_block.clone_without_columns(); + sort_block(tmp_block, block, sort_description, 0); + mutable_block = MutableBlock(std::move(block)); + sorted = true; + } + + template <WindowFunnelMode WINDOW_FUNNEL_MODE> + int _match_event_list(size_t& start_row, size_t row_count, + const NativeType* timestamp_data) const { + int matched_count = 0; + DateValueType start_timestamp; + DateValueType end_timestamp; + TimeInterval interval(SECOND, window, false); + + int column_idx = 1; + const auto& first_event_column = mutable_block.get_column_by_position(column_idx); + const auto& first_event_data = + assert_cast<const ColumnVector<UInt8>&>(*first_event_column).get_data(); + auto match_row = simd::find_one(first_event_data.data(), start_row, row_count); + start_row = match_row + 1; + if (match_row < row_count) { + auto prev_timestamp = binary_cast<NativeType, DateValueType>(timestamp_data[match_row]); + end_timestamp = prev_timestamp; + end_timestamp.template date_add_interval<SECOND>(interval); + + matched_count++; + + column_idx++; + auto last_match_row = match_row; + for (; column_idx < event_count + 1; column_idx++) { + const auto& event_column = mutable_block.get_column_by_position(column_idx); + const auto& event_data = + assert_cast<const ColumnVector<UInt8>&>(*event_column).get_data(); + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::FIXED) { + ++match_row; + if (event_data[match_row] == 1) { + auto current_timestamp = + binary_cast<NativeType, DateValueType>(timestamp_data[match_row]); + if (current_timestamp <= end_timestamp) { + matched_count++; + continue; + } + } + break; + } + match_row = simd::find_one(event_data.data(), match_row + 1, row_count); + if (match_row < row_count) { + auto current_timestamp = + binary_cast<NativeType, DateValueType>(timestamp_data[match_row]); + bool is_matched = current_timestamp <= end_timestamp; + if (is_matched) { + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { + is_matched = current_timestamp > prev_timestamp; + } + } + if (!is_matched) { + break; + } + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { + prev_timestamp = + binary_cast<NativeType, DateValueType>(timestamp_data[match_row]); + } + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::DEDUPLICATION) { + bool is_dup = false; + if (match_row != last_match_row + 1) { + for (int tmp_column_idx = 1; tmp_column_idx < column_idx; + tmp_column_idx++) { + const auto& tmp_event_column = + mutable_block.get_column_by_position(tmp_column_idx); + const auto& tmp_event_data = + assert_cast<const ColumnVector<UInt8>&>(*tmp_event_column) + .get_data(); + auto dup_match_row = simd::find_one(tmp_event_data.data(), + last_match_row + 1, match_row); + if (dup_match_row < match_row) { + is_dup = true; + break; + } + } + } + if (is_dup) { + break; + } + last_match_row = match_row; + } + matched_count++; + } else { + break; + } + } + } + return matched_count; + } + + template <WindowFunnelMode WINDOW_FUNNEL_MODE> + int _get_internal() const { + size_t start_row = 0; + int max_found_event_count = 0; + const auto& ts_column = mutable_block.get_column_by_position(0)->get_ptr(); + const auto& timestamp_data = + assert_cast<const ColumnVector<NativeType>&>(*ts_column).get_data().data(); + + auto row_count = mutable_block.rows(); + while (start_row < row_count) { + auto found_event_count = + _match_event_list<WINDOW_FUNNEL_MODE>(start_row, row_count, timestamp_data); + if (found_event_count == event_count) { + return found_event_count; + } + max_found_event_count = std::max(max_found_event_count, found_event_count); + } + return max_found_event_count; + } + int get() const { + auto row_count = mutable_block.rows(); + if (event_count == 0 || row_count == 0) { + return 0; + } + switch (window_funnel_mode) { + case WindowFunnelMode::DEFAULT: + return _get_internal<WindowFunnelMode::DEFAULT>(); + case WindowFunnelMode::DEDUPLICATION: + return _get_internal<WindowFunnelMode::DEDUPLICATION>(); + case WindowFunnelMode::FIXED: + return _get_internal<WindowFunnelMode::FIXED>(); + case WindowFunnelMode::INCREASE: + return _get_internal<WindowFunnelMode::INCREASE>(); + default: + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid window_funnel mode"); + return 0; + } + } + + void merge(const WindowFunnelState& other) { + if (!other.mutable_block.empty()) { + auto st = mutable_block.merge(other.mutable_block.to_block()); + if (!st.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, st.to_string()); + return; + } + } + + event_count = event_count > 0 ? event_count : other.event_count; + window = window > 0 ? window : other.window; + if (enable_mode) { + window_funnel_mode = window_funnel_mode == WindowFunnelMode::INVALID + ? other.window_funnel_mode + : window_funnel_mode; + } else { + window_funnel_mode = WindowFunnelMode::DEFAULT; + } + } + + void write(BufferWritable& out) const { + write_var_int(event_count, out); + write_var_int(window, out); + if (enable_mode) { + write_var_int(static_cast<std::underlying_type_t<WindowFunnelMode>>(window_funnel_mode), + out); + } + PBlock pblock; + size_t uncompressed_bytes = 0; + size_t compressed_bytes = 0; + Status status; + std::string buff; + Block block = mutable_block.to_block(); + status = block.serialize( + BeExecVersionManager::get_newest_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, + segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio + if (!status.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); + return; + } + if (!pblock.SerializeToString(&buff)) { + throw doris::Exception(ErrorCode::SERIALIZE_PROTOBUF_ERROR, + "Serialize window_funnel data"); + return; + } + auto data_bytes = buff.size(); + write_var_uint(data_bytes, out); + out.write(buff.data(), data_bytes); + } + + void read(BufferReadable& in) { + int64_t event_level; + read_var_int(event_level, in); + event_count = (int)event_level; + read_var_int(window, in); + window_funnel_mode = WindowFunnelMode::DEFAULT; + if (enable_mode) { + int64_t mode; + read_var_int(mode, in); + window_funnel_mode = static_cast<WindowFunnelMode>(mode); + } + size_t data_bytes = 0; + read_var_uint(data_bytes, in); + std::string buff; + buff.resize(data_bytes); + in.read(buff.data(), data_bytes); + + PBlock pblock; + if (!pblock.ParseFromArray(buff.data(), data_bytes)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "Failed to parse window_funnel data to block"); + } + Block block; + auto status = block.deserialize(pblock); + if (!status.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); + } + mutable_block = MutableBlock(std::move(block)); + } +}; + +template <TypeIndex TYPE_INDEX, typename NativeType> +class AggregateFunctionWindowFunnel + : public IAggregateFunctionDataHelper< + WindowFunnelState<TYPE_INDEX, NativeType>, + AggregateFunctionWindowFunnel<TYPE_INDEX, NativeType>> { +public: + AggregateFunctionWindowFunnel(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper<WindowFunnelState<TYPE_INDEX, NativeType>, + AggregateFunctionWindowFunnel<TYPE_INDEX, NativeType>>( + argument_types_) {} + + void create(AggregateDataPtr __restrict place) const override { + auto data = new (place) WindowFunnelState<TYPE_INDEX, NativeType>( + IAggregateFunction::get_argument_types().size() - 3); + /// support window funnel mode from 2.0. See `BeExecVersionManager::max_be_exec_version` + data->enable_mode = version >= 3; + } + + String get_name() const override { return "window_funnel"; } + + DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt32>(); } + + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, + Arena*) const override { + const auto& window = + assert_cast<const ColumnVector<Int64>&>(*columns[0]).get_data()[row_num]; + StringRef mode = columns[1]->get_data_at(row_num); + this->data(place).add(columns, row_num, window, + string_to_window_funnel_mode(mode.to_string())); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, + Arena*) const override { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, + Arena*) const override { + this->data(place).read(buf); + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + this->data(const_cast<AggregateDataPtr>(place)).sort(); + assert_cast<ColumnInt32&>(to).get_data().push_back( + IAggregateFunctionDataHelper< + WindowFunnelState<TYPE_INDEX, NativeType>, + AggregateFunctionWindowFunnel<TYPE_INDEX, NativeType>>::data(place) + .get()); + } + +protected: + using IAggregateFunction::version; +}; + +template <typename DateValueType, typename NativeType> +struct WindowFunnelStateOld { std::vector<std::pair<DateValueType, int>> events; int max_event_level; bool sorted; @@ -81,7 +435,7 @@ struct WindowFunnelState { WindowFunnelMode window_funnel_mode; bool enable_mode; - WindowFunnelState() { + WindowFunnelStateOld() { sorted = true; max_event_level = 0; window = 0; @@ -185,7 +539,7 @@ struct WindowFunnelState { return 0; } - void merge(const WindowFunnelState& other) { + void merge(const WindowFunnelStateOld& other) { if (other.events.empty()) { return; } @@ -258,18 +612,19 @@ struct WindowFunnelState { }; template <typename DateValueType, typename NativeType> -class AggregateFunctionWindowFunnel +class AggregateFunctionWindowFunnelOld : public IAggregateFunctionDataHelper< - WindowFunnelState<DateValueType, NativeType>, - AggregateFunctionWindowFunnel<DateValueType, NativeType>> { + WindowFunnelStateOld<DateValueType, NativeType>, + AggregateFunctionWindowFunnelOld<DateValueType, NativeType>> { public: - AggregateFunctionWindowFunnel(const DataTypes& argument_types_) + AggregateFunctionWindowFunnelOld(const DataTypes& argument_types_) : IAggregateFunctionDataHelper< - WindowFunnelState<DateValueType, NativeType>, - AggregateFunctionWindowFunnel<DateValueType, NativeType>>(argument_types_) {} + WindowFunnelStateOld<DateValueType, NativeType>, + AggregateFunctionWindowFunnelOld<DateValueType, NativeType>>( + argument_types_) {} void create(AggregateDataPtr __restrict place) const override { - auto data = new (place) WindowFunnelState<DateValueType, NativeType>(); + auto data = new (place) WindowFunnelStateOld<DateValueType, NativeType>(); /// support window funnel mode from 2.0. See `BeExecVersionManager::max_be_exec_version` data->enable_mode = version >= 3; } @@ -318,8 +673,8 @@ public: this->data(const_cast<AggregateDataPtr>(place)).sort(); assert_cast<ColumnInt32&>(to).get_data().push_back( IAggregateFunctionDataHelper< - WindowFunnelState<DateValueType, NativeType>, - AggregateFunctionWindowFunnel<DateValueType, NativeType>>::data(place) + WindowFunnelStateOld<DateValueType, NativeType>, + AggregateFunctionWindowFunnelOld<DateValueType, NativeType>>::data(place) .get()); } diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp index bf5eacdadfa..5c4236814b3 100644 --- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp +++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp @@ -146,11 +146,15 @@ TEST_F(VWindowFunnelTest, testSerialize) { for (int i = 0; i < NUM_CONDS; i++) { agg_function->add(place, column, i, nullptr); } + ColumnVector<Int32> column_result; + agg_function->insert_result_into(place, column_result); + EXPECT_EQ(column_result.get_data()[0], 3); ColumnString buf; VectorBufferWriter buf_writer(buf); agg_function->serialize(place, buf_writer); buf_writer.commit(); + agg_function->destroy(place); std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]); AggregateDataPtr place2 = memory2.get(); @@ -159,11 +163,6 @@ TEST_F(VWindowFunnelTest, testSerialize) { VectorBufferReader buf_reader(buf.get_data_at(0)); agg_function->deserialize(place2, buf_reader, nullptr); - ColumnVector<Int32> column_result; - agg_function->insert_result_into(place, column_result); - EXPECT_EQ(column_result.get_data()[0], 3); - agg_function->destroy(place); - ColumnVector<Int32> column_result2; agg_function->insert_result_into(place2, column_result2); EXPECT_EQ(column_result2.get_data()[0], 3); diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 406395efa38..602e5dffdb2 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1829,7 +1829,7 @@ public class Config extends ConfigBase { * Max data version of backends serialize block. */ @ConfField(mutable = false) - public static int max_be_exec_version = 4; + public static int max_be_exec_version = 5; /** * Min data version of backends serialize block. diff --git a/regression-test/data/nereids_p0/aggregate/window_funnel.out b/regression-test/data/nereids_p0/aggregate/window_funnel.out index 3396dd90e82..f16f050c8e6 100644 --- a/regression-test/data/nereids_p0/aggregate/window_funnel.out +++ b/regression-test/data/nereids_p0/aggregate/window_funnel.out @@ -26,3 +26,102 @@ -- !window_funnel_increase -- 2 +-- !window_funnel_neq -- +2 + +-- !window_funnel_default0 -- +100123 4 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default1 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default2 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default3 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default4 -- +100123 2 +100125 2 +100126 2 +100127 2 + +-- !window_funnel_default5 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default6 -- +100123 4 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default7 -- +100123 2 +100125 2 +100126 1 +100127 1 + +-- !window_funnel_default8 -- +100123 4 +100125 2 +100126 0 +100127 1 + +-- !window_funnel_default9 -- +100123 4 +100125 3 +100126 4 +100127 2 + +-- !window_funnel_deduplication0 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_deduplication1 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_deduplication2 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_fixed0 -- +100123 2 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_fixed1 -- +100123 2 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_increase0 -- +100123 3 +100125 3 +100126 2 +100127 2 + diff --git a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out index 7d7a2488245..43948dc7951 100644 --- a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out +++ b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out @@ -77,3 +77,8 @@ -- !window_funnel_25 -- 1 +-- !window_funnel_26 -- +users 13 +browser 10 +buy 1 + diff --git a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy index 02562c49f48..5e4eeba7c14 100644 --- a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy +++ b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy @@ -294,4 +294,365 @@ suite("window_funnel") { from ${tableName} t; """ sql """ DROP TABLE IF EXISTS ${tableName} """ + + sql """ + CREATE TABLE windowfunnel_test ( + `xwho` varchar(50) NULL COMMENT 'xwho', + `xwhen` datetime COMMENT 'xwhen', + `xwhat` int NULL COMMENT 'xwhat' + ) + DUPLICATE KEY(xwho) + DISTRIBUTED BY HASH(xwho) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + INSERT into windowfunnel_test (xwho, xwhen, xwhat) values ('1', '2022-03-12 10:41:00', 1), + ('1', '2022-03-12 13:28:02', 2), + ('1', '2022-03-12 16:15:01', 3), + ('1', '2022-03-12 19:05:04', 4); + """ + qt_window_funnel_neq """ + select window_funnel(3600 * 24, 'default', t.xwhen, t.xwhat = 1, t.xwhat != 2,t.xwhat=3 ) AS level from windowfunnel_test t; + """ + + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + // test default mode + qt_window_funnel_default0 """ + SELECT + user_id, + window_funnel(3600 * 3, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // in 5 minutes + qt_window_funnel_default1 """ + SELECT + user_id, + window_funnel(300, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // in 30 seconds + qt_window_funnel_default2 """ + SELECT + user_id, + window_funnel(30, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default3 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '登录',event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default4 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '访问',event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default5 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '登录', event_name = '登录', event_name = '登录', event_name = '登录',event_name = '登录', event_name = '登录') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // complicate expressions + qt_window_funnel_default6 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name != '登陆', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id; + """ + qt_window_funnel_default7 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name != '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id; + """ + qt_window_funnel_default8 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, + event_name = '登录' AND phone_brand in ('HONOR', 'XIAOMI', 'VIVO') AND tab_num not in (4, 5), + event_name = '访问' AND tab_num not in (4, 5), + event_name = '下单' AND tab_num not in (6, 7), + event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id; + """ + + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + // test multiple matched event list, output the longest match + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5), + (100123, '登录', '2022-05-14 13:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 13:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 13:04:00', "HONOR", 3), + (100123, '付款', '2022-05-14 13:10:00', 'HONOR', 4), + (100126, '登录', '2022-05-15 14:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 14:01:00', 'HONOR', 2), + (100126, '下单', '2022-05-15 14:02:00', 'HONOR', 3), + (100126, '付款', '2022-05-15 14:03:00', 'HONOR', 4); + """ + qt_window_funnel_default9 """ + SELECT + user_id, + window_funnel(3600, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + // test deduplication mode + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication0 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '访问', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication1 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication2 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + + // test fixed mode + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '登录', '2022-05-14 10:03:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_fixed0 """ + SELECT + user_id, + window_funnel(3600, "fixed", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '登录2', '2022-05-14 10:03:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_fixed1 """ + SELECT + user_id, + window_funnel(3600, "fixed", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + // test increase mode + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:04:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_increase0 """ + SELECT + user_id, + window_funnel(3600, "increase", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + } diff --git a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql index 850be3e4f4b..00fc65c1cc8 100644 --- a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql +++ b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql @@ -53,41 +53,40 @@ insert into user_analysis values (1000012,'browse','2022-07-28 00:00:00'); insert into user_analysis values (1000013,'browse','2022-07-29 00:00:00'); insert into user_analysis values (1000014,'browse','2022-07-30 00:00:00'); insert into user_analysis values (1000015,'browse','2022-07-31 00:00:00'); ---- Nereids does't support window function ---- WITH ---- level_detail AS ( ---- SELECT ---- level ---- ,COUNT(1) AS count_user ---- FROM ( ---- SELECT ---- user_id ---- ,window_funnel( ---- 1800 ---- ,'default' ---- ,event_time ---- ,event_type = 'browse' ---- ,event_type = 'favorite' ---- ,event_type = 'shopping cart' ---- ,event_type = 'buy' ---- ) AS level ---- FROM user_analysis ---- WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00' ---- AND event_time < TIMESTAMP '2022-07-31 00:00:00' ---- GROUP BY user_id ---- ) AS basic_table ---- GROUP BY level ---- ORDER BY level ASC ) ---- SELECT CASE level WHEN 0 THEN 'users' ---- WHEN 1 THEN 'browser' ---- WHEN 2 THEN 'favorite' ---- WHEN 3 THEN 'shopping cart' ---- WHEN 4 THEN 'buy' ---- END ---- ,SUM(count_user) over ( ORDER BY level DESC ) ---- FROM level_detail ---- GROUP BY level ---- ,count_user ---- ORDER BY level ASC; +WITH + level_detail AS ( + SELECT + level + ,COUNT(1) AS count_user + FROM ( + SELECT + user_id + ,window_funnel( + 1800 + ,'default' + ,event_time + ,event_type = 'browse' + ,event_type = 'favorite' + ,event_type = 'shopping cart' + ,event_type = 'buy' + ) AS level + FROM user_analysis + WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00' + AND event_time < TIMESTAMP '2022-07-31 00:00:00' + GROUP BY user_id + ) AS basic_table + GROUP BY level + ORDER BY level ASC ) +SELECT CASE level WHEN 0 THEN 'users' + WHEN 1 THEN 'browser' + WHEN 2 THEN 'favorite' + WHEN 3 THEN 'shopping cart' + WHEN 4 THEN 'buy' + END + ,SUM(count_user) over ( ORDER BY level DESC ) +FROM level_detail +GROUP BY level + ,count_user +ORDER BY level ASC; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org