This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7a2c1b8299d [Function](Exec) Support windows function cume_dist (#30997) 7a2c1b8299d is described below commit 7a2c1b8299d904cb2e1cf8da14fc2c19b52746dc Author: nanfeng <nanfeng_...@163.com> AuthorDate: Sat Feb 10 11:24:58 2024 +0800 [Function](Exec) Support windows function cume_dist (#30997) --- .../aggregate_function_window.cpp | 1 + .../aggregate_function_window.h | 53 +++++++ .../window-functions/window-function-cume-dist.md | 71 +++++++++ docs/sidebars.json | 1 + .../window-functions/window-function-cume-dist.md | 71 +++++++++ .../doris/catalog/BuiltinWindowFunctions.java | 4 +- .../java/org/apache/doris/catalog/FunctionSet.java | 8 ++ .../rules/analysis/WindowFunctionChecker.java | 13 ++ .../expressions/functions/window/CumeDist.java | 61 ++++++++ .../expressions/visitor/WindowFunctionVisitor.java | 5 + .../window_function/window_fn.out | 158 ++++++++++++++------- .../window_functions/test_window_fn.out | 32 +++-- .../window_functions/test_window_function.out | 23 ++- .../window_function/window_fn.groovy | 12 +- .../window_functions/test_window_fn.groovy | 3 + .../window_functions/test_window_function.groovy | 2 + 16 files changed, 450 insertions(+), 68 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.cpp b/be/src/vec/aggregate_functions/aggregate_function_window.cpp index ce82651b3fe..44575588187 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_window.cpp @@ -106,6 +106,7 @@ void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& fac creator_without_type::creator<WindowFunctionPercentRank>); factory.register_function("row_number", creator_without_type::creator<WindowFunctionRowNumber>); factory.register_function("ntile", creator_without_type::creator<WindowFunctionNTile>); + factory.register_function("cume_dist", 
creator_without_type::creator<WindowFunctionCumeDist>); } void register_aggregate_function_window_lead_lag_first_last( diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 7dbe13a840e..5ce46495464 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -231,6 +231,59 @@ public: void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; +struct CumeDistData { + int64_t numerator = 0; + int64_t denominator = 0; + int64_t peer_group_start = 0; +}; + +class WindowFunctionCumeDist final + : public IAggregateFunctionDataHelper<CumeDistData, WindowFunctionCumeDist> { +private: + static void check_default(AggregateDataPtr place, int64_t partition_start, + int64_t partition_end) { + if (data(place).denominator == 0) { + data(place).denominator = partition_end - partition_start; + } + } + +public: + WindowFunctionCumeDist(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper(argument_types_) {} + + String get_name() const override { return "cume_dist"; } + + DataTypePtr get_return_type() const override { return std::make_shared<DataTypeFloat64>(); } + + void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {} + + void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, + int64_t frame_end, AggregateDataPtr place, const IColumn** columns, + Arena* arena) const override { + check_default(place, partition_start, partition_end); + int64_t peer_group_count = frame_end - frame_start; + if (WindowFunctionCumeDist::data(place).peer_group_start != frame_start) { + WindowFunctionCumeDist::data(place).peer_group_start = frame_start; + WindowFunctionCumeDist::data(place).numerator += peer_group_count; + } + } + + void reset(AggregateDataPtr place) const override { + 
WindowFunctionCumeDist::data(place).numerator = 0; + WindowFunctionCumeDist::data(place).denominator = 0; + WindowFunctionCumeDist::data(place).peer_group_start = -1; + } + + void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { + auto cume_dist = data(place).numerator * 1.0 / data(place).denominator; + assert_cast<ColumnFloat64&>(to).get_data().push_back(cume_dist); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} + void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} + void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} +}; + struct NTileData { int64_t bucket_index = 0; int64_t rows = 0; diff --git a/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md b/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md new file mode 100644 index 00000000000..434428875c7 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md @@ -0,0 +1,71 @@ +--- +{ + "title": "WINDOW_FUNCTION_CUME_DIST", + "language": "en" +} +--- + +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> + +## WINDOW FUNCTION CUME_DIST +### description + +CUME_DIST (Cumulative Distribution) is a window function that calculates the relative position of the current row's value within its sorted partition. It returns the cumulative distribution of the current row's value, i.e., the ratio of the number of rows ordered at or before the current row (including rows with the same value) to the total number of rows in the partition. With an ascending sort, this is the number of rows whose value is less than or equal to the current row's value divided by the partition's row count. Rows with equal values receive the same result. + +```sql +CUME_DIST() OVER(partition_by_clause order_by_clause) +``` + +### example +Suppose there is a table named sales containing sales data, including salesperson name (sales_person), sales amount (sales_amount), and sales date (sales_date). We want to calculate, for each salesperson on each sales date, the fraction of that day's sales records whose sales amount is less than or equal to that salesperson's sales amount. +```sql +SELECT + sales_person, + sales_date, + sales_amount, + CUME_DIST() OVER (PARTITION BY sales_date ORDER BY sales_amount ASC) AS cumulative_sales_percentage +FROM + sales; +``` + +Suppose the data in the sales table is as follows: + +```sql ++------+--------------+------------+--------------+ +| id | sales_person | sales_date | sales_amount | ++------+--------------+------------+--------------+ +| 1 | Alice | 2024-02-01 | 2000 | +| 2 | Bob | 2024-02-01 | 1500 | +| 3 | Alice | 2024-02-02 | 1800 | +| 4 | Bob | 2024-02-02 | 1200 | +| 5 | Alice | 2024-02-03 | 2200 | +| 6 | Bob | 2024-02-03 | 1900 | +| 7 | Tom | 2024-02-03 | 2000 | +| 8 | Jerry | 2024-02-03 | 2000 | ++------+--------------+------------+--------------+ +``` + +After executing the above SQL query, the result will display the sales amount for each salesperson on each sales date and its cumulative distribution value within that sales date. 
+```sql ++--------------+------------+--------------+-----------------------------+ +| sales_person | sales_date | sales_amount | cumulative_sales_percentage | ++--------------+------------+--------------+-----------------------------+ +| Bob | 2024-02-01 | 1500 | 0.5 | +| Alice | 2024-02-01 | 2000 | 1 | +| Bob | 2024-02-02 | 1200 | 0.5 | +| Alice | 2024-02-02 | 1800 | 1 | +| Bob | 2024-02-03 | 1900 | 0.25 | +| Tom | 2024-02-03 | 2000 | 0.75 | +| Jerry | 2024-02-03 | 2000 | 0.75 | +| Alice | 2024-02-03 | 2200 | 1 | ++--------------+------------+--------------+-----------------------------+ +``` +In this example, the CUME_DIST() function orders the rows of each sales date by sales amount and then, for each row, returns the fraction of that day's rows whose sales amount is less than or equal to the current row's sales amount. Rows with the same sales amount, such as Tom and Jerry on 2024-02-03, receive the same value (3 of 4 rows, i.e. 0.75). Since we use PARTITION BY sales_date, the calculation is done within each sales date, and rows from different dates are evaluated separately. 
+### keywords + + WINDOW,FUNCTION,CUME_DIST + diff --git a/docs/sidebars.json b/docs/sidebars.json index 7ee8bb453eb..fd429fbfa3c 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -802,6 +802,7 @@ "sql-manual/sql-functions/window-functions/window-function-rank", "sql-manual/sql-functions/window-functions/window-function-dense-rank", "sql-manual/sql-functions/window-functions/window-function-percent-rank", + "sql-manual/sql-functions/window-functions/window-function-cume-dist", "sql-manual/sql-functions/window-functions/window-function-first-value", "sql-manual/sql-functions/window-functions/window-function-last-value", "sql-manual/sql-functions/window-functions/window-function-lead", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md new file mode 100644 index 00000000000..8d1ddea9e8e --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-cume-dist.md @@ -0,0 +1,71 @@ +--- +{ + "title": "WINDOW_FUNCTION_CUME_DIST", + "language": "zh-CN" +} +--- + +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
--> + +## WINDOW FUNCTION CUME_DIST +### description + +CUME_DIST (Cumulative Distribution) 是一种窗口函数,它常用于计算当前行值在排序后分区中的相对位置。它返回的是当前行值在分区中的累积分布,即在排序后的结果中小于或等于当前行值的行数与分区总行数的比例。值相同的行得到相同的结果。 + +```sql +CUME_DIST() OVER(partition_by_clause order_by_clause) +``` + +### example +假设有一个表格 sales 包含销售数据,其中包括销售员姓名 (sales_person)、销售额 (sales_amount) 和销售日期 (sales_date)。我们想要计算在每个销售日期内,销售额小于或等于该销售员销售额的记录数占当日总记录数的比例。 + +```sql +SELECT + sales_person, + sales_date, + sales_amount, + CUME_DIST() OVER (PARTITION BY sales_date ORDER BY sales_amount ASC) AS cumulative_sales_percentage +FROM + sales; +``` + +假设表格 sales 中的数据如下: +```sql ++------+--------------+------------+--------------+ +| id | sales_person | sales_date | sales_amount | ++------+--------------+------------+--------------+ +| 1 | Alice | 2024-02-01 | 2000 | +| 2 | Bob | 2024-02-01 | 1500 | +| 3 | Alice | 2024-02-02 | 1800 | +| 4 | Bob | 2024-02-02 | 1200 | +| 5 | Alice | 2024-02-03 | 2200 | +| 6 | Bob | 2024-02-03 | 1900 | +| 7 | Tom | 2024-02-03 | 2000 | +| 8 | Jerry | 2024-02-03 | 2000 | ++------+--------------+------------+--------------+ +``` +执行上述 SQL 查询后,结果将显示每个销售员在每个销售日期的销售额以及其在该销售日期内的累积分布值。 +```sql ++--------------+------------+--------------+-----------------------------+ +| sales_person | sales_date | sales_amount | cumulative_sales_percentage | ++--------------+------------+--------------+-----------------------------+ +| Bob | 2024-02-01 | 1500 | 0.5 | +| Alice | 2024-02-01 | 2000 | 1 | +| Bob | 2024-02-02 | 1200 | 0.5 | +| Alice | 2024-02-02 | 1800 | 1 | +| Bob | 2024-02-03 | 1900 | 0.25 | +| Tom | 2024-02-03 | 2000 | 0.75 | +| Jerry | 2024-02-03 | 2000 | 0.75 | +| Alice | 2024-02-03 | 2200 | 1 | ++--------------+------------+--------------+-----------------------------+ +``` +在这个例子中,CUME_DIST() 函数在每个销售日期内按销售额进行排序,然后对每一行计算当日销售额小于或等于该行销售额的行数占当日总行数的比例。销售额相同的行(例如 2024-02-03 的 Tom 和 Jerry)得到相同的结果(4 行中的 3 行,即 0.75)。由于我们使用了 PARTITION BY sales_date,所以计算是在每个销售日期内进行的,不同日期的数据被分别计算。 + +### keywords + + WINDOW,FUNCTION,CUME_DIST + diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java index a23088d25f3..b35903d29fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java @@ -17,6 +17,7 @@ package org.apache.doris.catalog; +import org.apache.doris.nereids.trees.expressions.functions.window.CumeDist; import org.apache.doris.nereids.trees.expressions.functions.window.DenseRank; import org.apache.doris.nereids.trees.expressions.functions.window.FirstValue; import org.apache.doris.nereids.trees.expressions.functions.window.Lag; @@ -46,7 +47,8 @@ public class BuiltinWindowFunctions implements FunctionHelper { window(Ntile.class, "ntile"), window(PercentRank.class, "percent_rank"), window(Rank.class, "rank"), - window(RowNumber.class, "row_number") + window(RowNumber.class, "row_number"), + window(CumeDist.class, "cume_dist") ); public static final BuiltinWindowFunctions INSTANCE = new BuiltinWindowFunctions(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 7a16f365465..e2dd59d680c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1608,6 +1608,14 @@ public class FunctionSet<T> { null, "", "", true)); + // vec cume_dist + addBuiltin(AggregateFunction.createAnalyticBuiltin("cume_dist", + Lists.<Type>newArrayList(), Type.DOUBLE, Type.VARCHAR, + "", + "", + null, + "", + "", true)); //vec row_number addBuiltin(AggregateFunction.createAnalyticBuiltin("row_number", new ArrayList<Type>(), Type.BIGINT, Type.BIGINT, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java index 3f805327b22..036b6be8b20 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.trees.expressions.WindowFrame; import org.apache.doris.nereids.trees.expressions.WindowFrame.FrameBoundType; import org.apache.doris.nereids.trees.expressions.WindowFrame.FrameBoundary; import org.apache.doris.nereids.trees.expressions.WindowFrame.FrameUnitsType; +import org.apache.doris.nereids.trees.expressions.functions.window.CumeDist; import org.apache.doris.nereids.trees.expressions.functions.window.DenseRank; import org.apache.doris.nereids.trees.expressions.functions.window.FirstOrLastValue; import org.apache.doris.nereids.trees.expressions.functions.window.FirstValue; @@ -387,6 +388,18 @@ public class WindowFunctionChecker extends DefaultExpressionVisitor<Expression, return rowNumber; } + /** + * required WindowFrame: (RANGE, UNBOUNDED PRECEDING, CURRENT ROW) + */ + @Override + public CumeDist visitCumeDist(CumeDist cumeDist, Void ctx) { + WindowFrame requiredFrame = new WindowFrame(FrameUnitsType.RANGE, + FrameBoundary.newPrecedingBoundary(), FrameBoundary.newCurrentRowBoundary()); + + checkAndCompleteWindowFrame(requiredFrame, cumeDist.getName()); + return cumeDist; + } + /** * required WindowFrame: (ROWS, UNBOUNDED PRECEDING, CURRENT ROW) */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/CumeDist.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/CumeDist.java new file mode 100644 index 00000000000..fde939f95f1 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/CumeDist.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation 
(ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.window; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; +import org.apache.doris.nereids.trees.expressions.shape.LeafExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.DoubleType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * Window function: cume_dist() + */ +public class CumeDist extends WindowFunction implements AlwaysNotNullable, LeafExpression { + + public CumeDist() { + super("cume_dist"); + } + + @Override + public List<FunctionSignature> getSignatures() { + return ImmutableList.of(FunctionSignature.ret(DoubleType.INSTANCE).args()); + } + + @Override + public FunctionSignature searchSignature(List<FunctionSignature> signatures) { + return signatures.get(0); + } + + @Override + public CumeDist withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 0); + return new CumeDist(); + } + + @Override + public 
<R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitCumeDist(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java index 29bbed53225..90782adc8c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.expressions.visitor; +import org.apache.doris.nereids.trees.expressions.functions.window.CumeDist; import org.apache.doris.nereids.trees.expressions.functions.window.DenseRank; import org.apache.doris.nereids.trees.expressions.functions.window.FirstValue; import org.apache.doris.nereids.trees.expressions.functions.window.Lag; @@ -68,4 +69,8 @@ public interface WindowFunctionVisitor<R, C> { default R visitRowNumber(RowNumber rowNumber, C context) { return visitWindowFunction(rowNumber, context); } + + default R visitCumeDist(CumeDist cumeDist, C context) { + return visitWindowFunction(cumeDist, context); + } } diff --git a/regression-test/data/nereids_function_p0/window_function/window_fn.out b/regression-test/data/nereids_function_p0/window_function/window_fn.out index 8f51aa3f3c0..843598a6c1d 100644 --- a/regression-test/data/nereids_function_p0/window_function/window_fn.out +++ b/regression-test/data/nereids_function_p0/window_function/window_fn.out @@ -3538,62 +3538,120 @@ string3 9 3 string3 12 4 -- !sql_percent_rank_pb -- -null 0 -string1 0 -string1 0 -string1 0 -string1 0 -string2 0 -string2 0 -string2 0 -string2 0 -string3 0 -string3 0 -string3 0 -string3 0 +null 0.0 +string1 0.0 +string1 0.0 +string1 0.0 +string1 0.0 +string2 0.0 +string2 0.0 +string2 0.0 +string2 0.0 +string3 0.0 +string3 0.0 +string3 0.0 +string3 0.0 
-- !sql_percent_rank_pb_ob -- -null \N 0 -string1 1 0 -string1 4 0.33333333333333331 -string1 7 0.66666666666666663 -string1 10 1 -string2 2 0 -string2 5 0.33333333333333331 -string2 8 0.66666666666666663 -string2 11 1 -string3 3 0 -string3 6 0.33333333333333331 -string3 9 0.66666666666666663 -string3 12 1 +null \N 0.0 +string1 1 0.0 +string1 4 0.3333333333333333 +string1 7 0.6666666666666666 +string1 10 1.0 +string2 2 0.0 +string2 5 0.3333333333333333 +string2 8 0.6666666666666666 +string2 11 1.0 +string3 3 0.0 +string3 6 0.3333333333333333 +string3 9 0.6666666666666666 +string3 12 1.0 -- !sql_percent_rank_pb_notnull -- -string1 0 -string1 0 -string1 0 -string1 0 -string2 0 -string2 0 -string2 0 -string2 0 -string3 0 -string3 0 -string3 0 -string3 0 +string1 0.0 +string1 0.0 +string1 0.0 +string1 0.0 +string2 0.0 +string2 0.0 +string2 0.0 +string2 0.0 +string3 0.0 +string3 0.0 +string3 0.0 +string3 0.0 -- !sql_percent_rank_pb_ob_notnull -- -string1 1 0 -string1 4 0.33333333333333331 -string1 7 0.66666666666666663 -string1 10 1 -string2 2 0 -string2 5 0.33333333333333331 -string2 8 0.66666666666666663 -string2 11 1 -string3 3 0 -string3 6 0.33333333333333331 -string3 9 0.66666666666666663 -string3 12 1 +string1 1 0.0 +string1 4 0.3333333333333333 +string1 7 0.6666666666666666 +string1 10 1.0 +string2 2 0.0 +string2 5 0.3333333333333333 +string2 8 0.6666666666666666 +string2 11 1.0 +string3 3 0.0 +string3 6 0.3333333333333333 +string3 9 0.6666666666666666 +string3 12 1.0 + +-- !sql_cume_dist_pb -- +null 1.0 +string1 1.0 +string1 1.0 +string1 1.0 +string1 1.0 +string2 1.0 +string2 1.0 +string2 1.0 +string2 1.0 +string3 1.0 +string3 1.0 +string3 1.0 +string3 1.0 + +-- !sql_cume_dist_pb_ob -- +null \N 1.0 +string1 1 0.25 +string1 4 0.5 +string1 7 0.75 +string1 10 1.0 +string2 2 0.25 +string2 5 0.5 +string2 8 0.75 +string2 11 1.0 +string3 3 0.25 +string3 6 0.5 +string3 9 0.75 +string3 12 1.0 + +-- !sql_cume_dist_pb_notnull -- +string1 1.0 +string1 1.0 +string1 1.0 
+string1 1.0 +string2 1.0 +string2 1.0 +string2 1.0 +string2 1.0 +string3 1.0 +string3 1.0 +string3 1.0 +string3 1.0 + +-- !sql_cume_dist_pb_ob_notnull -- +string1 1 0.25 +string1 4 0.5 +string1 7 0.75 +string1 10 1.0 +string2 2 0.25 +string2 5 0.5 +string2 8 0.75 +string2 11 1.0 +string3 3 0.25 +string3 6 0.5 +string3 9 0.75 +string3 12 1.0 -- !sql_row_number_pb -- null 1 diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out b/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out index f3c52109632..ebb0ebed2f2 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out @@ -330,16 +330,28 @@ develop 11 2007-08-15 5200 9 2 3 3 -- !sql -- -0 0 0 -0 0 0 -1 4 0 -0 1 1 -0 1 1 -0.66666666666666663 7 1 -1 9 1 -0 0 2 -0 1 3 -1 3 3 +0.0 0 0 +0.0 0 0 +1.0 4 0 +0.0 1 1 +0.0 1 1 +0.6666666666666666 7 1 +1.0 9 1 +0.0 0 2 +0.0 1 3 +1.0 3 3 + +-- !sql -- +0.6666666666666666 0 0 +0.6666666666666666 0 0 +1.0 4 0 +0.5 1 1 +0.5 1 1 +0.75 7 1 +1.0 9 1 +1.0 0 2 +0.5 1 3 +1.0 3 3 -- !sql -- 0 9976146 4 diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out index adcee782506..5cfe5a4280b 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out @@ -206,15 +206,26 @@ JDR 2014-10-08T00:00:00.111111 13.98 14.75 3 2 2 -- !sql -- -1 1 0 +1 1 0.0 1 2 0.5 1 2 0.5 -2 1 0 +2 1 0.0 2 2 0.5 -2 3 1 -3 1 0 -3 1 0 -3 2 1 +2 3 1.0 +3 1 0.0 +3 1 0.0 +3 2 1.0 + +-- !sql -- +1 1 0.3333333333333333 +1 2 1.0 +1 2 1.0 +2 1 0.3333333333333333 +2 2 0.6666666666666666 +2 3 1.0 +3 1 0.6666666666666666 +3 1 0.6666666666666666 +3 2 1.0 -- !sql -- 1 1 1 diff --git 
a/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy b/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy index 1d66e5ad82b..903c59d2282 100644 --- a/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy +++ b/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy @@ -650,6 +650,16 @@ suite("nereids_win_fn") { qt_sql_percent_rank_pb_ob_notnull ''' select kstr, kint, percent_rank() over(partition by kstr order by kint) as wf from fn_test_not_nullable order by kstr, kint''' + qt_sql_cume_dist_pb ''' + select kstr, cume_dist() over(partition by kstr) as wf from fn_test order by kstr''' + qt_sql_cume_dist_pb_ob ''' + select kstr, kint, cume_dist() over(partition by kstr order by kint) as wf from fn_test order by kstr, kint''' + + qt_sql_cume_dist_pb_notnull ''' + select kstr, cume_dist() over(partition by kstr) as wf from fn_test_not_nullable order by kstr''' + qt_sql_cume_dist_pb_ob_notnull ''' + select kstr, kint, cume_dist() over(partition by kstr order by kint) as wf from fn_test_not_nullable order by kstr, kint''' + // qt_sql_row_number ''' // select row_number() over() as wf from fn_test''' // qt_sql_row_number_ob ''' @@ -667,4 +677,4 @@ suite("nereids_win_fn") { qt_sql_row_number_pb_ob_notnull ''' select kstr, kint, row_number() over(partition by kstr order by kint) as wf from fn_test_not_nullable order by kstr, kint''' -} \ No newline at end of file +} diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy index ab35e974b15..165d5cf1d42 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy @@ -220,6 +220,9 @@ suite("test_window_fn", "arrow_flight_sql") { qt_sql """ SELECT percent_rank() OVER (PARTITION BY four 
ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten; """ + qt_sql """ + SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten; + """ qt_sql """ select ten, sum(unique1) + sum(unique2) as res, rank() over (order by sum(unique1) + sum(unique2)) as rank from ${tbName2} group by ten order by ten; """ diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy index 1575358b7b0..17a21e93502 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy @@ -291,6 +291,8 @@ suite("test_window_function") { qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, dense_rank() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ // PERCENT_RANK qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, percent_rank() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ + // CUME_DIST + qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, cume_dist() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ // ROW_NUMBER qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, row_number() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org