This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit f17449b1ab298a457ca32e673339722db04b1970 Author: yangshijie <sjyang2...@zju.edu.cn> AuthorDate: Thu Feb 8 14:13:34 2024 +0800 [feature](window-func) support percent_rank window function (#30926) --- be/src/pipeline/exec/analytic_source_operator.cpp | 4 +- .../aggregate_function_window.cpp | 2 + .../aggregate_function_window.h | 59 +++++++++- be/src/vec/exec/vanalytic_eval_node.cpp | 2 +- .../window-function-percent-rank.md | 128 +++++++++++++++++++++ docs/sidebars.json | 1 + .../window-function-percent-rank.md | 128 +++++++++++++++++++++ .../doris/catalog/BuiltinWindowFunctions.java | 2 + .../java/org/apache/doris/catalog/FunctionSet.java | 16 +++ .../rules/analysis/WindowFunctionChecker.java | 13 +++ .../expressions/functions/window/PercentRank.java | 61 ++++++++++ .../expressions/visitor/WindowFunctionVisitor.java | 5 + .../window_function/window_fn.out | 58 ++++++++++ .../window_functions/test_window_fn.out | 12 ++ .../window_functions/test_window_function.out | 11 ++ .../window_function/window_fn.groovy | 10 ++ .../window_functions/test_window_fn.groovy | 3 + .../window_functions/test_window_function.groovy | 2 + 18 files changed, 513 insertions(+), 4 deletions(-) diff --git a/be/src/pipeline/exec/analytic_source_operator.cpp b/be/src/pipeline/exec/analytic_source_operator.cpp index 0642b4b76bd..f78030f7c17 100644 --- a/be/src/pipeline/exec/analytic_source_operator.cpp +++ b/be/src/pipeline/exec/analytic_source_operator.cpp @@ -390,8 +390,8 @@ Status AnalyticLocalState::_get_next_for_range(size_t current_block_rows) { _window_end_position < current_block_rows) { if (_shared_state->current_row_position >= _order_by_end.pos) { _update_order_by_range(); - _executor.execute(_order_by_start.pos, _order_by_end.pos, _order_by_start.pos, - _order_by_end.pos); + _executor.execute(_partition_by_start.pos, _shared_state->partition_by_end.pos, + _order_by_start.pos, _order_by_end.pos); } _executor.insert_result(current_block_rows); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.cpp b/be/src/vec/aggregate_functions/aggregate_function_window.cpp index 2b260e6baee..ce82651b3fe 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_window.cpp @@ -102,6 +102,8 @@ CREATE_WINDOW_FUNCTION_WITH_NAME_AND_DATA(create_aggregate_function_window_last, void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& factory) { factory.register_function("dense_rank", creator_without_type::creator<WindowFunctionDenseRank>); factory.register_function("rank", creator_without_type::creator<WindowFunctionRank>); + factory.register_function("percent_rank", + creator_without_type::creator<WindowFunctionPercentRank>); factory.register_function("row_number", creator_without_type::creator<WindowFunctionRowNumber>); factory.register_function("ntile", creator_without_type::creator<WindowFunctionNTile>); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 808b48c4e36..7dbe13a840e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -137,6 +137,7 @@ struct DenseRankData { int64_t rank = 0; int64_t peer_group_start = 0; }; + class WindowFunctionDenseRank final : public IAggregateFunctionDataHelper<DenseRankData, WindowFunctionDenseRank> { public: @@ -174,6 +175,62 @@ public: void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} }; +struct PercentRankData { + int64_t rank = 0; + int64_t count = 0; + int64_t peer_group_start = 0; + int64_t partition_size = 0; +}; + +class WindowFunctionPercentRank final + : public IAggregateFunctionDataHelper<PercentRankData, WindowFunctionPercentRank> { +private: + static double _cal_percent(int64 rank, int64 total_rows) { + return total_rows <= 1 ? 0.0 : (rank - 1) * 1.0 / (total_rows - 1); + } + +public: + WindowFunctionPercentRank(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper(argument_types_) {} + + String get_name() const override { return "percent_rank"; } + + DataTypePtr get_return_type() const override { return std::make_shared<DataTypeFloat64>(); } + + void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {} + + void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, + int64_t frame_end, AggregateDataPtr place, const IColumn** columns, + Arena* arena) const override { + int64_t peer_group_count = frame_end - frame_start; + if (WindowFunctionPercentRank::data(place).peer_group_start != frame_start) { + WindowFunctionPercentRank::data(place).peer_group_start = frame_start; + WindowFunctionPercentRank::data(place).rank += + WindowFunctionPercentRank::data(place).count; + // some variables are partition related, but there is no chance to init them + // when the new partition arrives, so we calculate them every time now. + WindowFunctionPercentRank::data(place).partition_size = partition_end - partition_start; + } + WindowFunctionPercentRank::data(place).count = peer_group_count; + } + + void reset(AggregateDataPtr place) const override { + WindowFunctionPercentRank::data(place).rank = 0; + WindowFunctionPercentRank::data(place).count = 1; + WindowFunctionPercentRank::data(place).peer_group_start = -1; + WindowFunctionPercentRank::data(place).partition_size = 0; + } + + void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override { + auto percent_rank = _cal_percent(data(place).rank, data(place).partition_size); + assert_cast<ColumnFloat64&>(to).get_data().push_back(percent_rank); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {} + void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {} + void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {} +}; + struct NTileData { int64_t bucket_index = 0; int64_t rows = 0; @@ -195,7 +252,7 @@ public: int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { // some variables are partition related, but there is no chance to init them - // when the new partition arrives, so we calculate them evey time now. + // when the new partition arrives, so we calculate them every time now. // Partition = big_bucket_num * big_bucket_size + small_bucket_num * small_bucket_size int64_t row_index = ++WindowFunctionNTile::data(place).rows - 1; int64_t bucket_num = columns[0]->get_int(0); diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index 918d86fb753..4491d291e2d 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -353,7 +353,7 @@ Status VAnalyticEvalNode::_get_next_for_range(size_t current_block_rows) { _window_end_position < current_block_rows) { if (_current_row_position >= _order_by_end.pos) { _update_order_by_range(); - _executor.execute(_order_by_start.pos, _order_by_end.pos, _order_by_start.pos, + _executor.execute(_partition_by_start.pos, _partition_by_end.pos, _order_by_start.pos, _order_by_end.pos); } _executor.insert_result(current_block_rows); diff --git a/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md b/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md new file mode 100644 index 00000000000..74e43fd3790 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md @@ -0,0 +1,128 @@ +--- +{ + "title": "WINDOW_FUNCTION_PERCENT_RANK", + "language": "en" +} +--- + +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +## WINDOW FUNCTION PERCENT_RANK +### description + +The PERCENT_RANK() is a window function that calculates the percentile rank of a row within a partition or result set. + +The following shows the syntax of the PERCENT_RANK() function: + +```sql +PERCENT_RANK() OVER ( + PARTITION BY partition_expression + ORDER BY + sort_expression [ASC | DESC] +) +``` + +The PERCENT_RANK() function returns a number that ranges from zero to one. + +For a specified row, PERCENT_RANK() calculates the rank of that row minus one, divided by 1 less than the number of rows in the evaluated partition or query result set: + +```sql +(rank - 1) / (total_rows - 1) +``` + +In this formula, rank is the rank of a specified row and total_rows is the number of rows being evaluated. + +The PERCENT_RANK() function always returns zero for the first row in a partition or result set. The repeated column values will receive the same PERCENT_RANK() value. + +Similar to other window functions, the PARTITION BY clause distributes the rows into partitions and the ORDER BY clause specifies the logical order of rows in each partition. The PERCENT_RANK() function is calculated for each ordered partition independently. + +Both PARTITION BY and ORDER BY clauses are optional. However, the PERCENT_RANK() is an order-sensitive function, therefore, you should always use the ORDER BY clause. + +### example + +```sql +// create table +CREATE TABLE test_percent_rank ( + productLine VARCHAR, + orderYear INT, + orderValue DOUBLE, + percentile_rank DOUBLE +) ENGINE=OLAP +DISTRIBUTED BY HASH(`orderYear`) BUCKETS 4 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1" +); + +// insert data into table +INSERT INTO test_percent_rank (productLine, orderYear, orderValue, percentile_rank) VALUES +('Motorcycles', 2003, 2440.50, 0.00), +('Trains', 2003, 2770.95, 0.17), +('Trucks and Buses', 2003, 3284.28, 0.33), +('Vintage Cars', 2003, 4080.00, 0.50), +('Planes', 2003, 4825.44, 0.67), +('Ships', 2003, 5072.71, 0.83), +('Classic Cars', 2003, 5571.80, 1.00), +('Motorcycles', 2004, 2598.77, 0.00), +('Vintage Cars', 2004, 2819.28, 0.17), +('Planes', 2004, 2857.35, 0.33), +('Ships', 2004, 4301.15, 0.50), +('Trucks and Buses', 2004, 4615.64, 0.67), +('Trains', 2004, 4646.88, 0.83), +('Classic Cars', 2004, 8124.98, 1.00), +('Ships', 2005, 1603.20, 0.00), +('Motorcycles', 2005, 3774.00, 0.17), +('Planes', 2005, 4018.00, 0.50), +('Vintage Cars', 2005, 5346.50, 0.67), +('Classic Cars', 2005, 5971.35, 0.83), +('Trucks and Buses', 2005, 6295.03, 1.00); + +// query +SELECT + productLine, + orderYear, + orderValue, + ROUND( + PERCENT_RANK() + OVER ( + PARTITION BY orderYear + ORDER BY orderValue + ),2) percentile_rank +FROM + test_percent_rank +ORDER BY + orderYear; + +// result ++------------------+-----------+------------+-----------------+ +| productLine | orderYear | orderValue | percentile_rank | ++------------------+-----------+------------+-----------------+ +| Motorcycles | 2003 | 2440.5 | 0 | +| Trains | 2003 | 2770.95 | 0.17 | +| Trucks and Buses | 2003 | 3284.28 | 0.33 | +| Vintage Cars | 2003 | 4080 | 0.5 | +| Planes | 2003 | 4825.44 | 0.67 | +| Ships | 2003 | 5072.71 | 0.83 | +| Classic Cars | 2003 | 5571.8 | 1 | +| Motorcycles | 2004 | 2598.77 | 0 | +| Vintage Cars | 2004 | 2819.28 | 0.17 | +| Planes | 2004 | 2857.35 | 0.33 | +| Ships | 2004 | 4301.15 | 0.5 | +| Trucks and Buses | 2004 | 4615.64 | 0.67 | +| Trains | 2004 | 4646.88 | 0.83 | +| Classic Cars | 2004 | 8124.98 | 1 | +| Ships | 2005 | 1603.2 | 0 | +| Motorcycles | 2005 | 3774 | 0.2 | +| Planes | 2005 | 4018 | 0.4 | +| Vintage Cars | 2005 | 5346.5 | 0.6 | +| Classic Cars | 2005 | 5971.35 | 0.8 | +| Trucks and Buses | 2005 | 6295.03 | 1 | ++------------------+-----------+------------+-----------------+ +``` + +### keywords + + WINDOW,FUNCTION,PERCENT_RANK diff --git a/docs/sidebars.json b/docs/sidebars.json index bc08c6bc23d..c32918f8eed 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -802,6 +802,7 @@ "sql-manual/sql-functions/window-functions/window-function-count", "sql-manual/sql-functions/window-functions/window-function-rank", "sql-manual/sql-functions/window-functions/window-function-dense-rank", + "sql-manual/sql-functions/window-functions/window-function-percent-rank", "sql-manual/sql-functions/window-functions/window-function-first-value", "sql-manual/sql-functions/window-functions/window-function-last-value", "sql-manual/sql-functions/window-functions/window-function-lead", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md new file mode 100644 index 00000000000..79ed3d9e021 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/window-functions/window-function-percent-rank.md @@ -0,0 +1,128 @@ +--- +{ + "title": "WINDOW_FUNCTION_PERCENT_RANK", + "language": "zh-CN" +} +--- + +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> + +## WINDOW FUNCTION PERCENT_RANK +### description + +PERCENT_RANK()是一个窗口函数,用于计算分区或结果集中行的百分位数排名。 + +下面展示了PERCENT_RANK()函数的语法: + +```sql +PERCENT_RANK() OVER ( + PARTITION BY partition_expression + ORDER BY + sort_expression [ASC | DESC] +) +``` + +PERCENT_RANK()函数返回一个范围从0.0到1.0的小数。 + +对于指定行,PERCENT_RANK()计算公式如下: + +```sql +(rank - 1) / (total_rows - 1) +``` + +在此公式中,rank是指定行的排名,total_rows是正在评估的行数。 + +对于分区或结果集中的第一行,PERCENT_RANK()函数始终返回零。对于重复的列值,PERCENT_RANK()函数将返回相同的值。 + +与其他窗口函数类似,PARTITION BY子句将行分配到分区中,并且ORDER BY子句指定每个分区中行的排序逻辑。PERCENT_RANK()函数是针对每个有序分区独立计算的。 + +PERCENT_RANK()是一个顺序敏感的函数,因此,您应该始终需要使用ORDER BY子句。 + +### example + +```sql +// create table +CREATE TABLE test_percent_rank ( + productLine VARCHAR, + orderYear INT, + orderValue DOUBLE, + percentile_rank DOUBLE +) ENGINE=OLAP +DISTRIBUTED BY HASH(`orderYear`) BUCKETS 4 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1" +); + +// insert data into table +INSERT INTO test_percent_rank (productLine, orderYear, orderValue, percentile_rank) VALUES +('Motorcycles', 2003, 2440.50, 0.00), +('Trains', 2003, 2770.95, 0.17), +('Trucks and Buses', 2003, 3284.28, 0.33), +('Vintage Cars', 2003, 4080.00, 0.50), +('Planes', 2003, 4825.44, 0.67), +('Ships', 2003, 5072.71, 0.83), +('Classic Cars', 2003, 5571.80, 1.00), +('Motorcycles', 2004, 2598.77, 0.00), +('Vintage Cars', 2004, 2819.28, 0.17), +('Planes', 2004, 2857.35, 0.33), +('Ships', 2004, 4301.15, 0.50), +('Trucks and Buses', 2004, 4615.64, 0.67), +('Trains', 2004, 4646.88, 0.83), +('Classic Cars', 2004, 8124.98, 1.00), +('Ships', 2005, 1603.20, 0.00), +('Motorcycles', 2005, 3774.00, 0.17), +('Planes', 2005, 4018.00, 0.50), +('Vintage Cars', 2005, 5346.50, 0.67), +('Classic Cars', 2005, 5971.35, 0.83), +('Trucks and Buses', 2005, 6295.03, 1.00); + +// query +SELECT + productLine, + orderYear, + orderValue, + ROUND( + PERCENT_RANK() + OVER ( + PARTITION BY orderYear + ORDER BY orderValue + ),2) percentile_rank +FROM + test_percent_rank +ORDER BY + orderYear; + +// result ++------------------+-----------+------------+-----------------+ +| productLine | orderYear | orderValue | percentile_rank | ++------------------+-----------+------------+-----------------+ +| Motorcycles | 2003 | 2440.5 | 0 | +| Trains | 2003 | 2770.95 | 0.17 | +| Trucks and Buses | 2003 | 3284.28 | 0.33 | +| Vintage Cars | 2003 | 4080 | 0.5 | +| Planes | 2003 | 4825.44 | 0.67 | +| Ships | 2003 | 5072.71 | 0.83 | +| Classic Cars | 2003 | 5571.8 | 1 | +| Motorcycles | 2004 | 2598.77 | 0 | +| Vintage Cars | 2004 | 2819.28 | 0.17 | +| Planes | 2004 | 2857.35 | 0.33 | +| Ships | 2004 | 4301.15 | 0.5 | +| Trucks and Buses | 2004 | 4615.64 | 0.67 | +| Trains | 2004 | 4646.88 | 0.83 | +| Classic Cars | 2004 | 8124.98 | 1 | +| Ships | 2005 | 1603.2 | 0 | +| Motorcycles | 2005 | 3774 | 0.2 | +| Planes | 2005 | 4018 | 0.4 | +| Vintage Cars | 2005 | 5346.5 | 0.6 | +| Classic Cars | 2005 | 5971.35 | 0.8 | +| Trucks and Buses | 2005 | 6295.03 | 1 | ++------------------+-----------+------------+-----------------+ +``` + +### keywords + + WINDOW,FUNCTION,PERCENT_RANK diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java index 07cd376d705..a23088d25f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinWindowFunctions.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.window.Lag; import org.apache.doris.nereids.trees.expressions.functions.window.LastValue; import org.apache.doris.nereids.trees.expressions.functions.window.Lead; import org.apache.doris.nereids.trees.expressions.functions.window.Ntile; +import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank; import org.apache.doris.nereids.trees.expressions.functions.window.Rank; import org.apache.doris.nereids.trees.expressions.functions.window.RowNumber; @@ -43,6 +44,7 @@ public class BuiltinWindowFunctions implements FunctionHelper { window(LastValue.class, "last_value"), window(Lead.class, "lead"), window(Ntile.class, "ntile"), + window(PercentRank.class, "percent_rank"), window(Rank.class, "rank"), window(RowNumber.class, "row_number") ); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 629e4556df2..7a16f365465 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1557,6 +1557,14 @@ public class FunctionSet<T> { null, "", "")); + // Percent Rank + addBuiltin(AggregateFunction.createAnalyticBuiltin("percent_rank", + Lists.<Type>newArrayList(), Type.DOUBLE, Type.VARCHAR, + "", + "", + null, + "", + "")); // Dense rank addBuiltin(AggregateFunction.createAnalyticBuiltin("dense_rank", Lists.<Type>newArrayList(), Type.BIGINT, Type.VARCHAR, @@ -1584,6 +1592,14 @@ public class FunctionSet<T> { null, "", "", true)); + //vec Percent Rank + addBuiltin(AggregateFunction.createAnalyticBuiltin("percent_rank", + Lists.<Type>newArrayList(), Type.DOUBLE, Type.VARCHAR, + "", + "", + null, + "", + "", true)); //vec Dense rank addBuiltin(AggregateFunction.createAnalyticBuiltin("dense_rank", Lists.<Type>newArrayList(), Type.BIGINT, Type.VARCHAR, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java index 9f5de748694..3f805327b22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/WindowFunctionChecker.java @@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.functions.window.Lag; import org.apache.doris.nereids.trees.expressions.functions.window.LastValue; import org.apache.doris.nereids.trees.expressions.functions.window.Lead; import org.apache.doris.nereids.trees.expressions.functions.window.Ntile; +import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank; import org.apache.doris.nereids.trees.expressions.functions.window.Rank; import org.apache.doris.nereids.trees.expressions.functions.window.RowNumber; import org.apache.doris.nereids.trees.expressions.literal.Literal; @@ -361,6 +362,18 @@ public class WindowFunctionChecker extends DefaultExpressionVisitor<Expression, return denseRank; } + /** + * required WindowFrame: (RANGE, UNBOUNDED PRECEDING, CURRENT ROW) + */ + @Override + public PercentRank visitPercentRank(PercentRank percentRank, Void ctx) { + WindowFrame requiredFrame = new WindowFrame(FrameUnitsType.RANGE, + FrameBoundary.newPrecedingBoundary(), FrameBoundary.newCurrentRowBoundary()); + + checkAndCompleteWindowFrame(requiredFrame, percentRank.getName()); + return percentRank; + } + /** * required WindowFrame: (ROWS, UNBOUNDED PRECEDING, CURRENT ROW) */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/PercentRank.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/PercentRank.java new file mode 100644 index 00000000000..67c914ec58a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/window/PercentRank.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.window; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; +import org.apache.doris.nereids.trees.expressions.shape.LeafExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.DoubleType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * Window function: PercentRank() + */ +public class PercentRank extends WindowFunction implements AlwaysNotNullable, LeafExpression { + + public PercentRank() { + super("percent_rank"); + } + + @Override + public List<FunctionSignature> getSignatures() { + return ImmutableList.of(FunctionSignature.ret(DoubleType.INSTANCE).args()); + } + + @Override + public FunctionSignature searchSignature(List<FunctionSignature> signatures) { + return signatures.get(0); + } + + @Override + public PercentRank withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 0); + return new PercentRank(); + } + + @Override + public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitPercentRank(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java index 3a97c744be6..29bbed53225 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/WindowFunctionVisitor.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.window.Lag; import org.apache.doris.nereids.trees.expressions.functions.window.LastValue; import org.apache.doris.nereids.trees.expressions.functions.window.Lead; import org.apache.doris.nereids.trees.expressions.functions.window.Ntile; +import org.apache.doris.nereids.trees.expressions.functions.window.PercentRank; import org.apache.doris.nereids.trees.expressions.functions.window.Rank; import org.apache.doris.nereids.trees.expressions.functions.window.RowNumber; import org.apache.doris.nereids.trees.expressions.functions.window.WindowFunction; @@ -56,6 +57,10 @@ public interface WindowFunctionVisitor<R, C> { return visitWindowFunction(ntile, context); } + default R visitPercentRank(PercentRank percentRank, C context) { + return visitWindowFunction(percentRank, context); + } + default R visitRank(Rank rank, C context) { return visitWindowFunction(rank, context); } diff --git a/regression-test/data/nereids_function_p0/window_function/window_fn.out b/regression-test/data/nereids_function_p0/window_function/window_fn.out index a541087bb0f..8f51aa3f3c0 100644 --- a/regression-test/data/nereids_function_p0/window_function/window_fn.out +++ b/regression-test/data/nereids_function_p0/window_function/window_fn.out @@ -3537,6 +3537,64 @@ string3 6 2 string3 9 3 string3 12 4 +-- !sql_percent_rank_pb -- +null 0 +string1 0 +string1 0 +string1 0 +string1 0 +string2 0 +string2 0 +string2 0 +string2 0 +string3 0 +string3 0 +string3 0 +string3 0 + +-- !sql_percent_rank_pb_ob -- +null \N 0 +string1 1 0 +string1 4 0.33333333333333331 +string1 7 0.66666666666666663 +string1 10 1 +string2 2 0 +string2 5 0.33333333333333331 +string2 8 0.66666666666666663 +string2 11 1 +string3 3 0 +string3 6 0.33333333333333331 +string3 9 0.66666666666666663 +string3 12 1 + +-- !sql_percent_rank_pb_notnull -- +string1 0 +string1 0 +string1 0 +string1 0 +string2 0 +string2 0 +string2 0 +string2 0 +string3 0 +string3 0 +string3 0 +string3 0 + +-- !sql_percent_rank_pb_ob_notnull -- +string1 1 0 +string1 4 0.33333333333333331 +string1 7 0.66666666666666663 +string1 10 1 +string2 2 0 +string2 5 0.33333333333333331 +string2 8 0.66666666666666663 +string2 11 1 +string3 3 0 +string3 6 0.33333333333333331 +string3 9 0.66666666666666663 +string3 12 1 + -- !sql_row_number_pb -- null 1 string1 1 diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out b/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out index 08907e0685d..f3c52109632 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_window_fn.out @@ -329,6 +329,18 @@ develop 11 2007-08-15 5200 9 1 1 3 2 3 3 +-- !sql -- +0 0 0 +0 0 0 +1 4 0 +0 1 1 +0 1 1 +0.66666666666666663 7 1 +1 9 1 +0 0 2 +0 1 3 +1 3 3 + -- !sql -- 0 9976146 4 1 10114187 9 diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out index 8a753341bbd..adcee782506 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_window_function.out @@ -205,6 +205,17 @@ JDR 2014-10-08T00:00:00.111111 13.98 14.75 3 1 1 3 2 2 +-- !sql -- +1 1 0 +1 2 0.5 +1 2 0.5 +2 1 0 +2 2 0.5 +2 3 1 +3 1 0 +3 1 0 +3 2 1 + -- !sql -- 1 1 1 1 2 2 diff --git a/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy b/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy index a88592bf907..1d66e5ad82b 100644 --- a/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy +++ b/regression-test/suites/nereids_function_p0/window_function/window_fn.groovy @@ -640,6 +640,16 @@ suite("nereids_win_fn") { qt_sql_rank_pb_ob_notnull ''' select kstr, kint, rank() over(partition by kstr order by kint) as wf from fn_test_not_nullable order by kstr, kint''' + qt_sql_percent_rank_pb ''' + select kstr, percent_rank() over(partition by kstr) as wf from fn_test order by kstr''' + qt_sql_percent_rank_pb_ob ''' + select kstr, kint, percent_rank() over(partition by kstr order by kint) as wf from fn_test order by kstr, kint''' + + qt_sql_percent_rank_pb_notnull ''' + select kstr, percent_rank() over(partition by kstr) as wf from fn_test_not_nullable order by kstr''' + qt_sql_percent_rank_pb_ob_notnull ''' + select kstr, kint, percent_rank() over(partition by kstr order by kint) as wf from fn_test_not_nullable order by kstr, kint''' + // qt_sql_row_number ''' // select row_number() over() as wf from fn_test''' // qt_sql_row_number_ob ''' diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy index 20806c9b953..ab35e974b15 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy @@ -217,6 +217,9 @@ suite("test_window_fn", "arrow_flight_sql") { qt_sql """ SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten; """ + qt_sql """ + SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten; + """ qt_sql """ select ten, sum(unique1) + sum(unique2) as res, rank() over (order by sum(unique1) + sum(unique2)) as rank from ${tbName2} group by ten order by ten; """ diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy index 2c853fc163d..1575358b7b0 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy @@ -289,6 +289,8 @@ suite("test_window_function") { qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, rank() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ // DENSE_RANK qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, dense_rank() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ + // PERCENT_RANK + qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, percent_rank() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ // ROW_NUMBER qt_sql """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1) */ x, y, row_number() over(partition by x order by y) as rank from ${windowFunctionTable3} order by x, y; """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org