This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b27a7dd062c [Bug](fix) fix the percentile func result do not equal the percentile array rewrite result (#49351) b27a7dd062c is described below commit b27a7dd062cfd14e3e0f8fe824b333cee6ce8148 Author: HappenLee <happen...@selectdb.com> AuthorDate: Sun Mar 23 12:50:40 2025 +0800 [Bug](fix) fix the percentile func result do not equal the percentile array rewrite result (#49351) origin case: ``` SELECT percentile(sale_price, 0.05) as median_price_05, percentile(sale_price, 0.5) as median_price, percentile(sale_price, 0.75) as p75_price, percentile(sale_price, 0.90) as p90_price, percentile(sale_price, 0.95) as p95_price FROM sales_data; +-----------------+--------------+-----------+--------------------+-------------------+ | median_price_05 | median_price | p75_price | p90_price | p95_price | +-----------------+--------------+-----------+--------------------+-------------------+ | 12.25 | 32.5 | 43.75 | 54.999999999999986 | 77.49999999999994 | +-----------------+--------------+-----------+--------------------+-------------------+ 1 row in set (0.03 sec) SELECT percentile(sale_price, 0.5) as median_price FROM sales_data; +--------------+ | median_price | +--------------+ | 32.625 | +--------------+ 1 row in set (0.02 sec) ``` the result `percentile(sale_price, 0.5)` different after rewrite `percentile_array(sale_price, [0.5])` ` --- .../aggregate_function_percentile.cpp | 1 + .../aggregate_function_percentile.h | 2 +- .../doris/catalog/BuiltinAggregateFunctions.java | 2 +- .../java/org/apache/doris/catalog/FunctionSet.java | 9 +++++ .../apache/doris/nereids/types/AggStateType.java | 1 + .../nereids/rules/analysis/GenerateFunction.java | 1 + .../test_aggregate_percentile_no_cast.out | Bin 1410 -> 1426 bytes .../test_aggregate_window_functions.out | Bin 21104 -> 21134 bytes .../data/query_p0/aggregate/aggregate.out | Bin 9762 -> 9841 bytes .../suites/query_p0/aggregate/aggregate.groovy | 40 +++++++++++++++++++++ 10 files changed, 54 insertions(+), 2 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile.cpp b/be/src/vec/aggregate_functions/aggregate_function_percentile.cpp index f607f14d53e..d9d83f17753 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile.cpp @@ -65,6 +65,7 @@ AggregateFunctionPtr create_aggregate_function_percentile_approx_weighted( void register_aggregate_function_percentile(AggregateFunctionSimpleFactory& factory) { factory.register_function_both("percentile", creator_with_numeric_type::creator<AggregateFunctionPercentile>); + factory.register_alias("percentile", "percentile_cont"); factory.register_function_both( "percentile_array", creator_with_numeric_type::creator<AggregateFunctionPercentileArray>); diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile.h b/be/src/vec/aggregate_functions/aggregate_function_percentile.h index 9dfb31864ba..1ba959045d9 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile.h +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile.h @@ -514,7 +514,7 @@ public: assert_cast<const ColumnFloat64&, TypeCheckOnRelease::DISABLE>(nested_column); AggregateFunctionPercentileArray::data(place).add( - sources.get_int(row_num), nested_column_data.get_data(), null_maps, + sources.get_element(row_num), nested_column_data.get_data(), null_maps, offset_column_data.data()[row_num] - offset_column_data[(ssize_t)row_num - 1]); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java index 5e2e7291987..8db503a5d31 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java @@ -145,7 +145,7 @@ public class BuiltinAggregateFunctions implements FunctionHelper { agg(OrthogonalBitmapIntersect.class, "orthogonal_bitmap_intersect"), agg(OrthogonalBitmapIntersectCount.class, "orthogonal_bitmap_intersect_count"), agg(OrthogonalBitmapUnionCount.class, "orthogonal_bitmap_union_count"), - agg(Percentile.class, "percentile"), + agg(Percentile.class, "percentile", "percentile_cont"), agg(PercentileApprox.class, "percentile_approx"), agg(PercentileApproxWeighted.class, "percentile_approx_weighted"), agg(PercentileArray.class, "percentile_array"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 452460334f6..84d0dbc7971 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1484,6 +1484,15 @@ public class FunctionSet<T> { "", false, true, false, true)); + addBuiltin(AggregateFunction.createBuiltin("percentile_cont", + Lists.newArrayList(Type.BIGINT, Type.DOUBLE), Type.DOUBLE, Type.VARCHAR, + "", + "", + "", + "", + "", + false, true, false, true)); + addBuiltin(AggregateFunction.createBuiltin("percentile_approx", Lists.<Type>newArrayList(Type.DOUBLE, Type.DOUBLE), Type.DOUBLE, Type.VARCHAR, "", diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/AggStateType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/AggStateType.java index f3f986bc26d..2f704190fef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/AggStateType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/AggStateType.java @@ -52,6 +52,7 @@ public class AggStateType extends DataType { .put("any", "any_value") .put("char_length", "character_length") .put("stddev_pop", "stddev") + .put("percentile_cont", "percentile") .put("var_pop", "variance") .put("variance_pop", "variance") .put("var_samp", "variance_samp") diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/GenerateFunction.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/GenerateFunction.java index 4760e86a921..c6ab40552e4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/GenerateFunction.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/GenerateFunction.java @@ -183,6 +183,7 @@ public class GenerateFunction { .put("any", "any_value") .put("char_length", "character_length") .put("stddev_pop", "stddev") + .put("percentile_cont", "percentile") .put("var_pop", "variance") .put("variance_pop", "variance") .put("var_samp", "variance_samp") diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out index 53cbce2c30b..74654c2e129 100644 Binary files a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out and b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_percentile_no_cast.out differ diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out index e2f8ac48979..ae6d1ad35a2 100644 Binary files a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out and b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out differ diff --git a/regression-test/data/query_p0/aggregate/aggregate.out b/regression-test/data/query_p0/aggregate/aggregate.out index ffd37904994..f17c690ec49 100644 Binary files a/regression-test/data/query_p0/aggregate/aggregate.out and b/regression-test/data/query_p0/aggregate/aggregate.out differ diff --git a/regression-test/suites/query_p0/aggregate/aggregate.groovy b/regression-test/suites/query_p0/aggregate/aggregate.groovy index 6079d09577f..c9afd839fcc 100644 --- a/regression-test/suites/query_p0/aggregate/aggregate.groovy +++ b/regression-test/suites/query_p0/aggregate/aggregate.groovy @@ -325,4 +325,44 @@ suite("aggregate") { qt_aggregate_limit_contain_null """ select count(), cast(k12 as int) as t from baseall group by t limit 1; """ + + // Test case for percentile function with sales data + sql """ DROP TABLE IF EXISTS sales_data """ + sql """ + CREATE TABLE sales_data ( + product_id INT, + sale_price DECIMAL(10, 2) + ) DUPLICATE KEY(`product_id`) + DISTRIBUTED BY HASH(`product_id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + + sql """ + INSERT INTO sales_data VALUES + (1, 10.00), + (1, 15.00), + (1, 20.00), + (1, 25.00), + (1, 30.25), + (1, 35.00), + (1, 40.00), + (1, 45.00), + (1, 50.00), + (1, 100.00) + """ + + qt_aggregate35 """ + SELECT + percentile(sale_price, 0.05) as median_price_05, + percentile(sale_price, 0.5) as median_price, + percentile(sale_price, 0.75) as p75_price, + percentile(sale_price, 0.90) as p90_price, + percentile(sale_price, 0.95) as p95_price, + percentile(null, 0.99) as p99_null + FROM sales_data + """ + + sql """ DROP TABLE IF EXISTS sales_data """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org