This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e4b99932d02c [SPARK-53991][SQL][TEST][FOLLOWUP] Make KLL quantile
golden file tests deterministic
e4b99932d02c is described below
commit e4b99932d02cbc1c177e59f1da987856ece12f3d
Author: Daniel Tenedorio <[email protected]>
AuthorDate: Mon Dec 22 13:56:29 2025 +0800
[SPARK-53991][SQL][TEST][FOLLOWUP] Make KLL quantile golden file tests
deterministic
### What changes were proposed in this pull request?
In https://github.com/apache/spark/pull/52800, we added SQL support for KLL
quantile functions based on DataSketches.
In this PR, we update some of the golden file tests to make them
deterministic.
### Why are the changes needed?
The previous tests generated string summaries of KLL quantile sketches and
then split them by newlines and made case-sensitive checks for substrings. It
turns out this was brittle, so this PR updates the tests to avoid the
newline-splitting and makes the substring checks case-insensitive.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
This PR updates test coverage only.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53549 from dtenedor/kll-quantile-golden-files-fix.
Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../analyzer-results/kllquantiles.sql.out | 57 ++++++++++------------
.../resources/sql-tests/inputs/kllquantiles.sql | 22 ++++-----
.../sql-tests/results/kllquantiles.sql.out | 34 ++++++-------
3 files changed, 53 insertions(+), 60 deletions(-)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/kllquantiles.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/kllquantiles.sql.out
index dc22199985f0..3eea568420c0 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/kllquantiles.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/kllquantiles.sql.out
@@ -128,7 +128,7 @@ CreateDataSourceTableAsSelectCommand
`spark_catalog`.`default`.`t_double_1_5_thr
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -136,8 +136,7 @@ FROM (
FROM t_byte_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_bigint(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as double)) - cast(4 as
bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_bigint(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as
double)) - cast(4 as bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_bigint(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_byte_1_5_through_7_11
@@ -145,7 +144,7 @@ Project [split(kll_sketch_to_string_bigint(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -153,8 +152,7 @@ FROM (
FROM t_int_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_bigint(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as double)) - cast(4 as
bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_bigint(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as
double)) - cast(4 as bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_bigint(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11
@@ -162,7 +160,7 @@ Project [split(kll_sketch_to_string_bigint(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -170,8 +168,7 @@ FROM (
FROM t_long_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_bigint(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as double)) - cast(4 as
bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_bigint(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as
double)) - cast(4 as bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_bigint(col1#xL, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11
@@ -179,7 +176,7 @@ Project [split(kll_sketch_to_string_bigint(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -187,8 +184,7 @@ FROM (
FROM t_short_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_bigint(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as double)) - cast(4 as
bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_bigint(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_bigint(agg#x, cast(0.5 as
double)) - cast(4 as bigint))) < cast(1 as bigint)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_bigint(agg#x, cast(3 as bigint)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_bigint(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_short_1_5_through_7_11
@@ -196,7 +192,7 @@ Project [split(kll_sketch_to_string_bigint(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -204,8 +200,7 @@ FROM (
FROM t_float_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_float(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((cast(kll_sketch_get_quantile_float(agg#x, cast(0.5 as double)) as double)
- cast(4.0 as double))) < cast(0.5 as double)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_float(agg#x, cast(3 as float)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_float(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((cast(kll_sketch_get_quantile_float(agg#x, cast(0.5 as
double)) as double) - cast(4.0 as double))) < cast(0.5 as double)) AS
median_close_to_4#x, (abs((kll_sketch_get_rank_float(agg#x, cast(3 as float)) -
cast(0.4 as double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_float(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_float_1_5_through_7_11
@@ -213,7 +208,7 @@ Project [split(kll_sketch_to_string_float(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -221,8 +216,7 @@ FROM (
FROM t_double_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_double(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_double(agg#x, cast(0.5 as double)) - cast(4.0 as
double))) < cast(0.5 as double)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_double(agg#x, cast(3 as double)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_double(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_double(agg#x, cast(0.5 as
double)) - cast(4.0 as double))) < cast(0.5 as double)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_double(agg#x, cast(3 as double)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_double(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_double_1_5_through_7_11
@@ -230,7 +224,7 @@ Project [split(kll_sketch_to_string_double(agg#x),
-- !query
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -238,8 +232,7 @@ FROM (
FROM t_float_1_5_through_7_11
)
-- !query analysis
-Project [split(kll_sketch_to_string_double(agg#x),
-, -1)[1] LIKE %Kll% AS str_contains_kll#x,
(abs((kll_sketch_get_quantile_double(agg#x, cast(0.5 as double)) - cast(4.0 as
double))) < cast(0.5 as double)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_double(agg#x, cast(3 as double)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+Project [lower(kll_sketch_to_string_double(agg#x)) LIKE %kll% AS
str_contains_kll#x, (abs((kll_sketch_get_quantile_double(agg#x, cast(0.5 as
double)) - cast(4.0 as double))) < cast(0.5 as double)) AS median_close_to_4#x,
(abs((kll_sketch_get_rank_double(agg#x, cast(3 as double)) - cast(0.4 as
double))) < cast(0.1 as double)) AS rank3_close_to_0_4#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_double(col1#x, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_float_1_5_through_7_11
@@ -549,7 +542,7 @@ Aggregate
[kll_sketch_get_n_bigint(kll_sketch_agg_bigint(col1#xL, Some(100), 0,
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -571,15 +564,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 273,
- "stopIndex" : 299,
+ "startIndex" : 264,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_bigint(col1)"
} ]
}
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -601,15 +594,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 273,
- "stopIndex" : 299,
+ "startIndex" : 264,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_bigint(col1)"
} ]
}
-- !query
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -631,8 +624,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 274,
- "stopIndex" : 299,
+ "startIndex" : 265,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_float(col1)"
} ]
}
@@ -931,13 +924,13 @@ Project [isnotnull(kll_sketch_get_quantile_float(agg#x,
cast(0.5 as double))) AS
-- !query
-SELECT kll_sketch_to_string_double(agg) LIKE '%Kll%' AS contains_kll_header
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
contains_kll_header
FROM (
SELECT kll_sketch_agg_bigint(col1) AS agg
FROM t_long_1_5_through_7_11
)
-- !query analysis
-Project [kll_sketch_to_string_double(agg#x) LIKE %Kll% AS
contains_kll_header#x]
+Project [lower(kll_sketch_to_string_double(agg#x)) LIKE %kll% AS
contains_kll_header#x]
+- SubqueryAlias __auto_generated_subquery_name
+- Aggregate [kll_sketch_agg_bigint(col1#xL, None, 0, 0) AS agg#x]
+- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11
diff --git a/sql/core/src/test/resources/sql-tests/inputs/kllquantiles.sql
b/sql/core/src/test/resources/sql-tests/inputs/kllquantiles.sql
index 69d472ac78a6..fe1b61de037d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/kllquantiles.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/kllquantiles.sql
@@ -64,7 +64,7 @@ VALUES
(CAST(7 AS DOUBLE), CAST(11 AS DOUBLE)) AS tab(col1, col2);
-- BIGINT sketches
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -72,7 +72,7 @@ FROM (
FROM t_byte_1_5_through_7_11
);
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -80,7 +80,7 @@ FROM (
FROM t_int_1_5_through_7_11
);
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -88,7 +88,7 @@ FROM (
FROM t_long_1_5_through_7_11
);
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -97,7 +97,7 @@ FROM (
);
-- FLOAT sketches (only accepts float types to avoid precision loss)
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -106,7 +106,7 @@ FROM (
);
-- DOUBLE sketches (accepts float and double types to avoid precision loss
from integer conversion)
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -115,7 +115,7 @@ FROM (
);
-- Test float column with double sketch (valid type promotion)
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -268,7 +268,7 @@ FROM t_long_1_5_through_7_11;
-- These queries should fail with type mismatch or validation errors
-- Type mismatch: BIGINT sketch does not accept DOUBLE columns
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -277,7 +277,7 @@ FROM (
);
-- Type mismatch: BIGINT sketch does not accept FLOAT columns
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -286,7 +286,7 @@ FROM (
);
-- Type mismatch: FLOAT sketch does not accept DOUBLE columns
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -378,7 +378,7 @@ FROM (
-- interpret the binary data. This query succeeds even though we're using a
DOUBLE
-- to_string function on a BIGINT sketch. The function reads the binary
representation
-- and produces output, but the numeric values will be incorrectly interpreted.
-SELECT kll_sketch_to_string_double(agg) LIKE '%Kll%' AS contains_kll_header
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
contains_kll_header
FROM (
SELECT kll_sketch_agg_bigint(col1) AS agg
FROM t_long_1_5_through_7_11
diff --git a/sql/core/src/test/resources/sql-tests/results/kllquantiles.sql.out
b/sql/core/src/test/resources/sql-tests/results/kllquantiles.sql.out
index 6f60f30e5681..863bded1599b 100644
--- a/sql/core/src/test/resources/sql-tests/results/kllquantiles.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/kllquantiles.sql.out
@@ -134,7 +134,7 @@ struct<>
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -148,7 +148,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -162,7 +162,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -176,7 +176,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -190,7 +190,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -204,7 +204,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -218,7 +218,7 @@ true true true
-- !query
-SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -512,7 +512,7 @@ struct<n_k_100:bigint>
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -536,15 +536,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 273,
- "stopIndex" : 299,
+ "startIndex" : 264,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_bigint(col1)"
} ]
}
-- !query
-SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS
str_contains_kll,
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS
median_close_to_4,
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS
rank3_close_to_0_4
FROM (
@@ -568,15 +568,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 273,
- "stopIndex" : 299,
+ "startIndex" : 264,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_bigint(col1)"
} ]
}
-- !query
-SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS
str_contains_kll,
+SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS
median_close_to_4,
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
FROM (
@@ -600,8 +600,8 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
- "startIndex" : 274,
- "stopIndex" : 299,
+ "startIndex" : 265,
+ "stopIndex" : 290,
"fragment" : "kll_sketch_agg_float(col1)"
} ]
}
@@ -953,7 +953,7 @@ true
-- !query
-SELECT kll_sketch_to_string_double(agg) LIKE '%Kll%' AS contains_kll_header
+SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS
contains_kll_header
FROM (
SELECT kll_sketch_agg_bigint(col1) AS agg
FROM t_long_1_5_through_7_11
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]