This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9565d164f389 [SPARK-52799][TESTS] Fix ThriftServerQueryTestSuite
result comparison
9565d164f389 is described below
commit 9565d164f389e2e3db7ab43a873f6b473f5c9365
Author: Kent Yao <[email protected]>
AuthorDate: Tue Jul 15 18:09:07 2025 +0800
[SPARK-52799][TESTS] Fix ThriftServerQueryTestSuite result comparison
### What changes were proposed in this pull request?
This PR fixes the ThriftServerQueryTestSuite result comparison. When re-reading
the golden files, if the number of result lines exceeds the number of result
rows, we assume that a single row spans multiple lines. In this case, we group
the lines back into rows before sorting, because sorting line-by-line would
interleave lines belonging to different rows.
### Why are the changes needed?
When a single row's result spans multiple lines, line-by-line sorting can
malform the expected output, for example:
```
[info] Expected "[ <birth>2018</birth>
[info] <name>[45 61 73 6F 6E]</name>
[info] <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org>
[info] </ROW>
[info] <]ROW>", but got "[<ROW>
[info] <name>[45 61 73 6F 6E]</name>
[info] <birth>2018</birth>
[info] <org>[4B 69 6E 64 65 72 67 61 72 74 65 6E 20 43 6F 70]</org>
[info] </]ROW>"
```
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Tested with https://github.com/apache/spark/pull/51470 locally
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #51488 from yaooqinn/ThriftServerQueryTestSuite.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../thriftserver/ThriftServerQueryTestSuite.scala | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index a249d4b4d4e4..0012451dcf5e 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -137,11 +137,13 @@ class ThriftServerQueryTestSuite extends
SQLQueryTestSuite with SharedThriftServ
statement.execute(s"SET ${SQLConf.ANSI_ENABLED.key} = true")
}
+ val rowCounts = new Array[Int](queries.size)
// Run the SQL queries preparing them for comparison.
val outputs: Seq[QueryTestOutput] = withSQLConf(configSet: _*) {
- queries.map { sql =>
+ queries.zipWithIndex.map { case (sql, i) =>
val (_, output) = handleExceptions(getNormalizedResult(statement,
sql))
// We might need to do some query canonicalization in the future.
+ rowCounts(i) = output.length
ExecutionOutput(
sql = sql,
schema = Some(""),
@@ -162,11 +164,19 @@ class ThriftServerQueryTestSuite extends
SQLQueryTestSuite with SharedThriftServ
val sql = segments(i * 3 + 1).trim
val schema = segments(i * 3 + 2).trim
val originalOut = segments(i * 3 + 3)
- val output = if (schema != emptySchema && isNeedSort(sql)) {
- originalOut.split("\n").sorted.mkString("\n")
- } else {
- originalOut
- }
+ val output =
+ if (schema != emptySchema && isNeedSort(sql)) {
+ val splits = originalOut.split("\n")
+ if (splits.length > rowCounts(i)) {
+ // the result is multiline
+ val step = splits.length / rowCounts(i)
+
splits.grouped(step).map(_.mkString("\n")).toSeq.sorted.mkString("\n")
+ } else {
+ splits.sorted.mkString("\n")
+ }
+ } else {
+ originalOut
+ }
ExecutionOutput(
sql = sql,
schema = Some(""),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]