This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 22e1c4f [SPARK-31343][SQL][TESTS] Check codegen does not fail on
expressions with escape chars in string parameters
22e1c4f is described below
commit 22e1c4f7ed0c5e8d63994ceda256a03f2aca09a2
Author: Maxim Gekk <[email protected]>
AuthorDate: Mon Apr 6 05:43:29 2020 +0000
[SPARK-31343][SQL][TESTS] Check codegen does not fail on expressions with
escape chars in string parameters
### What changes were proposed in this pull request?
In the PR, I propose to add tests to check that code generation doesn't
fail if expressions string argument contains escape chars. The PR adds similar
tests added by https://github.com/apache/spark/pull/20182 for
`from_utc_timestamp` / `to_utc_timestamp`.
### Why are the changes needed?
To prevent regressions in the future.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
By running the affected tests
Closes #28115 from MaxGekk/tests-arg-escape.
Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 09198528b98d43169b4d0c78e36de7e543d45fc5)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../expressions/CallMethodViaReflectionSuite.scala | 6 +++
.../expressions/DateExpressionsSuite.scala | 17 +++++++++
.../expressions/JsonExpressionsSuite.scala | 23 +++++++++++
.../expressions/RegexpExpressionsSuite.scala | 13 +++++++
.../expressions/StringExpressionsSuite.scala | 44 ++++++++++++++++++++++
5 files changed, 103 insertions(+)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala
index 88d4d46..d8f3ad2 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala
@@ -21,6 +21,7 @@ import java.sql.Timestamp
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
+import
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.types.{IntegerType, StringType}
/** A static class for testing purpose. */
@@ -101,6 +102,11 @@ class CallMethodViaReflectionSuite extends SparkFunSuite
with ExpressionEvalHelp
checkEvaluation(createExpr(staticClassName, "method4", 4, "four"),
"m4four")
}
+ test("escaping of class and method names") {
+ GenerateUnsafeProjection.generate(
+ CallMethodViaReflection(Seq(Literal("\"quote"), Literal("\"quote"),
Literal(null))) :: Nil)
+ }
+
private def createExpr(className: String, methodName: String, args: Any*) = {
CallMethodViaReflection(
Literal.create(className, StringType) +:
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 350b3a4..7f235d6 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -265,6 +265,10 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
Literal("H"), JST_OPT), "0")
checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), JST_OPT),
"22")
+ // Test escaping of format
+ GenerateUnsafeProjection.generate(
+ DateFormatClass(Literal(ts), Literal("\"quote"), JST_OPT) :: Nil)
+
// SPARK-28072 The codegen path should work
checkEvaluation(
expression = DateFormatClass(
@@ -602,6 +606,9 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(NextDay(Literal.create(null, DateType), Literal("xx")),
null)
checkEvaluation(
NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null,
StringType)), null)
+ // Test escaping of dayOfWeek
+ GenerateUnsafeProjection.generate(
+ NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil)
}
test("TruncDate") {
@@ -625,6 +632,8 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
testTrunc(date, null, null)
testTrunc(null, "MON", null)
testTrunc(null, null, null)
+ // Test escaping of format
+ GenerateUnsafeProjection.generate(TruncDate(Literal(0, DateType),
Literal("\"quote")) :: Nil)
testTrunc(Date.valueOf("2000-03-08"), "decade", Date.valueOf("2000-01-01"))
testTrunc(Date.valueOf("2000-03-08"), "century",
Date.valueOf("1901-01-01"))
@@ -751,6 +760,8 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
}
+ // Test escaping of format
+ GenerateUnsafeProjection.generate(FromUnixTime(Literal(0L),
Literal("\"quote")) :: Nil)
}
test("unix_timestamp") {
@@ -818,6 +829,9 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
}
+ // Test escaping of format
+ GenerateUnsafeProjection.generate(
+ UnixTimestamp(Literal("2015-07-24"), Literal("\"quote")) :: Nil)
}
test("to_unix_timestamp") {
@@ -893,6 +907,9 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
}
+ // Test escaping of format
+ GenerateUnsafeProjection.generate(
+ ToUnixTimestamp(Literal("2015-07-24"), Literal("\"quote")) :: Nil)
}
test("datediff") {
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
index 032e0ac..90c4d8f 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala
@@ -25,6 +25,7 @@ import org.scalatest.exceptions.TestFailedException
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
+import
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.plans.PlanTestBase
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{PST, UTC, UTC_OPT}
@@ -49,6 +50,10 @@ class JsonExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper with
in Jackson's JsonFactory.createParser(byte[]) due to RFC-4627 encoding
detection */
val badJson = "\u0000\u0000\u0000A\u0001AAA"
+ test("get_json_object escaping") {
+ GenerateUnsafeProjection.generate(GetJsonObject(Literal("\"quote"),
Literal("\"quote")) :: Nil)
+ }
+
test("$.store.bicycle") {
checkEvaluation(
GetJsonObject(Literal(json), Literal("$.store.bicycle")),
@@ -265,6 +270,11 @@ class JsonExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper with
assert(jt.eval(null).toSeq.head === expected)
}
+ test("json_tuple escaping") {
+ GenerateUnsafeProjection.generate(
+ JsonTuple(Literal("\"quote") :: Literal("\"quote") :: Nil) :: Nil)
+ }
+
test("json_tuple - hive key 1") {
checkJsonTuple(
JsonTuple(
@@ -396,6 +406,12 @@ class JsonExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper with
InternalRow(UTF8String.fromString("1"), null,
UTF8String.fromString("1")))
}
+ test("from_json escaping") {
+ val schema = StructType(StructField("\"quote", IntegerType) :: Nil)
+ GenerateUnsafeProjection.generate(
+ JsonToStructs(schema, Map.empty, Literal("\"quote"), UTC_OPT) :: Nil)
+ }
+
test("from_json") {
val jsonData = """{"a": 1}"""
val schema = StructType(StructField("a", IntegerType) :: Nil)
@@ -549,6 +565,13 @@ class JsonExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper with
)
}
+ test("to_json escaping") {
+ val schema = StructType(StructField("\"quote", IntegerType) :: Nil)
+ val struct = Literal.create(create_row(1), schema)
+ GenerateUnsafeProjection.generate(
+ StructsToJson(Map.empty, struct, UTC_OPT) :: Nil)
+ }
+
test("to_json - struct") {
val schema = StructType(StructField("a", IntegerType) :: Nil)
val struct = Literal.create(create_row(1), schema)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 712d2bc..ad9492a 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.types.StringType
/**
@@ -255,6 +256,10 @@ class RegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"),
Literal("num"))
checkEvaluation(nonNullExpr, "num-num", row1)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ RegExpReplace(Literal("\"quote"), Literal("\"quote"),
Literal("\"quote")) :: Nil)
}
test("SPARK-22570: RegExpReplace should not create a lot of global
variables") {
@@ -305,6 +310,10 @@ class RegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
expr, row9, "Regex group count is 1, but the specified group index is 2")
checkExceptionInExpression[IllegalArgumentException](
expr, row10, "Regex group count is 0, but the specified group index is
1")
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ RegExpExtract(Literal("\"quote"), Literal("\"quote"), Literal(1)) :: Nil)
}
test("SPLIT") {
@@ -327,6 +336,10 @@ class RegexpExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
StringSplit(s1, s2, -1), Seq("aa", "bb", "cc"), row1)
checkEvaluation(StringSplit(s1, s2, -1), null, row2)
checkEvaluation(StringSplit(s1, s2, -1), null, row3)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ StringSplit(Literal("\"quote"), Literal("\"quote"), Literal(-1)) :: Nil)
}
test("SPARK-30759: cache initialization for literal patterns") {
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 4308f98..f18364d 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.dsl.expressions._
+import
org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.types._
class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
@@ -155,6 +156,11 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(c1 endsWith "b", false, row)
checkEvaluation(c2 endsWith "b", null, row)
checkEvaluation(c1 endsWith Literal.create(null, StringType), null, row)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(Contains(Literal("\"quote"),
Literal("\"quote")) :: Nil)
+ GenerateUnsafeProjection.generate(EndsWith(Literal("\"quote"),
Literal("\"quote")) :: Nil)
+ GenerateUnsafeProjection.generate(StartsWith(Literal("\"quote"),
Literal("\"quote")) :: Nil)
}
test("Substring") {
@@ -352,6 +358,10 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(Decode(b, Literal("utf-8")), null, create_row(null))
checkEvaluation(Decode(Literal.create(null, BinaryType),
Literal("utf-8")), null)
checkEvaluation(Decode(b, Literal.create(null, StringType)), null,
create_row(null))
+
+ // Test escaping of charset
+ GenerateUnsafeProjection.generate(Encode(a, Literal("\"quote")) :: Nil)
+ GenerateUnsafeProjection.generate(Decode(b, Literal("\"quote")) :: Nil)
}
test("initcap unit test") {
@@ -379,6 +389,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(Levenshtein(Literal("千世"), Literal("fog")), 3)
checkEvaluation(Levenshtein(Literal("世界千世"), Literal("大a界b")), 4)
// scalastyle:on
+
+ // Test escaping of arguments:
+ GenerateUnsafeProjection.generate(Levenshtein(Literal("\"quotea"),
Literal("\"quoteb")) :: Nil)
}
test("soundex unit test") {
@@ -560,6 +573,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringTrim(Literal("a"), Literal.create(null,
StringType)), null)
checkEvaluation(StringTrim(Literal.create(null, StringType),
Literal("a")), null)
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(StringTrim(Literal("\"quote"),
Literal("\"quote")) :: Nil)
+
checkEvaluation(StringTrim(Literal("yxTomxx"), Literal("xyz")), "Tom")
checkEvaluation(StringTrim(Literal("xxxbarxxx"), Literal("x")), "bar")
}
@@ -587,6 +603,10 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringTrimLeft(Literal.create(null, StringType),
Literal("a")), null)
checkEvaluation(StringTrimLeft(Literal("a"), Literal.create(null,
StringType)), null)
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ StringTrimLeft(Literal("\"quote"), Literal("\"quote")) :: Nil)
+
checkEvaluation(StringTrimLeft(Literal("zzzytest"), Literal("xyz")),
"test")
checkEvaluation(StringTrimLeft(Literal("zzzytestxyz"), Literal("xyz")),
"testxyz")
checkEvaluation(StringTrimLeft(Literal("xyxXxyLAST WORD"), Literal("xy")),
"XxyLAST WORD")
@@ -616,6 +636,10 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringTrimRight(Literal("a"), Literal.create(null,
StringType)), null)
checkEvaluation(StringTrimRight(Literal.create(null, StringType),
Literal("a")), null)
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ StringTrimRight(Literal("\"quote"), Literal("\"quote")) :: Nil)
+
checkEvaluation(StringTrimRight(Literal("testxxzx"), Literal("xyz")),
"test")
checkEvaluation(StringTrimRight(Literal("xyztestxxzx"), Literal("xyz")),
"xyztest")
checkEvaluation(StringTrimRight(Literal("TURNERyxXxy"), Literal("xy")),
"TURNERyxX")
@@ -632,6 +656,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
FormatString(Literal("aa%d%s"), Literal.create(null, IntegerType),
"cc"), "aanullcc")
checkEvaluation(
FormatString(Literal("aa%d%s"), 12, Literal.create(null, StringType)),
"aa12null")
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(FormatString(Literal("\"quote"),
Literal("\"quote")) :: Nil)
}
test("SPARK-22603: FormatString should not generate codes beyond 64KB") {
@@ -662,6 +689,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringInstr(s1, s2), 1, create_row("花花世界", "花"))
checkEvaluation(StringInstr(s1, s2), 0, create_row("花花世界", "小"))
// scalastyle:on
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(StringInstr(Literal("\"quote"),
Literal("\"quote")) :: Nil)
}
test("LOCATE") {
@@ -718,6 +748,10 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringRPad(s1, s2, s3), null, row3)
checkEvaluation(StringRPad(s1, s2, s3), null, row4)
checkEvaluation(StringRPad(s1, s2, s3), null, row5)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(StringLPad(Literal("\"quote"), s2,
Literal("\"quote")) :: Nil)
+ GenerateUnsafeProjection.generate(StringRPad(Literal("\"quote"), s2,
Literal("\"quote")) :: Nil)
checkEvaluation(StringRPad(Literal("hi"), Literal(5)), "hi ")
checkEvaluation(StringRPad(Literal("hi"), Literal(1)), "h")
}
@@ -732,6 +766,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(StringRepeat(Literal("hi"), Literal(-1)), "", row1)
checkEvaluation(StringRepeat(s1, s2), "hihi", row1)
checkEvaluation(StringRepeat(s1, s2), null, row2)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(StringRepeat(Literal("\"quote"),
Literal(2)) :: Nil)
}
test("REVERSE") {
@@ -897,6 +934,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
assert(ParseUrl(Seq(Literal("1"),
Literal(2))).checkInputDataTypes().isFailure)
assert(ParseUrl(Seq(Literal(1),
Literal("2"))).checkInputDataTypes().isFailure)
assert(ParseUrl(Seq(Literal("1"), Literal("2"),
Literal(3))).checkInputDataTypes().isFailure)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"),
Literal("\"quote"))) :: Nil)
}
test("Sentences") {
@@ -919,5 +959,9 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
answer)
checkEvaluation(Sentences("Hi there! The price was $1,234.56.... But, not
now.", "XXX", "YYY"),
answer)
+
+ // Test escaping of arguments
+ GenerateUnsafeProjection.generate(
+ Sentences(Literal("\"quote"), Literal("\"quote"), Literal("\"quote")) ::
Nil)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]