yonatan-sevenai commented on code in PR #21593:
URL: https://github.com/apache/datafusion/pull/21593#discussion_r3097243875
##########
datafusion/sql/tests/cases/plan_to_sql.rs:
##########
@@ -3006,3 +2996,482 @@ fn test_unparse_manual_join_with_subquery_aggregate()
-> Result<()> {
Ok(())
}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_simple() -> Result<(), DataFusionError>
{
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
[1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_with_cross_join() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]), j1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE", "j1"."j1_id", "j1"."j1_string"
FROM LATERAL FLATTEN(INPUT => [1, 2, 3]) AS "_unnest" CROSS JOIN "j1""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_cross_join_inline() -> Result<(),
DataFusionError>
+{
+ // Cross join with two inline UNNEST sources — both produce valid FLATTEN.
+ // NOTE: UNNEST(table.column) is NOT tested with Snowflake because
+ // LATERAL FLATTEN(INPUT => col) requires the column to be a Snowflake
+ // VARIANT/ARRAY type, which cannot be validated at unparse time.
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "u"."c1", "j1"."j1_id", "j1"."j1_string" FROM
LATERAL FLATTEN(INPUT => [1, 2, 3]) AS "u" INNER JOIN "j1" ON ("u"."c1" =
"j1"."j1_id")"#,
+ );
+ Ok(())
+}
+
+// --- Edge case tests for Snowflake FLATTEN ---
+
+#[test]
+fn snowflake_flatten_implicit_from() -> Result<(), DataFusionError> {
+ // UNNEST in SELECT clause (no explicit FROM UNNEST) — implicit table
factor
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
[1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_string_array() -> Result<(), DataFusionError> {
+ // String array unnest
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST(['a','b','c'])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
['a', 'b', 'c']) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_select_unnest_with_alias() -> Result<(), DataFusionError>
{
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3]) as c1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" AS "c1" FROM LATERAL
FLATTEN(INPUT => [1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_select_unnest_plus_literal() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3]), 1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE", "Int64(1)" FROM LATERAL
FLATTEN(INPUT => [1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_from_unnest_with_table_alias() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "t1"."c1" FROM LATERAL FLATTEN(INPUT => [1, 2,
3]) AS "t1""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_unnest_from_subselect() -> Result<(), DataFusionError> {
+ // UNNEST operating on an array column produced by a subselect.
+ // Uses unnest_table which has array_col (List<Int64>).
+ // The filter uses array_col IS NOT NULL — a simple predicate
+ // that doesn't involve struct types (which Snowflake FLATTEN can't
handle).
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST(array_col) FROM (SELECT array_col FROM
unnest_table WHERE array_col IS NOT NULL LIMIT 3)",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM (SELECT
"unnest_table"."array_col" FROM "unnest_table" WHERE "unnest_table"."array_col"
IS NOT NULL LIMIT 3) CROSS JOIN LATERAL FLATTEN(INPUT =>
"unnest_table"."array_col") AS "_unnest""#,
+ );
+ Ok(())
+}
+
+/// Dummy scalar UDF for testing — takes a string and returns List<Int64>.
+/// Simulates any UDF that extracts an array from a column (e.g. parsing
+/// JSON, splitting a delimited string, etc.).
+#[derive(Debug, PartialEq, Eq, Hash)]
+struct ExtractArrayUdf {
+ signature: Signature,
+}
+
+impl ExtractArrayUdf {
+ fn new() -> Self {
+ Self {
+ signature: Signature::exact(vec![DataType::Utf8],
Volatility::Immutable),
+ }
+ }
+}
+
+impl ScalarUDFImpl for ExtractArrayUdf {
+ fn name(&self) -> &str {
+ "extract_array"
+ }
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+ Ok(DataType::List(Arc::new(Field::new_list_field(
+ DataType::Int64,
+ true,
+ ))))
+ }
+ fn invoke_with_args(&self, _args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
+ unimplemented!("test stub")
+ }
+}
+
+#[test]
+fn snowflake_flatten_unnest_udf_result() -> Result<(), DataFusionError> {
+ // UNNEST on a UDF result: extract_array(col) returns List<Int64>,
+ // then UNNEST flattens it. This exercises the path where the FLATTEN
+ // INPUT is a UDF call rather than a bare column reference.
+ let sql = "SELECT UNNEST(extract_array(j1_string)) AS items FROM j1 LIMIT
5";
+
+ let statement = Parser::new(&GenericDialect {})
+ .try_with_sql(sql)?
+ .parse_statement()?;
+
+ let state = MockSessionState::default()
+ .with_aggregate_function(max_udaf())
+ .with_aggregate_function(min_udaf())
+
.with_scalar_function(Arc::new(ScalarUDF::new_from_impl(ExtractArrayUdf::new())))
+ .with_expr_planner(Arc::new(CoreFunctionPlanner::default()))
+ .with_expr_planner(Arc::new(NestedFunctionPlanner))
+ .with_expr_planner(Arc::new(FieldAccessPlanner));
+
+ let context = MockContextProvider { state };
+ let sql_to_rel = SqlToRel::new(&context);
+ let plan = sql_to_rel
+ .sql_statement_to_plan(statement)
+ .unwrap_or_else(|e| panic!("Failed to parse sql: {sql}\n{e}"));
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "items"
FROM "j1" CROSS JOIN LATERAL FLATTEN(INPUT => extract_array("j1"."j1_string"))
AS "_unnest" LIMIT 5"#);
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_limit_between_projection_and_unnest() -> Result<(),
DataFusionError>
+{
+ // Build: Projection → Limit → Unnest → Projection → TableScan
+ // The optimizer can insert a Limit between the outer Projection and the
+ // Unnest. The FLATTEN code path must look through transparent nodes
+ // (Limit, Sort) to find the Unnest.
+ let schema = Schema::new(vec![Field::new(
+ "items",
+ DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
+ true,
+ )]);
+
+ let plan = table_scan(Some("source"), &schema, None)?
+ .project(vec![col("items").alias("__unnest_placeholder(items)")])?
+ .unnest_column("__unnest_placeholder(items)")?
+ .limit(0, Some(5))? // Limit BETWEEN outer Projection and Unnest
+ .project(vec![col("__unnest_placeholder(items)").alias("item")])?
+ .build()?;
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ // Must contain LATERAL FLATTEN — the Limit must not prevent FLATTEN
detection
+ insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "item"
FROM "source" CROSS JOIN LATERAL FLATTEN(INPUT => "source"."items", OUTER =>
true) AS "_unnest" LIMIT 5"#);
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_sort_between_projection_and_unnest() -> Result<(),
DataFusionError> {
+ // Build: Projection → Sort → Unnest → Projection → TableScan
+ // Same as Limit test but with Sort instead.
+ let schema = Schema::new(vec![Field::new(
+ "items",
+ DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
+ true,
+ )]);
+
+ let plan = table_scan(Some("source"), &schema, None)?
+ .project(vec![col("items").alias("__unnest_placeholder(items)")])?
+ .unnest_column("__unnest_placeholder(items)")?
+ .sort(vec![col("__unnest_placeholder(items)").sort(true, true)])?
+ .project(vec![col("__unnest_placeholder(items)").alias("item")])?
+ .build()?;
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ // Must contain LATERAL FLATTEN — the Sort must not prevent FLATTEN
detection
+ assert!(
+ actual.contains("LATERAL FLATTEN"),
+ "Expected LATERAL FLATTEN in SQL, got: {actual}"
+ );
+ assert!(
+ actual.contains("ORDER BY"),
+ "Expected ORDER BY in SQL, got: {actual}"
+ );
Review Comment:
Agreed. Updating the tests.
Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]