goldmedal commented on code in PR #21593:
URL: https://github.com/apache/datafusion/pull/21593#discussion_r3094738024
##########
datafusion/sql/src/unparser/ast.rs:
##########
@@ -688,6 +697,94 @@ impl Default for UnnestRelationBuilder {
}
}
+/// Default table alias for FLATTEN table factors.
+/// Snowflake requires an alias to reference output columns (e.g.
`_unnest.VALUE`).
+pub const FLATTEN_DEFAULT_ALIAS: &str = "_unnest";
Review Comment:
We need to append an incremental counter to the alias, like `_unnest_1`,
`_unnest_2`, ...
Consider the following SQL:
```sql
SELECT * FROM j1, UNNEST(extract_array(j1_string)), j2,
UNNEST(extract_array(j2_string)) LIMIT 5
```
It will produce:
```sql
SELECT
"j1"."j1_id",
"j1"."j1_string",
"_unnest"."VALUE",
"j2"."j2_id",
"j2"."j2_string",
"_unnest"."VALUE"
FROM
"j1"
CROSS JOIN LATERAL FLATTEN (INPUT => extract_array ("j1"."j1_string"))
AS "_unnest"
CROSS JOIN "j2"
CROSS JOIN LATERAL FLATTEN (INPUT => extract_array ("j2"."j2_string"))
AS "_unnest"
LIMIT
5
```
The alias is ambiguous. Maybe you can use `AliasGenerator` to generate
incrementally numbered aliases from the prefix.
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -376,29 +406,112 @@ impl Unparser<'_> {
.select_to_sql_recursively(&new_plan, query, select,
relation);
}
- // Projection can be top-level plan for unnest relation
- // The projection generated by the `RecursiveUnnestRewriter`
from a UNNEST relation will have
- // only one expression, which is the placeholder column
generated by the rewriter.
- let unnest_input_type = if p.expr.len() == 1 {
- Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
- } else {
- None
- };
+ // Projection can be top-level plan for unnest relation.
+ // The projection generated by the `RecursiveUnnestRewriter`
+ // will have at least one expression referencing an unnest
+ // placeholder column.
+ let unnest_input_type: Option<UnnestInputType> =
+ p.expr.iter().find_map(Self::find_unnest_placeholder);
+
+ // --- UNNEST table factor path (BigQuery, etc.) ---
+ // Only fires for a single bare-placeholder projection.
+ // Uses peel_to_unnest_with_modifiers (rather than matching
+ // p.input directly) to handle Limit/Sort between Projection
+ // and Unnest.
if self.dialect.unnest_as_table_factor()
- && unnest_input_type.is_some()
- && let LogicalPlan::Unnest(unnest) = &p.input.as_ref()
+ && p.expr.len() == 1
+ && Self::is_bare_unnest_placeholder(&p.expr[0])
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
&& let Some(unnest_relation) =
self.try_unnest_to_table_factor_sql(unnest)?
{
relation.unnest(unnest_relation);
return self.select_to_sql_recursively(
- p.input.as_ref(),
+ unnest_plan,
query,
select,
relation,
);
}
+ // --- Snowflake LATERAL FLATTEN path ---
+ // `peel_to_unnest_with_modifiers` walks through any
+ // intermediate Limit/Sort nodes (the optimizer can insert
+ // these between the Projection and the Unnest), applies
+ // their modifiers to the query, and returns the Unnest +
+ // the LogicalPlan ref to recurse into. This bypasses the
+ // normal Limit/Sort handlers which would wrap the subtree
+ // in a derived subquery.
+ // SELECT rendering is delegated to
+ // `reconstruct_select_statement`, which rewrites
+ // placeholder columns to `"_unnest"."VALUE"` via
+ // `unproject_unnest_expr_as_flatten_value` — this works
+ // for bare, wrapped, and multi-expression projections.
+ if self.dialect.unnest_as_lateral_flatten()
+ && unnest_input_type.is_some()
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
+ && let Some(flatten) =
+ self.try_unnest_to_lateral_flatten_sql(unnest)?
+ {
+ let inner_projection =
+ Self::peel_to_inner_projection(unnest.input.as_ref())
+ .ok_or_else(|| {
+ DataFusionError::Internal(format!(
+ "Unnest input is not a Projection: {:?}",
+ unnest.input
+ ))
+ })?;
+
+ // An outer plan (e.g. a wrapping Projection) may have
+ // already set SELECT columns; only set them once.
+ if !select.already_projected() {
+ self.reconstruct_select_statement(plan, p, select)?;
+ }
+
+ if matches!(
+ inner_projection.input.as_ref(),
+ LogicalPlan::EmptyRelation(_)
+ ) {
+ // Inline array (e.g. UNNEST([1,2,3])):
+ // FLATTEN is the sole FROM source.
+ relation.flatten(flatten);
+ return self.select_to_sql_recursively(
+ unnest_plan,
+ query,
+ select,
+ relation,
+ );
+ }
+
+ // Non-empty source (table, subquery, etc.):
+ // recurse to set the primary FROM, then attach FLATTEN
+ // as a CROSS JOIN.
+ self.select_to_sql_recursively(unnest_plan, query, select,
relation)?;
+
+ let flatten_factor = flatten.build().map_err(|e| {
+ DataFusionError::Internal(format!("Failed to build
FLATTEN: {e}"))
Review Comment:
Use the macro to align the error handling
```suggestion
internal_datafusion_err!("Failed to build FLATTEN:
{e}")
```
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -376,29 +406,112 @@ impl Unparser<'_> {
.select_to_sql_recursively(&new_plan, query, select,
relation);
}
- // Projection can be top-level plan for unnest relation
- // The projection generated by the `RecursiveUnnestRewriter`
from a UNNEST relation will have
- // only one expression, which is the placeholder column
generated by the rewriter.
- let unnest_input_type = if p.expr.len() == 1 {
- Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
- } else {
- None
- };
+ // Projection can be top-level plan for unnest relation.
+ // The projection generated by the `RecursiveUnnestRewriter`
+ // will have at least one expression referencing an unnest
+ // placeholder column.
+ let unnest_input_type: Option<UnnestInputType> =
+ p.expr.iter().find_map(Self::find_unnest_placeholder);
+
+ // --- UNNEST table factor path (BigQuery, etc.) ---
+ // Only fires for a single bare-placeholder projection.
+ // Uses peel_to_unnest_with_modifiers (rather than matching
+ // p.input directly) to handle Limit/Sort between Projection
+ // and Unnest.
if self.dialect.unnest_as_table_factor()
- && unnest_input_type.is_some()
- && let LogicalPlan::Unnest(unnest) = &p.input.as_ref()
+ && p.expr.len() == 1
+ && Self::is_bare_unnest_placeholder(&p.expr[0])
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
&& let Some(unnest_relation) =
self.try_unnest_to_table_factor_sql(unnest)?
{
relation.unnest(unnest_relation);
return self.select_to_sql_recursively(
- p.input.as_ref(),
+ unnest_plan,
query,
select,
relation,
);
}
+ // --- Snowflake LATERAL FLATTEN path ---
+ // `peel_to_unnest_with_modifiers` walks through any
+ // intermediate Limit/Sort nodes (the optimizer can insert
+ // these between the Projection and the Unnest), applies
+ // their modifiers to the query, and returns the Unnest +
+ // the LogicalPlan ref to recurse into. This bypasses the
+ // normal Limit/Sort handlers which would wrap the subtree
+ // in a derived subquery.
+ // SELECT rendering is delegated to
+ // `reconstruct_select_statement`, which rewrites
+ // placeholder columns to `"_unnest"."VALUE"` via
+ // `unproject_unnest_expr_as_flatten_value` — this works
+ // for bare, wrapped, and multi-expression projections.
+ if self.dialect.unnest_as_lateral_flatten()
+ && unnest_input_type.is_some()
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
+ && let Some(flatten) =
+ self.try_unnest_to_lateral_flatten_sql(unnest)?
+ {
+ let inner_projection =
+ Self::peel_to_inner_projection(unnest.input.as_ref())
+ .ok_or_else(|| {
+ DataFusionError::Internal(format!(
+ "Unnest input is not a Projection: {:?}",
+ unnest.input
+ ))
Review Comment:
```suggestion
internal_datafusion_err!("Unnest input is
not a Projection: {:?}", unnest.input)
```
##########
datafusion/sql/src/unparser/dialect.rs:
##########
@@ -211,6 +206,15 @@ pub trait Dialect: Send + Sync {
false
}
+ /// Unparse the unnest plan as `LATERAL FLATTEN(INPUT => expr, ...)`.
+ ///
+ /// Snowflake uses FLATTEN as a table function instead of the SQL-standard
UNNEST.
+ /// When this returns `true`, the unparser emits
+ /// `LATERAL FLATTEN(INPUT => <col>, OUTER => <bool>)` in the FROM clause.
+ fn unnest_as_lateral_flatten(&self) -> bool {
+ false
+ }
Review Comment:
> 90% of the logic there is focused on untangling all the ways an Unnest
node can interact with other entities (UDFs, `SubqueryAlias`, `Projection`,
`Limits`, `Sorts`) etc. I've seen some weird shapes come out of the optimizer
that I wanted to ensure get supported. This traversal logic is very consistent
with the way the Unparser works across the board now.
>
Agreed — the traversal logic (peeling through Limit/Sort/SubqueryAlias to
find the Unnest) is generic and belongs in the core unparser. I'm not
suggesting moving that into the dialect.
> In other words, the Snowflake specific items are mostly business logic in
how to really decompose the unnest forms supported in DF vs the completely
different forms it takes in Snowflake.
>
> We could move the AST `FlattenRelationBuilder` inside the dialect,
>
> but I feel this breaks the current separation strategy in DF's design.
>
This is actually the direction I'd prefer. The dialect trait method would
receive the prepared context from the traversal and return the table factor AST
node. The core unparser handles what to unnest; the dialect decides how to
render it.
One practical reason: `FLATTEN(INPUT => col, OUTER => bool)` isn't the only
Snowflake syntax for unnesting — `TABLE(FLATTEN([1, 2, 3], '', false, false,
'ARRAY'))` is another form. A trait-based hook gives the dialect flexibility to
choose between them. It also opens the door for other dialects (e.g. Trino's
CROSS JOIN UNNEST, BigQuery's table-factor UNNEST) to implement the same method
without adding more conditional branches to plan.rs.
Regarding "separation strategy" — I think we're aligned on the goal of
keeping dialect-specific rendering out of the core unparser. I understand it's
not always easy to draw that line, but in this case I think the split is fairly
natural: traversal in plan.rs, table-factor construction in the dialect.
##########
datafusion/sql/src/unparser/plan.rs:
##########
@@ -376,29 +406,112 @@ impl Unparser<'_> {
.select_to_sql_recursively(&new_plan, query, select,
relation);
}
- // Projection can be top-level plan for unnest relation
- // The projection generated by the `RecursiveUnnestRewriter`
from a UNNEST relation will have
- // only one expression, which is the placeholder column
generated by the rewriter.
- let unnest_input_type = if p.expr.len() == 1 {
- Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
- } else {
- None
- };
+ // Projection can be top-level plan for unnest relation.
+ // The projection generated by the `RecursiveUnnestRewriter`
+ // will have at least one expression referencing an unnest
+ // placeholder column.
+ let unnest_input_type: Option<UnnestInputType> =
+ p.expr.iter().find_map(Self::find_unnest_placeholder);
+
+ // --- UNNEST table factor path (BigQuery, etc.) ---
+ // Only fires for a single bare-placeholder projection.
+ // Uses peel_to_unnest_with_modifiers (rather than matching
+ // p.input directly) to handle Limit/Sort between Projection
+ // and Unnest.
if self.dialect.unnest_as_table_factor()
- && unnest_input_type.is_some()
- && let LogicalPlan::Unnest(unnest) = &p.input.as_ref()
+ && p.expr.len() == 1
+ && Self::is_bare_unnest_placeholder(&p.expr[0])
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
&& let Some(unnest_relation) =
self.try_unnest_to_table_factor_sql(unnest)?
{
relation.unnest(unnest_relation);
return self.select_to_sql_recursively(
- p.input.as_ref(),
+ unnest_plan,
query,
select,
relation,
);
}
+ // --- Snowflake LATERAL FLATTEN path ---
+ // `peel_to_unnest_with_modifiers` walks through any
+ // intermediate Limit/Sort nodes (the optimizer can insert
+ // these between the Projection and the Unnest), applies
+ // their modifiers to the query, and returns the Unnest +
+ // the LogicalPlan ref to recurse into. This bypasses the
+ // normal Limit/Sort handlers which would wrap the subtree
+ // in a derived subquery.
+ // SELECT rendering is delegated to
+ // `reconstruct_select_statement`, which rewrites
+ // placeholder columns to `"_unnest"."VALUE"` via
+ // `unproject_unnest_expr_as_flatten_value` — this works
+ // for bare, wrapped, and multi-expression projections.
+ if self.dialect.unnest_as_lateral_flatten()
+ && unnest_input_type.is_some()
+ && let Some((unnest, unnest_plan)) =
+ self.peel_to_unnest_with_modifiers(p.input.as_ref(),
query)?
+ && let Some(flatten) =
+ self.try_unnest_to_lateral_flatten_sql(unnest)?
+ {
Review Comment:
Consider extracting the snowflake-specific part to another function. It can
make the Projection handling clear.
##########
datafusion/sql/tests/cases/plan_to_sql.rs:
##########
@@ -3006,3 +2996,482 @@ fn test_unparse_manual_join_with_subquery_aggregate()
-> Result<()> {
Ok(())
}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_simple() -> Result<(), DataFusionError>
{
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
[1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_with_cross_join() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]), j1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE", "j1"."j1_id", "j1"."j1_string"
FROM LATERAL FLATTEN(INPUT => [1, 2, 3]) AS "_unnest" CROSS JOIN "j1""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_unnest_to_lateral_flatten_cross_join_inline() -> Result<(),
DataFusionError>
+{
+ // Cross join with two inline UNNEST sources — both produce valid FLATTEN.
+ // NOTE: UNNEST(table.column) is NOT tested with Snowflake because
+ // LATERAL FLATTEN(INPUT => col) requires the column to be a Snowflake
+ // VARIANT/ARRAY type, which cannot be validated at unparse time.
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "u"."c1", "j1"."j1_id", "j1"."j1_string" FROM
LATERAL FLATTEN(INPUT => [1, 2, 3]) AS "u" INNER JOIN "j1" ON ("u"."c1" =
"j1"."j1_id")"#,
+ );
+ Ok(())
+}
+
+// --- Edge case tests for Snowflake FLATTEN ---
+
+#[test]
+fn snowflake_flatten_implicit_from() -> Result<(), DataFusionError> {
+ // UNNEST in SELECT clause (no explicit FROM UNNEST) — implicit table
factor
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
[1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_string_array() -> Result<(), DataFusionError> {
+ // String array unnest
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST(['a','b','c'])",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM LATERAL FLATTEN(INPUT =>
['a', 'b', 'c']) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_select_unnest_with_alias() -> Result<(), DataFusionError>
{
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3]) as c1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" AS "c1" FROM LATERAL
FLATTEN(INPUT => [1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_select_unnest_plus_literal() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST([1,2,3]), 1",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE", "Int64(1)" FROM LATERAL
FLATTEN(INPUT => [1, 2, 3]) AS "_unnest""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_from_unnest_with_table_alias() -> Result<(),
DataFusionError> {
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT * FROM UNNEST([1,2,3]) AS t1 (c1)",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "t1"."c1" FROM LATERAL FLATTEN(INPUT => [1, 2,
3]) AS "t1""#,
+ );
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_unnest_from_subselect() -> Result<(), DataFusionError> {
+ // UNNEST operating on an array column produced by a subselect.
+ // Uses unnest_table which has array_col (List<Int64>).
+ // The filter uses array_col IS NOT NULL — a simple predicate
+ // that doesn't involve struct types (which Snowflake FLATTEN can't
handle).
+ let snowflake = SnowflakeDialect::new();
+ roundtrip_statement_with_dialect_helper!(
+ sql: "SELECT UNNEST(array_col) FROM (SELECT array_col FROM
unnest_table WHERE array_col IS NOT NULL LIMIT 3)",
+ parser_dialect: GenericDialect {},
+ unparser_dialect: snowflake,
+ expected: @r#"SELECT "_unnest"."VALUE" FROM (SELECT
"unnest_table"."array_col" FROM "unnest_table" WHERE "unnest_table"."array_col"
IS NOT NULL LIMIT 3) CROSS JOIN LATERAL FLATTEN(INPUT =>
"unnest_table"."array_col") AS "_unnest""#,
+ );
+ Ok(())
+}
+
+/// Dummy scalar UDF for testing — takes a string and returns List<Int64>.
+/// Simulates any UDF that extracts an array from a column (e.g. parsing
+/// JSON, splitting a delimited string, etc.).
+#[derive(Debug, PartialEq, Eq, Hash)]
+struct ExtractArrayUdf {
+ signature: Signature,
+}
+
+impl ExtractArrayUdf {
+ fn new() -> Self {
+ Self {
+ signature: Signature::exact(vec![DataType::Utf8],
Volatility::Immutable),
+ }
+ }
+}
+
+impl ScalarUDFImpl for ExtractArrayUdf {
+ fn name(&self) -> &str {
+ "extract_array"
+ }
+ fn signature(&self) -> &Signature {
+ &self.signature
+ }
+ fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+ Ok(DataType::List(Arc::new(Field::new_list_field(
+ DataType::Int64,
+ true,
+ ))))
+ }
+ fn invoke_with_args(&self, _args: ScalarFunctionArgs) ->
Result<ColumnarValue> {
+ unimplemented!("test stub")
+ }
+}
+
+#[test]
+fn snowflake_flatten_unnest_udf_result() -> Result<(), DataFusionError> {
+ // UNNEST on a UDF result: extract_array(col) returns List<Int64>,
+ // then UNNEST flattens it. This exercises the path where the FLATTEN
+ // INPUT is a UDF call rather than a bare column reference.
+ let sql = "SELECT UNNEST(extract_array(j1_string)) AS items FROM j1 LIMIT
5";
+
+ let statement = Parser::new(&GenericDialect {})
+ .try_with_sql(sql)?
+ .parse_statement()?;
+
+ let state = MockSessionState::default()
+ .with_aggregate_function(max_udaf())
+ .with_aggregate_function(min_udaf())
+
.with_scalar_function(Arc::new(ScalarUDF::new_from_impl(ExtractArrayUdf::new())))
+ .with_expr_planner(Arc::new(CoreFunctionPlanner::default()))
+ .with_expr_planner(Arc::new(NestedFunctionPlanner))
+ .with_expr_planner(Arc::new(FieldAccessPlanner));
+
+ let context = MockContextProvider { state };
+ let sql_to_rel = SqlToRel::new(&context);
+ let plan = sql_to_rel
+ .sql_statement_to_plan(statement)
+ .unwrap_or_else(|e| panic!("Failed to parse sql: {sql}\n{e}"));
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "items"
FROM "j1" CROSS JOIN LATERAL FLATTEN(INPUT => extract_array("j1"."j1_string"))
AS "_unnest" LIMIT 5"#);
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_limit_between_projection_and_unnest() -> Result<(),
DataFusionError>
+{
+ // Build: Projection → Limit → Unnest → Projection → TableScan
+ // The optimizer can insert a Limit between the outer Projection and the
+ // Unnest. The FLATTEN code path must look through transparent nodes
+ // (Limit, Sort) to find the Unnest.
+ let schema = Schema::new(vec![Field::new(
+ "items",
+ DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
+ true,
+ )]);
+
+ let plan = table_scan(Some("source"), &schema, None)?
+ .project(vec![col("items").alias("__unnest_placeholder(items)")])?
+ .unnest_column("__unnest_placeholder(items)")?
+ .limit(0, Some(5))? // Limit BETWEEN outer Projection and Unnest
+ .project(vec![col("__unnest_placeholder(items)").alias("item")])?
+ .build()?;
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ // Must contain LATERAL FLATTEN — the Limit must not prevent FLATTEN
detection
+ insta::assert_snapshot!(actual, @r#"SELECT "_unnest"."VALUE" AS "item"
FROM "source" CROSS JOIN LATERAL FLATTEN(INPUT => "source"."items", OUTER =>
true) AS "_unnest" LIMIT 5"#);
+ Ok(())
+}
+
+#[test]
+fn snowflake_flatten_sort_between_projection_and_unnest() -> Result<(),
DataFusionError> {
+ // Build: Projection → Sort → Unnest → Projection → TableScan
+ // Same as Limit test but with Sort instead.
+ let schema = Schema::new(vec![Field::new(
+ "items",
+ DataType::List(Arc::new(Field::new_list_field(DataType::Utf8, true))),
+ true,
+ )]);
+
+ let plan = table_scan(Some("source"), &schema, None)?
+ .project(vec![col("items").alias("__unnest_placeholder(items)")])?
+ .unnest_column("__unnest_placeholder(items)")?
+ .sort(vec![col("__unnest_placeholder(items)").sort(true, true)])?
+ .project(vec![col("__unnest_placeholder(items)").alias("item")])?
+ .build()?;
+
+ let snowflake = SnowflakeDialect::new();
+ let unparser = Unparser::new(&snowflake);
+ let result = unparser.plan_to_sql(&plan)?;
+ let actual = result.to_string();
+
+ // Must contain LATERAL FLATTEN — the Sort must not prevent FLATTEN
detection
+ assert!(
+ actual.contains("LATERAL FLATTEN"),
+ "Expected LATERAL FLATTEN in SQL, got: {actual}"
+ );
+ assert!(
+ actual.contains("ORDER BY"),
+ "Expected ORDER BY in SQL, got: {actual}"
+ );
Review Comment:
Instead of checking the keyword, I prefer to use `insta::assert_snapshot` to
assert the full SQL result. It can make others understand what SQL is expected
and fully protect the expected result from being broken by other changes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]