This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new bd071be1dc feat: add `custom_string_literal_override` to unparser
Dialect trait (#20590)
bd071be1dc is described below
commit bd071be1dcc0b7e50ed224e8bf83e15470f20f8d
Author: Jax Liu <[email protected]>
AuthorDate: Tue Mar 17 04:44:46 2026 +0800
feat: add `custom_string_literal_override` to unparser Dialect trait
(#20590)
## Which issue does this PR close?
- Closes #.
## Rationale for this change
When unparsing queries targeting databases like MSSQL, non-ASCII string
literals need special handling. MSSQL requires the `N'...'` (national
string literal) prefix for strings containing Unicode characters.
Currently the unparser always emits single-quoted strings with no way
for dialects to customize this behavior.
## What changes are included in this PR?
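- Add a new string-literal override method to the `Dialect` trait
(named `custom_string_literal_override` in the PR title; it appears as
`string_literal_to_sql` in the diff below), with a default
implementation returning `None` (no override).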
- Consolidate the `Utf8`, `Utf8View`, and `LargeUtf8` match arms in
`scalar_value_to_sql` and route them through the new dialect hook.
## Are these changes tested?
Yes. A test-only `MsSqlDialect` is defined in the test module to verify:
- ASCII strings produce standard single-quoted literals (no `N` prefix)
- Non-ASCII strings produce national string literals (`N'...'`)
- The default dialect is unaffected (no `N` prefix regardless of
content)
It has been used by Wren AI in production for a while:
https://github.com/Canner/datafusion/pull/8
## Are there any user-facing changes?
Yes. The `Dialect` trait gains a new method (named
`custom_string_literal_override` in the PR title; it appears as
`string_literal_to_sql` in the diff below). This is a non-breaking
change since the method has a default implementation. Dialect
implementors can override it to customize string literal unparsing.
---
datafusion/sql/src/unparser/dialect.rs | 11 +++++
datafusion/sql/src/unparser/expr.rs | 84 +++++++++++++++++++++++++++++-----
2 files changed, 84 insertions(+), 11 deletions(-)
diff --git a/datafusion/sql/src/unparser/dialect.rs
b/datafusion/sql/src/unparser/dialect.rs
index 31d2662cc4..fe278a0e1e 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -248,6 +248,17 @@ pub trait Dialect: Send + Sync {
fn supports_empty_select_list(&self) -> bool {
false
}
+
+ /// Override the default string literal unparsing.
+ ///
+ /// Returns `Some(ast::Expr)` to replace the default single-quoted string,
+ /// or `None` to use the default behavior.
+ ///
+ /// For example, MSSQL requires non-ASCII strings to use national string
+ /// literal syntax (`N'datafusion資料融合'`).
+ fn string_literal_to_sql(&self, _s: &str) -> Option<ast::Expr> {
+ None
+ }
}
/// `IntervalStyle` to use for unparsing
diff --git a/datafusion/sql/src/unparser/expr.rs
b/datafusion/sql/src/unparser/expr.rs
index 503048bb3c..54c8eeb125 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -1294,18 +1294,17 @@ impl Unparser<'_> {
Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false)))
}
ScalarValue::UInt64(None) =>
Ok(ast::Expr::value(ast::Value::Null)),
- ScalarValue::Utf8(Some(str)) => {
- Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
- }
- ScalarValue::Utf8(None) => Ok(ast::Expr::value(ast::Value::Null)),
- ScalarValue::Utf8View(Some(str)) => {
- Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
- }
- ScalarValue::Utf8View(None) =>
Ok(ast::Expr::value(ast::Value::Null)),
- ScalarValue::LargeUtf8(Some(str)) => {
+ ScalarValue::Utf8(Some(str))
+ | ScalarValue::Utf8View(Some(str))
+ | ScalarValue::LargeUtf8(Some(str)) => {
+ if let Some(expr) = self.dialect.string_literal_to_sql(str) {
+ return Ok(expr);
+ }
Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
}
- ScalarValue::LargeUtf8(None) =>
Ok(ast::Expr::value(ast::Value::Null)),
+ ScalarValue::Utf8(None)
+ | ScalarValue::Utf8View(None)
+ | ScalarValue::LargeUtf8(None) =>
Ok(ast::Expr::value(ast::Value::Null)),
ScalarValue::Binary(Some(_)) => not_impl_err!("Unsupported scalar:
{v:?}"),
ScalarValue::Binary(None) =>
Ok(ast::Expr::value(ast::Value::Null)),
ScalarValue::BinaryView(Some(_)) => {
@@ -2397,7 +2396,6 @@ mod tests {
let expected = r#"('a' > 4)"#;
assert_eq!(actual, expected);
-
Ok(())
}
@@ -2960,6 +2958,70 @@ mod tests {
Ok(())
}
+ #[test]
+ fn test_mssql_dialect_national_literal() -> Result<()> {
+ struct MsSqlDialect;
+
+ impl Dialect for MsSqlDialect {
+ fn identifier_quote_style(&self, _identifier: &str) ->
Option<char> {
+ Some('[')
+ }
+
+ fn string_literal_to_sql(&self, s: &str) -> Option<ast::Expr> {
+ if !s.is_ascii() {
+ Some(ast::Expr::value(ast::Value::NationalStringLiteral(
+ s.to_string(),
+ )))
+ } else {
+ None
+ }
+ }
+ }
+
+ let dialect = MsSqlDialect;
+ let unparser = Unparser::new(&dialect);
+
+ // Get national string literal for the custom mssql dialect
+ for (s, expected) in [
+ ("national string", "'national string'"),
+ ("datafusion資料融合", "N'datafusion資料融合'"),
+ ] {
+ let expr = Expr::Literal(ScalarValue::Utf8(Some(s.to_string())),
None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+
+ let expr =
Expr::Literal(ScalarValue::Utf8View(Some(s.to_string())), None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+
+ let expr =
Expr::Literal(ScalarValue::LargeUtf8(Some(s.to_string())), None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+ }
+
+ let dialect = DefaultDialect {};
+ let unparser = Unparser::new(&dialect);
+
+ // Get normal string literal for default dialect
+ for (s, expected) in [
+ ("national string", "'national string'"),
+ ("datafusion資料融合", "'datafusion資料融合'"),
+ ] {
+ let expr = Expr::Literal(ScalarValue::Utf8(Some(s.to_string())),
None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+
+ let expr =
Expr::Literal(ScalarValue::Utf8View(Some(s.to_string())), None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+
+ let expr =
Expr::Literal(ScalarValue::LargeUtf8(Some(s.to_string())), None);
+ let ast = unparser.expr_to_sql(&expr)?;
+ assert_eq!(ast.to_string(), expected);
+ }
+ Ok(())
+ }
+
#[test]
fn test_cast_value_to_dict_expr() {
let tests = [(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]