kosiew commented on code in PR #20268:
URL: https://github.com/apache/datafusion/pull/20268#discussion_r3158691840


##########
datafusion/spark/src/function/string/format_string.rs:
##########
@@ -2385,4 +2437,295 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_insert_thousands_separator() {
+        assert_eq!(insert_thousands_separator("1234567.89"), "1,234,567.89");
+        assert_eq!(insert_thousands_separator("123.45"), "123.45");
+        assert_eq!(insert_thousands_separator("1234"), "1,234");
+        assert_eq!(insert_thousands_separator("12"), "12");
+        assert_eq!(insert_thousands_separator("0.5"), "0.5");
+        assert_eq!(
+            insert_thousands_separator("1234567890.1234"),
+            "1,234,567,890.1234"
+        );
+        assert_eq!(insert_thousands_separator("1000"), "1,000");
+        assert_eq!(insert_thousands_separator("100"), "100");
+    }
+
+    #[test]
+    fn test_grouping_separator_float() -> Result<()> {
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("1,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_decimal() -> Result<()> {
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Ok(Some("1,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_scientific_float() -> Result<()> {
+        // %,e — Java/Spark reject grouping separator with scientific notation
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Err(DataFusionError::Execution(
+                "Grouping separator ',' flag is not compatible with scientific 
notation conversion 'e'".to_string(),
+            )),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,E — uppercase scientific also rejected
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,E".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Err(DataFusionError::Execution(
+                "Grouping separator ',' flag is not compatible with scientific 
notation conversion 'E'".to_string(),
+            )),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,.0e — precision 0 scientific with grouping also rejected
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Err(DataFusionError::Execution(
+                "Grouping separator ',' flag is not compatible with scientific 
notation conversion 'e'".to_string(),
+            )),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_compact_float() -> Result<()> {
+        // %,g with large number — triggers scientific, no commas
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("1.23457e+06")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,g with small number — triggers fixed-point, commas in integer part
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(12345.6))),
+            ],
+            Ok(Some("12,345.6")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,.0g — precision 0 compact with grouping (large number, scientific)
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("1e+06")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,G — uppercase compact
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,G".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("1.23457E+06")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_scientific_decimal() -> Result<()> {
+        // %,e on decimal — Java/Spark reject grouping separator with 
scientific notation
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Err(DataFusionError::Execution(
+                "Grouping separator ',' flag is not compatible with scientific 
notation conversion 'e'".to_string(),
+            )),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,.0e on decimal — also rejected
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Err(DataFusionError::Execution(
+                "Grouping separator ',' flag is not compatible with scientific 
notation conversion 'e'".to_string(),
+            )),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_compact_decimal() -> Result<()> {
+        // %,g on decimal — large number triggers scientific, no commas
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Ok(Some("1.23457e+06")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,g on decimal — small number triggers fixed-point, commas expected
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(1234560), 
10, 2)),
+            ],
+            Ok(Some("12,345.6")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %,.0g on decimal — precision 0 compact with grouping (scientific)
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Ok(Some("1e+06")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_width_sign_float() -> Result<()> {
+        // %0,15.2f — zero-pad + grouping + width
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("0001,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %+,15.2f — force-sign + grouping + width (space-padded)
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("  +1,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %-,15.2f — left-adjust + grouping + width
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%-,15.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+            ],
+            Ok(Some("1,234,567.89   ")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_grouping_separator_width_sign_decimal() -> Result<()> {
+        // %0,15.2f — zero-pad + grouping + width on decimal
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Ok(Some("0001,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        // %+,15.2f — force-sign + grouping + width on decimal
+        test_scalar_function!(
+            FormatStringFunc::new(),
+            vec![
+                
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789), 
10, 2)),
+            ],
+            Ok(Some("  +1,234,567.89")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        Ok(())
+    }

Review Comment:
   I think one test case that was explicitly called out earlier is still 
missing.
   
   The combination %(,10.2f (parentheses for negatives + grouping + width) was 
mentioned alongside %0,10.2f. The %0 case is covered now, but I could not find 
a test for the %( case.
   
   From a quick read, format_float seems to already handle 
negative_in_parentheses correctly, including placing commas in the integer 
part. So this is probably working fine, but it would be great to have a test to 
confirm the behavior.
   
   Something like:
   
   // %(,12.2f with -1234.5
   // Expect something like "(1,234.50)" with padding depending on width
   // Worth verifying against Java: String.format("%(,12.2f", -1234.5)
   
   Also worth noting that format_decimal currently ignores 
negative_in_parentheses and always uses a minus sign. That looks like 
pre-existing behavior, but adding the decimal version of this test would help 
document that gap more clearly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to