Druva-D commented on code in PR #20268:
URL: https://github.com/apache/datafusion/pull/20268#discussion_r3166900599
##########
datafusion/spark/src/function/string/format_string.rs:
##########
@@ -2385,4 +2437,295 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_insert_thousands_separator() {
+ assert_eq!(insert_thousands_separator("1234567.89"), "1,234,567.89");
+ assert_eq!(insert_thousands_separator("123.45"), "123.45");
+ assert_eq!(insert_thousands_separator("1234"), "1,234");
+ assert_eq!(insert_thousands_separator("12"), "12");
+ assert_eq!(insert_thousands_separator("0.5"), "0.5");
+ assert_eq!(
+ insert_thousands_separator("1234567890.1234"),
+ "1,234,567,890.1234"
+ );
+ assert_eq!(insert_thousands_separator("1000"), "1,000");
+ assert_eq!(insert_thousands_separator("100"), "100");
+ }
+
+ #[test]
+ fn test_grouping_separator_float() -> Result<()> {
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("1,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_decimal() -> Result<()> {
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Ok(Some("1,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_scientific_float() -> Result<()> {
+ // %,e — Java/Spark reject grouping separator with scientific notation
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Err(DataFusionError::Execution(
+ "Grouping separator ',' flag is not compatible with scientific
notation conversion 'e'".to_string(),
+ )),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,E — uppercase scientific also rejected
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,E".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Err(DataFusionError::Execution(
+ "Grouping separator ',' flag is not compatible with scientific
notation conversion 'E'".to_string(),
+ )),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,.0e — precision 0 scientific with grouping also rejected
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Err(DataFusionError::Execution(
+ "Grouping separator ',' flag is not compatible with scientific
notation conversion 'e'".to_string(),
+ )),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_compact_float() -> Result<()> {
+ // %,g with large number — triggers scientific, no commas
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("1.23457e+06")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,g with small number — triggers fixed-point, commas in integer part
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(12345.6))),
+ ],
+ Ok(Some("12,345.6")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,.0g — precision 0 compact with grouping (large number, scientific)
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("1e+06")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,G — uppercase compact
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,G".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("1.23457E+06")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_scientific_decimal() -> Result<()> {
+ // %,e on decimal — Java/Spark reject grouping separator with
scientific notation
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,e".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Err(DataFusionError::Execution(
+ "Grouping separator ',' flag is not compatible with scientific
notation conversion 'e'".to_string(),
+ )),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,.0e on decimal — also rejected
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0e".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Err(DataFusionError::Execution(
+ "Grouping separator ',' flag is not compatible with scientific
notation conversion 'e'".to_string(),
+ )),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_compact_decimal() -> Result<()> {
+ // %,g on decimal — large number triggers scientific, no commas
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Ok(Some("1.23457e+06")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,g on decimal — small number triggers fixed-point, commas expected
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(1234560),
10, 2)),
+ ],
+ Ok(Some("12,345.6")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %,.0g on decimal — precision 0 compact with grouping (scientific)
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%,.0g".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Ok(Some("1e+06")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_width_sign_float() -> Result<()> {
+ // %0,15.2f — zero-pad + grouping + width
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("0001,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %+,15.2f — force-sign + grouping + width (space-padded)
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some(" +1,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %-,15.2f — left-adjust + grouping + width
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%-,15.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Float64(Some(1234567.89))),
+ ],
+ Ok(Some("1,234,567.89 ")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_grouping_separator_width_sign_decimal() -> Result<()> {
+ // %0,15.2f — zero-pad + grouping + width on decimal
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%0,15.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Ok(Some("0001,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ // %+,15.2f — force-sign + grouping + width on decimal
+ test_scalar_function!(
+ FormatStringFunc::new(),
+ vec![
+
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+,15.2f".to_string()))),
+ ColumnarValue::Scalar(ScalarValue::Decimal128(Some(123456789),
10, 2)),
+ ],
+ Ok(Some(" +1,234,567.89")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ Ok(())
+ }
Review Comment:
Yep, looks like its not being handled in `format_decimal`, added a test and
a TODO to document, will implement it if I get time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]