kosiew commented on code in PR #21509:
URL: https://github.com/apache/datafusion/pull/21509#discussion_r3062090902


##########
datafusion/functions/src/core/with_metadata.rs:
##########
@@ -0,0 +1,314 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::{Result, exec_err, internal_err};
+use datafusion_expr::{
+    ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, 
ScalarUDFImpl,
+    Signature, Volatility,
+};
+use datafusion_macros::user_doc;
+
+#[user_doc(
+    doc_section(label = "Other Functions"),
+    description = "Attaches Arrow field metadata (key/value pairs) to the 
input expression. Keys and values must be non-empty constant strings. Existing 
metadata on the input field is preserved; new keys overwrite on collision. This 
is the inverse of `arrow_metadata`.",
+    syntax_example = "with_metadata(expression, key1, value1[, key2, value2, 
...])",
+    sql_example = r#"```sql
+> select arrow_metadata(with_metadata(column1, 'unit', 'ms'), 'unit') from 
(values (1));
++---------------------------------------------------------------+
+| arrow_metadata(with_metadata(column1,Utf8("unit"),Utf8("ms")),Utf8("unit")) |
++---------------------------------------------------------------+
+| ms                                                            |
++---------------------------------------------------------------+
+> select arrow_metadata(with_metadata(column1, 'unit', 'ms', 'source', 
'sensor')) from (values (1));
++--------------------------+
+| {source: sensor, unit: ms} |
++--------------------------+
+```"#,
+    argument(
+        name = "expression",
+        description = "The expression whose output Arrow field should be 
annotated. Values flow through unchanged."
+    ),
+    argument(
+        name = "key",
+        description = "Metadata key. Must be a non-empty constant string 
literal."
+    ),
+    argument(
+        name = "value",
+        description = "Metadata value. Must be a non-empty constant string 
literal."
+    )
+)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct WithMetadataFunc {
+    signature: Signature,
+}
+
+impl Default for WithMetadataFunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl WithMetadataFunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::variadic_any(Volatility::Immutable),
+        }
+    }
+}
+
+impl ScalarUDFImpl for WithMetadataFunc {
+    fn name(&self) -> &str {
+        "with_metadata"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        self.doc()
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        internal_err!(
+            "with_metadata: return_type called instead of 
return_field_from_args"
+        )
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
+        // Require at least the value expression plus one (key, value) pair,
+        // and an odd total (1 + 2*N).
+        if args.arg_fields.len() < 3 {
+            return exec_err!(
+                "with_metadata requires the input expression plus at least one 
(key, value) pair (minimum 3 arguments), got {}",
+                args.arg_fields.len()
+            );
+        }
+        if args.arg_fields.len().is_multiple_of(2) {
+            return exec_err!(
+                "with_metadata requires an odd number of arguments (expression 
followed by key/value pairs), got {}",
+                args.arg_fields.len()
+            );
+        }
+
+        let input_field = &args.arg_fields[0];
+        let mut metadata = input_field.metadata().clone();
+
+        // Keys are at indices 1, 3, 5, ...; values at 2, 4, 6, ...
+        for pair_idx in 0..((args.scalar_arguments.len() - 1) / 2) {
+            let key_idx = 1 + pair_idx * 2;
+            let value_idx = key_idx + 1;
+
+            let key = args.scalar_arguments[key_idx]
+                .and_then(|sv| sv.try_as_str().flatten().filter(|s| 
!s.is_empty()))
+                .ok_or_else(|| {
+                    datafusion_common::DataFusionError::Execution(format!(
+                        "with_metadata requires argument {key_idx} (key) to be 
a non-empty constant string"
+                    ))
+                })?;
+
+            let value = args.scalar_arguments[value_idx]

Review Comment:
   I noticed the public contract says both keys and values must be non empty 
constant strings, but right now only the keys are enforced.
   
   Values can still be empty strings and get stored without any issue. Would 
you prefer to add the same non empty check for values here, or relax the docs 
and tests so the behavior is consistent?
   
   Either way works, it would just be nice for callers to have one clear rule.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to