Jefffrey commented on code in PR #21115:
URL: https://github.com/apache/datafusion/pull/21115#discussion_r3092876020
##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -198,6 +202,52 @@ fn extract_single_field(base: ColumnarValue, name: ScalarValue) -> Result<Column
let string_value = name.try_as_str().flatten().map(|s| s.to_string());
match (array.data_type(), name, string_value) {
+ // Dictionary-encoded struct: extract the field from the dictionary's
+ // values (the deduplicated struct array) and rebuild a dictionary with
+ // the same keys. This preserves dictionary encoding without expanding.
+ (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+ if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+ {
+ // Downcast to DictionaryArray to access keys and values without
+ // materializing the dictionary.
+ macro_rules! extract_dict_field {
+ ($key_ty:ty) => {{
+ let dict = array
+ .as_any()
+ .downcast_ref::<DictionaryArray<$key_ty>>()
+ .ok_or_else(|| {
+ internal_datafusion_err!(
+ "Failed to downcast dictionary with key type {key_type}"
+ )
+ })?;
+ let values_struct = as_struct_array(dict.values())?;
+ let field_col =
+ values_struct.column_by_name(&field_name).ok_or_else(|| {
+ exec_datafusion_err!(
+ "Field {field_name} not found in dictionary struct"
+ )
+ })?;
+ // Rebuild dictionary: same keys, extracted field as values.
+ let new_dict = DictionaryArray::<$key_ty>::try_new(
+ dict.keys().clone(),
+ Arc::clone(field_col),
+ )?;
+ Ok(ColumnarValue::Array(Arc::new(new_dict)))
+ }};
+ }
+
+ match key_type.as_ref() {
+ DataType::Int8 => extract_dict_field!(Int8Type),
+ DataType::Int16 => extract_dict_field!(Int16Type),
+ DataType::Int32 => extract_dict_field!(Int32Type),
+ DataType::Int64 => extract_dict_field!(Int64Type),
+ DataType::UInt8 => extract_dict_field!(UInt8Type),
+ DataType::UInt16 => extract_dict_field!(UInt16Type),
+ DataType::UInt32 => extract_dict_field!(UInt32Type),
+ DataType::UInt64 => extract_dict_field!(UInt64Type),
+ other => exec_err!("Unsupported dictionary key type: {other}"),
+ }
+ }
Review Comment:
```suggestion
let dict = array.as_any_dictionary();
let values_struct = dict.values().as_struct();
let field_col =
values_struct.column_by_name(&field_name).ok_or_else(|| {
exec_datafusion_err!(
"Field {field_name} not found in dictionary struct"
)
})?;
Ok(ColumnarValue::Array(
dict.with_values(Arc::clone(field_col)),
))
```
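This suggestion utilizes the
[`AnyDictionaryArray`](https://docs.rs/arrow/latest/arrow/array/trait.AnyDictionaryArray.html)
trait, so the field can be extracted without dispatching on the dictionary key type.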
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]