This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b14e109672 GH-48366: [GLib][Ruby] Add DictionaryEncodeOptions (#48373)
b14e109672 is described below
commit b14e109672e47f6d9c7852df93b20c0016b84753
Author: Sten Larsson <[email protected]>
AuthorDate: Sun Dec 7 11:55:14 2025 +0100
GH-48366: [GLib][Ruby] Add DictionaryEncodeOptions (#48373)
### Rationale for this change
The `DictionaryEncodeOptions` class is not yet available in GLib/Ruby, but it
is needed to pass options to the `dictionary_encode` compute function.
### What changes are included in this PR?
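This adds the `GArrowDictionaryEncodeOptions` class, with its `null-encoding-behavior` property and the `GArrowDictionaryEncodeNullEncodingBehavior` enum, to GLib.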
### Are these changes tested?
Yes, with Ruby unit tests.
### Are there any user-facing changes?
Yes, a new class.
* GitHub Issue: #48366
Authored-by: Sten Larsson <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/compute.cpp | 130 ++++++++++++++++++++++++++
c_glib/arrow-glib/compute.h | 34 +++++++
c_glib/arrow-glib/compute.hpp | 6 ++
c_glib/test/test-dictionary-encode-options.rb | 46 +++++++++
4 files changed, 216 insertions(+)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 25eb50bab2..828e672d80 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -258,6 +258,9 @@ G_BEGIN_DECLS
* such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and
* `cumulative_min`.
*
+ * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode`
+ * function.
+ *
* There are many functions to compute data on an array.
*/
@@ -6662,6 +6665,108 @@ garrow_cumulative_options_new(void)
return GARROW_CUMULATIVE_OPTIONS(options);
}
+enum {  // Property IDs for GArrowDictionaryEncodeOptions; used to install the property and to dispatch in set/get_property.
+ PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR = 1,
+};
+
+G_DEFINE_TYPE(GArrowDictionaryEncodeOptions,  // registers the type with GARROW_TYPE_FUNCTION_OPTIONS as its parent type
+ garrow_dictionary_encode_options,
+ GARROW_TYPE_FUNCTION_OPTIONS)
+
+static void  // GObject set_property hook: stores a property value into the wrapped C++ options object.
+garrow_dictionary_encode_options_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto options =  // the arrow::compute::DictionaryEncodeOptions held by this GObject
+
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
+ options->null_encoding_behavior =  // the GLib enum value is cast directly to the C++ enum, with no range check here
+
static_cast<arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>(
+ g_value_get_enum(value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);  // unknown property ID: warn and leave the options untouched
+ break;
+ }
+}
+
+static void  // GObject get_property hook: reads a property value out of the wrapped C++ options object.
+garrow_dictionary_encode_options_get_property(GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ auto options =  // the arrow::compute::DictionaryEncodeOptions held by this GObject
+
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
+ g_value_set_enum(value,  // the C++ enum value is cast directly to the GLib enum
+ static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+ options->null_encoding_behavior));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);  // unknown property ID: warn and leave `value` unset
+ break;
+ }
+}
+
+static void  // Instance initializer: gives every new object its own default-constructed C++ options.
+garrow_dictionary_encode_options_init(GArrowDictionaryEncodeOptions *object)
+{
+ auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);  // private data is accessed through the GArrowFunctionOptions macro
+ priv->options = static_cast<arrow::compute::FunctionOptions *>(  // stored as a base-class pointer
+ new arrow::compute::DictionaryEncodeOptions());  // NOTE(review): presumably released by the parent class's finalizer; not visible in this diff, confirm
+}
+
+static void  // Class initializer: hooks up the property accessors and installs the "null-encoding-behavior" property.
+garrow_dictionary_encode_options_class_init(GArrowDictionaryEncodeOptionsClass
*klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->set_property = garrow_dictionary_encode_options_set_property;
+ gobject_class->get_property = garrow_dictionary_encode_options_get_property;
+
+ arrow::compute::DictionaryEncodeOptions options;  // default-constructed only to read the C++ default used for the property below
+
+ GParamSpec *spec;
+ /**
+ * GArrowDictionaryEncodeOptions:null-encoding-behavior:
+ *
+ * How null values will be encoded.
+ *
+ * The property default is taken from a default-constructed
+ * `arrow::compute::DictionaryEncodeOptions`.
+ *
+ * Since: 23.0.0
+ */
+ spec = g_param_spec_enum("null-encoding-behavior",
+ "Null encoding behavior",
+ "How null values will be encoded",
+
GARROW_TYPE_DICTIONARY_ENCODE_NULL_ENCODING_BEHAVIOR,
+
static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+ options.null_encoding_behavior),
+ static_cast<GParamFlags>(G_PARAM_READWRITE));  // the property is both readable and writable
+ g_object_class_install_property(gobject_class,
+
PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR,
+ spec);
+}
+
+/**
+ * garrow_dictionary_encode_options_new:
+ *
+ * Creates options for the `dictionary_encode` function. No property is
+ * passed at construction, so the wrapped C++ options stay default-constructed.
+ *
+ * Returns: A newly created #GArrowDictionaryEncodeOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new(void)
+{
+ auto options = g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS, NULL);  // no construct-time properties
+ return GARROW_DICTIONARY_ENCODE_OPTIONS(options);
+}
+
G_END_DECLS
arrow::Result<arrow::FieldRef>
@@ -6803,6 +6908,12 @@ garrow_function_options_new_raw(const
arrow::compute::FunctionOptions *arrow_opt
static_cast<const arrow::compute::CumulativeOptions *>(arrow_options);
auto options = garrow_cumulative_options_new_raw(arrow_cumulative_options);
return GARROW_FUNCTION_OPTIONS(options);
+ } else if (arrow_type_name == "DictionaryEncodeOptions") {
+ const auto arrow_dictionary_encode_options =
+ static_cast<const arrow::compute::DictionaryEncodeOptions
*>(arrow_options);
+ auto options =
+
garrow_dictionary_encode_options_new_raw(arrow_dictionary_encode_options);
+ return GARROW_FUNCTION_OPTIONS(options);
} else {
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
return GARROW_FUNCTION_OPTIONS(options);
@@ -7370,3 +7481,22 @@
garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options)
return static_cast<arrow::compute::CumulativeOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
}
+
+GArrowDictionaryEncodeOptions *  // Builds a GLib wrapper whose property mirrors the given C++ options.
+garrow_dictionary_encode_options_new_raw(
+ const arrow::compute::DictionaryEncodeOptions *arrow_options)  // dereferenced unconditionally below, so it must not be null
+{
+ return GARROW_DICTIONARY_ENCODE_OPTIONS(
+ g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS,
+ "null-encoding-behavior",  // only this field's value is read from arrow_options; the pointer itself is not stored
+ static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+ arrow_options->null_encoding_behavior),
+ NULL));
+}
+
+arrow::compute::DictionaryEncodeOptions *  // Returns the C++ options behind the GLib object, downcast from the base FunctionOptions pointer.
+garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions
*options)
+{
+ return static_cast<arrow::compute::DictionaryEncodeOptions *>(  // static_cast only: no runtime type check is performed
+ garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 92b5225798..14ca458ae7 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1189,4 +1189,38 @@ GARROW_AVAILABLE_IN_23_0
GArrowCumulativeOptions *
garrow_cumulative_options_new(void);
+/**
+ * GArrowDictionaryEncodeNullEncodingBehavior:
+ * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE: The null value will be added to the
+ * dictionary with a proper index.
+ * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK: The null value will be masked in the
+ * indices array.
+ *
+ * They correspond to the values of
+ * `arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior`.
+ *
+ * Since: 23.0.0
+ */
+typedef enum {  // enumerator order matters: compute.cpp converts to/from the C++ enum with a plain static_cast
+ GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE,
+ GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK,
+} GArrowDictionaryEncodeNullEncodingBehavior;
+
+#define GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS
\
+ (garrow_dictionary_encode_options_get_type())
+GARROW_AVAILABLE_IN_23_0  // options class for the `dictionary_encode` function, derived from GArrowFunctionOptions
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryEncodeOptions,
+ garrow_dictionary_encode_options,
+ GARROW,
+ DICTIONARY_ENCODE_OPTIONS,
+ GArrowFunctionOptions)
+struct _GArrowDictionaryEncodeOptionsClass  // class struct holds only the parent class; no extra members are added
+{
+ GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0  // public constructor; defined in compute.cpp
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new(void);
+
G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index ab5235b9cc..d1b91a79e6 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -186,3 +186,9 @@ GArrowCumulativeOptions *
garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions
*arrow_options);
arrow::compute::CumulativeOptions *
garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options);
+
+GArrowDictionaryEncodeOptions *  // C++ -> GLib: creates a wrapper object from existing C++ options
+garrow_dictionary_encode_options_new_raw(
+ const arrow::compute::DictionaryEncodeOptions *arrow_options);
+arrow::compute::DictionaryEncodeOptions *  // GLib -> C++: returns the C++ options held by the wrapper
+garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions
*options);
diff --git a/c_glib/test/test-dictionary-encode-options.rb b/c_glib/test/test-dictionary-encode-options.rb
new file mode 100644
index 0000000000..6ab6cdd2e4
--- /dev/null
+++ b/c_glib/test/test-dictionary-encode-options.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryEncodeOptions < Test::Unit::TestCase  # Unit tests for Arrow::DictionaryEncodeOptions.
+ include Helper::Buildable
+
+ def setup  # every test starts from freshly constructed options
+ @options = Arrow::DictionaryEncodeOptions.new
+ end
+
+ def test_null_encoding_behavior_property  # fresh options report MASK; assigning :encode switches them to ENCODE
+ assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::MASK,
@options.null_encoding_behavior)
+ @options.null_encoding_behavior = :encode
+ assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::ENCODE,
+ @options.null_encoding_behavior)
+ end
+
+ def test_dictionary_encode_function_with_encode  # runs "dictionary_encode" with the :encode behavior on input containing nil
+ args = [
+ Arrow::ArrayDatum.new(build_string_array(["a", "b", nil, "a", "b"])),
+ ]
+ @options.null_encoding_behavior = :encode
+ dictionary_encode_function = Arrow::Function.find("dictionary_encode")
+ result = dictionary_encode_function.execute(args, @options).value
+ assert_equal(Arrow::DictionaryDataType.new(Arrow::Int32DataType.new,
+ Arrow::StringDataType.new,
+ false),
+ result.value_data_type)
+ assert_equal(build_int32_array([0, 1, 2, 0, 1]), result.indices)  # the nil input is expected at index 2 rather than as a null index
+ end
+end
+