This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b14e109672 GH-48366: [GLib][Ruby] Add DictionaryEncodeOptions (#48373)
b14e109672 is described below

commit b14e109672e47f6d9c7852df93b20c0016b84753
Author: Sten Larsson <[email protected]>
AuthorDate: Sun Dec 7 11:55:14 2025 +0100

    GH-48366: [GLib][Ruby] Add DictionaryEncodeOptions (#48373)
    
    ### Rationale for this change
    
    The `DictionaryEncodeOptions` class is not available in GLib/Ruby, and it is used together with the `dictionary_encode` compute function.
    
    ### What changes are included in this PR?
    
    This adds the `DictionaryEncodeOptions` class to GLib.
    
    ### Are these changes tested?
    
    Yes, with Ruby unit tests.
    
    ### Are there any user-facing changes?
    
    Yes, a new class.
    * GitHub Issue: #48366
    
    Authored-by: Sten Larsson <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/compute.cpp                 | 130 ++++++++++++++++++++++++++
 c_glib/arrow-glib/compute.h                   |  34 +++++++
 c_glib/arrow-glib/compute.hpp                 |   6 ++
 c_glib/test/test-dictionary-encode-options.rb |  46 +++++++++
 4 files changed, 216 insertions(+)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 25eb50bab2..828e672d80 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -258,6 +258,9 @@ G_BEGIN_DECLS
  * such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and
  * `cumulative_min`.
  *
+ * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode`
+ * function.
+ *
  * There are many functions to compute data on an array.
  */
 
@@ -6662,6 +6665,108 @@ garrow_cumulative_options_new(void)
   return GARROW_CUMULATIVE_OPTIONS(options);
 }
 
+enum {
+  PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR = 1,
+};
+
+G_DEFINE_TYPE(GArrowDictionaryEncodeOptions,
+              garrow_dictionary_encode_options,
+              GARROW_TYPE_FUNCTION_OPTIONS)
+
+static void
+garrow_dictionary_encode_options_set_property(GObject *object,
+                                              guint prop_id,
+                                              const GValue *value,
+                                              GParamSpec *pspec)
+{
+  auto options =
+    
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
+    options->null_encoding_behavior =
+      
static_cast<arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior>(
+        g_value_get_enum(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_dictionary_encode_options_get_property(GObject *object,
+                                              guint prop_id,
+                                              GValue *value,
+                                              GParamSpec *pspec)
+{
+  auto options =
+    
garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR:
+    g_value_set_enum(value,
+                     static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+                       options->null_encoding_behavior));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_dictionary_encode_options_init(GArrowDictionaryEncodeOptions *object)
+{
+  auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  priv->options = static_cast<arrow::compute::FunctionOptions *>(
+    new arrow::compute::DictionaryEncodeOptions());
+}
+
+static void
+garrow_dictionary_encode_options_class_init(GArrowDictionaryEncodeOptionsClass 
*klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = garrow_dictionary_encode_options_set_property;
+  gobject_class->get_property = garrow_dictionary_encode_options_get_property;
+
+  arrow::compute::DictionaryEncodeOptions options;
+
+  GParamSpec *spec;
+  /**
+   * GArrowDictionaryEncodeOptions:null-encoding-behavior:
+   *
+   * How null values will be encoded.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_enum("null-encoding-behavior",
+                           "Null encoding behavior",
+                           "How null values will be encoded",
+                           
GARROW_TYPE_DICTIONARY_ENCODE_NULL_ENCODING_BEHAVIOR,
+                           
static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+                             options.null_encoding_behavior),
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  
PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR,
+                                  spec);
+}
+
+/**
+ * garrow_dictionary_encode_options_new:
+ *
+ * Returns: A newly created #GArrowDictionaryEncodeOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new(void)
+{
+  auto options = g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS, NULL);
+  return GARROW_DICTIONARY_ENCODE_OPTIONS(options);
+}
+
 G_END_DECLS
 
 arrow::Result<arrow::FieldRef>
@@ -6803,6 +6908,12 @@ garrow_function_options_new_raw(const 
arrow::compute::FunctionOptions *arrow_opt
       static_cast<const arrow::compute::CumulativeOptions *>(arrow_options);
     auto options = garrow_cumulative_options_new_raw(arrow_cumulative_options);
     return GARROW_FUNCTION_OPTIONS(options);
+  } else if (arrow_type_name == "DictionaryEncodeOptions") {
+    const auto arrow_dictionary_encode_options =
+      static_cast<const arrow::compute::DictionaryEncodeOptions 
*>(arrow_options);
+    auto options =
+      
garrow_dictionary_encode_options_new_raw(arrow_dictionary_encode_options);
+    return GARROW_FUNCTION_OPTIONS(options);
   } else {
     auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
     return GARROW_FUNCTION_OPTIONS(options);
@@ -7370,3 +7481,22 @@ 
garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options)
   return static_cast<arrow::compute::CumulativeOptions *>(
     garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
 }
+
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new_raw(
+  const arrow::compute::DictionaryEncodeOptions *arrow_options)
+{
+  return GARROW_DICTIONARY_ENCODE_OPTIONS(
+    g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS,
+                 "null-encoding-behavior",
+                 static_cast<GArrowDictionaryEncodeNullEncodingBehavior>(
+                   arrow_options->null_encoding_behavior),
+                 NULL));
+}
+
+arrow::compute::DictionaryEncodeOptions *
+garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions 
*options)
+{
+  return static_cast<arrow::compute::DictionaryEncodeOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 92b5225798..14ca458ae7 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1189,4 +1189,38 @@ GARROW_AVAILABLE_IN_23_0
 GArrowCumulativeOptions *
 garrow_cumulative_options_new(void);
 
+/**
+ * GArrowDictionaryEncodeNullEncodingBehavior:
+ * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE: The null value will be added to the
+ * dictionary with a proper index.
+ * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK: The null value will be masked in the
+ * indices array.
+ *
+ * They correspond to the values of
+ * `arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior`.
+ *
+ * Since: 23.0.0
+ */
+typedef enum {
+  GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE,
+  GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK,
+} GArrowDictionaryEncodeNullEncodingBehavior;
+
+#define GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS                                  
          \
+  (garrow_dictionary_encode_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryEncodeOptions,
+                         garrow_dictionary_encode_options,
+                         GARROW,
+                         DICTIONARY_ENCODE_OPTIONS,
+                         GArrowFunctionOptions)
+struct _GArrowDictionaryEncodeOptionsClass
+{
+  GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new(void);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index ab5235b9cc..d1b91a79e6 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -186,3 +186,9 @@ GArrowCumulativeOptions *
 garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions 
*arrow_options);
 arrow::compute::CumulativeOptions *
 garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options);
+
+GArrowDictionaryEncodeOptions *
+garrow_dictionary_encode_options_new_raw(
+  const arrow::compute::DictionaryEncodeOptions *arrow_options);
+arrow::compute::DictionaryEncodeOptions *
+garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions 
*options);
diff --git a/c_glib/test/test-dictionary-encode-options.rb 
b/c_glib/test/test-dictionary-encode-options.rb
new file mode 100644
index 0000000000..6ab6cdd2e4
--- /dev/null
+++ b/c_glib/test/test-dictionary-encode-options.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryEncodeOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @options = Arrow::DictionaryEncodeOptions.new
+  end
+
+  def test_null_encoding_behavior_property
+    assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::MASK, 
@options.null_encoding_behavior)
+    @options.null_encoding_behavior = :encode
+    assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::ENCODE, 
+                 @options.null_encoding_behavior)
+  end
+
+  def test_dictionary_encode_function_with_encode
+    args = [
+      Arrow::ArrayDatum.new(build_string_array(["a", "b", nil, "a", "b"])),
+    ]
+    @options.null_encoding_behavior = :encode
+    dictionary_encode_function = Arrow::Function.find("dictionary_encode")
+    result = dictionary_encode_function.execute(args, @options).value
+    assert_equal(Arrow::DictionaryDataType.new(Arrow::Int32DataType.new,
+                                               Arrow::StringDataType.new,
+                                               false),
+                 result.value_data_type)
+    assert_equal(build_int32_array([0, 1, 2, 0, 1]), result.indices)
+  end
+end
+

Reply via email to