This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0b8fb56cba GH-48508: [GLib][Ruby] Add TDigestOptions (#48529)
0b8fb56cba is described below

commit 0b8fb56cba732dd9ef044aab67163282b4165de4
Author: Sten Larsson <[email protected]>
AuthorDate: Thu Jan 1 07:42:08 2026 +0100

    GH-48508: [GLib][Ruby] Add TDigestOptions (#48529)
    
    ### Rationale for this change
    
    The `TDigestOptions` class is not available in GLib/Ruby, and it is used together with the `tdigest` compute function.
    
    ### What changes are included in this PR?
    
    This adds the `TDigestOptions` class to GLib.
    
    ### Are these changes tested?
    
    Yes, with Ruby unit tests.
    
    ### Are there any user-facing changes?
    
    Yes, a new class.
    
    * GitHub Issue: #48508
    
    Authored-by: Sten Larsson <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/compute.cpp       | 252 +++++++++++++++++++++++++++++++++++-
 c_glib/arrow-glib/compute.h         |  25 ++++
 c_glib/arrow-glib/compute.hpp       |   5 +
 c_glib/test/test-tdigest-options.rb |  71 ++++++++++
 4 files changed, 350 insertions(+), 3 deletions(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index db35b9a89e..7e786c870c 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -259,8 +259,8 @@ G_BEGIN_DECLS
  * such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and
  * `cumulative_min`.
  *
- * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode`
- * function.
+ * #GArrowDictionaryEncodeOptions is a class to customize the
+ * `dictionary_encode` function.
  *
  * #GArrowElementWiseAggregateOptions is a class to customize element-wise
  * aggregate functions such as `min_element_wise` and `max_element_wise`.
@@ -299,7 +299,6 @@ G_BEGIN_DECLS
  * #GArrowReplaceSliceOptions is a class to customize the
  * `utf8_replace_slice` and `binary_replace_slice` functions.
  *
- *
  * #GArrowPartitionNthOptions is a class to customize the
  * `partition_nth_indices` function.
  *
@@ -327,6 +326,9 @@ G_BEGIN_DECLS
  * #GArrowSliceOptions is a class to customize the `utf8_slice_codeunits` and
  * `binary_slice` functions.
  *
+ * #GArrowTDigestOptions is a class to customize the `tdigest` and
+ * `hash_tdigest` functions.
+ *
  * There are many functions to compute data on an array.
  */
 
@@ -9781,6 +9783,219 @@ garrow_slice_options_new(void)
   return GARROW_SLICE_OPTIONS(g_object_new(GARROW_TYPE_SLICE_OPTIONS, nullptr));
 }
 
+enum {
+  PROP_TDIGEST_OPTIONS_DELTA = 1,
+  PROP_TDIGEST_OPTIONS_BUFFER_SIZE,
+  PROP_TDIGEST_OPTIONS_SKIP_NULLS,
+  PROP_TDIGEST_OPTIONS_MIN_COUNT,
+};
+
+G_DEFINE_TYPE(GArrowTDigestOptions, garrow_tdigest_options, GARROW_TYPE_FUNCTION_OPTIONS)
+
+static void
+garrow_tdigest_options_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_TDIGEST_OPTIONS_DELTA:
+    options->delta = g_value_get_uint(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_BUFFER_SIZE:
+    options->buffer_size = g_value_get_uint(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_SKIP_NULLS:
+    options->skip_nulls = g_value_get_boolean(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_MIN_COUNT:
+    options->min_count = g_value_get_uint(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_tdigest_options_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_TDIGEST_OPTIONS_DELTA:
+    g_value_set_uint(value, options->delta);
+    break;
+  case PROP_TDIGEST_OPTIONS_BUFFER_SIZE:
+    g_value_set_uint(value, options->buffer_size);
+    break;
+  case PROP_TDIGEST_OPTIONS_SKIP_NULLS:
+    g_value_set_boolean(value, options->skip_nulls);
+    break;
+  case PROP_TDIGEST_OPTIONS_MIN_COUNT:
+    g_value_set_uint(value, options->min_count);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_tdigest_options_init(GArrowTDigestOptions *object)
+{
+  auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  arrow_priv->options =
+    static_cast<arrow::compute::FunctionOptions *>(new arrow::compute::TDigestOptions());
+}
+
+static void
+garrow_tdigest_options_class_init(GArrowTDigestOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = garrow_tdigest_options_set_property;
+  gobject_class->get_property = garrow_tdigest_options_get_property;
+
+  auto options = arrow::compute::TDigestOptions::Defaults();
+
+  GParamSpec *spec;
+  /**
+   * GArrowTDigestOptions:delta:
+   *
+   * Compression parameter, default 100.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_uint("delta",
+                           "Delta",
+                           "Compression parameter, default 100",
+                           0,
+                           G_MAXUINT32,
+                           options.delta,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_DELTA, spec);
+
+  /**
+   * GArrowTDigestOptions:buffer-size:
+   *
+   * Input buffer size, default 500.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_uint("buffer-size",
+                           "Buffer size",
+                           "Input buffer size, default 500",
+                           0,
+                           G_MAXUINT32,
+                           options.buffer_size,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_BUFFER_SIZE, spec);
+
+  /**
+   * GArrowTDigestOptions:skip-nulls:
+   *
+   * If true (the default), null values are ignored. Otherwise, if any
+   * value is null, emit null.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boolean("skip-nulls",
+                              "Skip nulls",
+                              "If true (the default), null values are ignored. "
+                              "Otherwise, if any value is null, emit null.",
+                              options.skip_nulls,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_SKIP_NULLS, spec);
+
+  /**
+   * GArrowTDigestOptions:min-count:
+   *
+   * If less than this many non-null values are observed, emit null.
+   *
+   * Since: 23.0.0
+   */
+  spec =
+    g_param_spec_uint("min-count",
+                      "Min count",
+                      "If less than this many non-null values are observed, emit null",
+                      0,
+                      G_MAXUINT32,
+                      options.min_count,
+                      static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_MIN_COUNT, spec);
+}
+
+/**
+ * garrow_tdigest_options_new:
+ *
+ * Returns: A newly created #GArrowTDigestOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowTDigestOptions *
+garrow_tdigest_options_new(void)
+{
+  return GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS, nullptr));
+}
+
+/**
+ * garrow_tdigest_options_get_qs:
+ * @options: A #GArrowTDigestOptions.
+ * @n: (out): The number of `q`s.
+ *
+ * Returns: (array length=n) (transfer none): The `q`s to be used.
+ *
+ * Since: 23.0.0
+ */
+const gdouble *
+garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n)
+{
+  auto priv = garrow_tdigest_options_get_raw(options);
+  if (n) {
+    *n = priv->q.size();
+  }
+  return priv->q.data();
+}
+
+/**
+ * garrow_tdigest_options_set_q:
+ * @options: A #GArrowTDigestOptions.
+ * @q: A `q` to be used.
+ *
+ * Since: 23.0.0
+ */
+void
+garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q)
+{
+  auto priv = garrow_tdigest_options_get_raw(options);
+  priv->q.clear();
+  priv->q.push_back(q);
+}
+
+/**
+ * garrow_tdigest_options_set_qs:
+ * @options: A #GArrowTDigestOptions.
+ * @qs: (array length=n): `q`s to be used.
+ * @n: The number of @qs.
+ *
+ * Since: 23.0.0
+ */
+void
+garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n)
+{
+  auto priv = garrow_tdigest_options_get_raw(options);
+  priv->q.clear();
+  for (gsize i = 0; i < n; i++) {
+    priv->q.push_back(qs[i]);
+  }
+}
+
 G_END_DECLS
 
 arrow::Result<arrow::FieldRef>
@@ -10036,6 +10251,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
       static_cast<const arrow::compute::SliceOptions *>(arrow_options);
     auto options = garrow_slice_options_new_raw(arrow_slice_options);
     return GARROW_FUNCTION_OPTIONS(options);
+  } else if (arrow_type_name == "TDigestOptions") {
+    const auto arrow_tdigest_options =
+      static_cast<const arrow::compute::TDigestOptions *>(arrow_options);
+    auto options = garrow_tdigest_options_new_raw(arrow_tdigest_options);
+    return GARROW_FUNCTION_OPTIONS(options);
   } else {
     auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
     return GARROW_FUNCTION_OPTIONS(options);
@@ -11071,3 +11291,29 @@ garrow_slice_options_get_raw(GArrowSliceOptions *options)
   return static_cast<arrow::compute::SliceOptions *>(
     garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
 }
+
+GArrowTDigestOptions *
+garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options)
+{
+  auto options = GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS,
+                                                     "delta",
+                                                     arrow_options->delta,
+                                                     "buffer-size",
+                                                     arrow_options->buffer_size,
+                                                     "skip-nulls",
+                                                     arrow_options->skip_nulls,
+                                                     "min-count",
+                                                     arrow_options->min_count,
+                                                     nullptr));
+  garrow_tdigest_options_set_qs(options,
+                                arrow_options->q.data(),
+                                arrow_options->q.size());
+  return options;
+}
+
+arrow::compute::TDigestOptions *
+garrow_tdigest_options_get_raw(GArrowTDigestOptions *options)
+{
+  return static_cast<arrow::compute::TDigestOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 37c2fee644..3a2be582a6 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1704,4 +1704,29 @@ GARROW_AVAILABLE_IN_23_0
 GArrowSliceOptions *
 garrow_slice_options_new(void);
 
+#define GARROW_TYPE_TDIGEST_OPTIONS (garrow_tdigest_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(GArrowTDigestOptions,
+                         garrow_tdigest_options,
+                         GARROW,
+                         TDIGEST_OPTIONS,
+                         GArrowFunctionOptions)
+struct _GArrowTDigestOptionsClass
+{
+  GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowTDigestOptions *
+garrow_tdigest_options_new(void);
+GARROW_AVAILABLE_IN_23_0
+const gdouble *
+garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n);
+GARROW_AVAILABLE_IN_23_0
+void
+garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q);
+GARROW_AVAILABLE_IN_23_0
+void
+garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 1b32022092..25924d408f 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -313,3 +313,8 @@ GArrowSliceOptions *
 garrow_slice_options_new_raw(const arrow::compute::SliceOptions *arrow_options);
 arrow::compute::SliceOptions *
 garrow_slice_options_get_raw(GArrowSliceOptions *options);
+
+GArrowTDigestOptions *
+garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options);
+arrow::compute::TDigestOptions *
+garrow_tdigest_options_get_raw(GArrowTDigestOptions *options);
diff --git a/c_glib/test/test-tdigest-options.rb b/c_glib/test/test-tdigest-options.rb
new file mode 100644
index 0000000000..117e7ec056
--- /dev/null
+++ b/c_glib/test/test-tdigest-options.rb
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestTDigestOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @options = Arrow::TDigestOptions.new
+  end
+
+  def test_delta
+    assert_equal(100, @options.delta)
+    @options.delta = 200
+    assert_equal(200, @options.delta)
+  end
+
+  def test_buffer_size
+    assert_equal(500, @options.buffer_size)
+    @options.buffer_size = 1000
+    assert_equal(1000, @options.buffer_size)
+  end
+
+  def test_skip_nulls
+    assert do
+      @options.skip_nulls?
+    end
+    @options.skip_nulls = false
+    assert do
+      not @options.skip_nulls?
+    end
+  end
+
+  def test_min_count
+    assert_equal(0, @options.min_count)
+    @options.min_count = 1
+    assert_equal(1, @options.min_count)
+  end
+
+  def test_q
+    assert_equal([0.5], @options.qs)
+    @options.qs = [0.1, 0.2, 0.9]
+    assert_equal([0.1, 0.2, 0.9], @options.qs)
+    @options.q = 0.7
+    assert_equal([0.7], @options.qs)
+  end
+
+  def test_tdigest_function
+    args = [
+      Arrow::ArrayDatum.new(build_double_array([1.0, 2.0, 3.0, 4.0, 5.0])),
+    ]
+    @options.q = 0.5
+    @options.delta = 200
+    tdigest_function = Arrow::Function.find("tdigest")
+    result = tdigest_function.execute(args, @options).value
+    assert_equal(build_double_array([3.0]), result)
+  end
+end

Reply via email to