This is an automated email from the ASF dual-hosted git repository.

sgilmore pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new aabd3db3ca GH-38422: [MATLAB] Add `NumNulls` property to 
`arrow.array.Array` class (#47116)
aabd3db3ca is described below

commit aabd3db3cab180ed55e61df0cc4ee8714f6fd87a
Author: Sarah Gilmore <[email protected]>
AuthorDate: Wed Jul 16 13:01:15 2025 -0400

    GH-38422: [MATLAB] Add `NumNulls` property to `arrow.array.Array` class 
(#47116)
    
    ### Rationale for this change
    
    It would be nice if there was a `NumNulls` property on the 
`arrow.array.Array` base class. Currently, the only way to figure out the 
number of nulls is to count the number of `false` values in the `Valid` array:
    
    ```matlab
    >> a = arrow.array([1 2 NaN 4 5 6 NaN 8 9 10 NaN]);
    >> invalidValues = ~a.Valid;
    >> numNulls = nnz(invalidValues)
    
    numNulls =
    
         3
    ```
    
    It would be nice if `NumNulls` was already a property on the array class. 
As @ kou mentioned, we can use `arrow::Array::null_count()` to get the 
number of nulls.
    
    ### What changes are included in this PR?
    
    Added `NumNulls` as a property of the `arrow.array.Array` abstract class. 
`NumNulls` is a scalar `int64` value that returns the number of null elements 
in the array.
    
    **Example Usage**
    ```matlab
    >> a  = arrow.array([1 2 NaN 3 4 NaN 5 6 NaN])
    
    a =
    
      Float64Array with 9 elements and 3 null values:
    
        1 | 2 | null | ... | 5 | 6 | null
    
    >> a.NumNulls
    
    ans =
    
      int64
    
       3
    ```
    
    ### Are these changes tested?
    
    Yes. Added test cases verifying the `NumNulls` property to these MATLAB 
test classes: `hNumericArray`, `tBooleanArray`, `tTimestampArray`, `tTime32Array`, 
`tTime64Array`, `tDate32Array`, `tDate64Array`, `tListArray`, `tStringArray`, 
and `tStructArray`.
    
    ### Are there any user-facing changes?
    
    Yes.  Users can now use the `NumNulls` property to query the number of null 
elements in an array.
    
    ### Future Changes
    
    1. Add `NumNulls` as a property of `arrow.array.ChunkedArray`.
    
    * GitHub Issue: #38422
    
    Authored-by: Sarah Gilmore <[email protected]>
    Signed-off-by: Sarah Gilmore <[email protected]>
---
 matlab/src/cpp/arrow/matlab/array/proxy/array.cc |  7 +++++++
 matlab/src/cpp/arrow/matlab/array/proxy/array.h  |  2 ++
 matlab/src/matlab/+arrow/+array/Array.m          |  5 +++++
 matlab/test/arrow/array/hNumericArray.m          | 23 +++++++++++++++++++++++
 matlab/test/arrow/array/tBooleanArray.m          | 22 ++++++++++++++++++++++
 matlab/test/arrow/array/tDate32Array.m           | 22 ++++++++++++++++++++++
 matlab/test/arrow/array/tDate64Array.m           | 22 ++++++++++++++++++++++
 matlab/test/arrow/array/tListArray.m             |  8 ++++++++
 matlab/test/arrow/array/tStringArray.m           | 22 ++++++++++++++++++++++
 matlab/test/arrow/array/tStructArray.m           | 20 ++++++++++++++++++++
 matlab/test/arrow/array/tTime32Array.m           | 22 ++++++++++++++++++++++
 matlab/test/arrow/array/tTime64Array.m           | 23 +++++++++++++++++++++++
 matlab/test/arrow/array/tTimestampArray.m        | 22 ++++++++++++++++++++++
 13 files changed, 220 insertions(+)

diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc 
b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index 753b15fe72..ad88949349 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -37,6 +37,7 @@ Array::Array(std::shared_ptr<arrow::Array> array) : 
array{std::move(array)} {
   // Register Proxy methods.
   REGISTER_METHOD(Array, toString);
   REGISTER_METHOD(Array, getNumElements);
+  REGISTER_METHOD(Array, getNumNulls);
   REGISTER_METHOD(Array, getValid);
   REGISTER_METHOD(Array, getType);
   REGISTER_METHOD(Array, isEqual);
@@ -89,6 +90,12 @@ void 
Array::getNumElements(libmexclass::proxy::method::Context& context) {
   context.outputs[0] = length_mda;
 }
 
+void Array::getNumNulls(libmexclass::proxy::method::Context& context) {
+  ::matlab::data::ArrayFactory factory;
+  auto num_nulls_mda = factory.createScalar(array->null_count());
+  context.outputs[0] = num_nulls_mda;
+}
+
 void Array::getValid(libmexclass::proxy::method::Context& context) {
   auto array_length = static_cast<size_t>(array->length());
 
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h 
b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
index 189fd2fea0..e518212931 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -37,6 +37,8 @@ class Array : public libmexclass::proxy::Proxy {
 
   void getNumElements(libmexclass::proxy::method::Context& context);
 
+  void getNumNulls(libmexclass::proxy::method::Context& context);
+
   void getValid(libmexclass::proxy::method::Context& context);
 
   void getType(libmexclass::proxy::method::Context& context);
diff --git a/matlab/src/matlab/+arrow/+array/Array.m 
b/matlab/src/matlab/+arrow/+array/Array.m
index ffd9ec8bd9..09932abec7 100644
--- a/matlab/src/matlab/+arrow/+array/Array.m
+++ b/matlab/src/matlab/+arrow/+array/Array.m
@@ -23,6 +23,7 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
 
     properties(Dependent, SetAccess=private, GetAccess=public)
         NumElements
+        NumNulls
         Valid % Validity bitmap
         Type(1, 1) arrow.type.Type
     end
@@ -39,6 +40,10 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
             numElements = obj.Proxy.getNumElements();
         end
 
+        function numNulls = get.NumNulls(obj)
+            numNulls = obj.Proxy.getNumNulls();
+        end
+
         function validElements = get.Valid(obj)
             validElements = obj.Proxy.getValid();
         end
diff --git a/matlab/test/arrow/array/hNumericArray.m 
b/matlab/test/arrow/array/hNumericArray.m
index 233a090bae..6a9f33f496 100644
--- a/matlab/test/arrow/array/hNumericArray.m
+++ b/matlab/test/arrow/array/hNumericArray.m
@@ -208,5 +208,28 @@ classdef hNumericArray < matlab.unittest.TestCase
             % Test supplying more than two arrays to isequal
             tc.verifyFalse(isequal(array1, array1, array3, array4, array5)); 
         end
+
+        function TestNumNulls(tc)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            data1 = tc.MatlabArrayFcn(1:10);
+            array1 = tc.ArrowArrayConstructorFcn(data1);
+            tc.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 8 null values.
+            array2 = tc.ArrowArrayConstructorFcn(data1, Valid=[1 4]);
+            tc.verifyEqual(array2.NumNulls, int64(8));
+        end
+
+        function TestNumNullsNoSetter(tc)
+            % Verify the NumNulls property is read-only.
+
+            data = tc.MatlabArrayFcn(1:10);
+            array = tc.ArrowArrayConstructorFcn(data);
+            fcn = @() setfield(array, "NumNulls", 1);
+            tc.verifyError(fcn, "MATLAB:class:SetProhibited");            
+        end
+
     end
 end
diff --git a/matlab/test/arrow/array/tBooleanArray.m 
b/matlab/test/arrow/array/tBooleanArray.m
index 1af184d61c..4dca6ee7a5 100644
--- a/matlab/test/arrow/array/tBooleanArray.m
+++ b/matlab/test/arrow/array/tBooleanArray.m
@@ -216,5 +216,27 @@ classdef tBooleanArray < matlab.unittest.TestCase
             % Test supplying more than two arrays to isequal
             tc.verifyFalse(isequal(array1, array1, array3, array4, array5)); 
         end
+
+        function TestNumNulls(tc)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            data1 = tc.MatlabArrayFcn([true false true false]);
+            array1 = tc.ArrowArrayConstructorFcn(data1);
+            tc.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 3 null values.
+            array2 = tc.ArrowArrayConstructorFcn(data1, Valid=3);
+            tc.verifyEqual(array2.NumNulls, int64(3));
+        end
+
+        function TestNumNullsNoSetter(tc)
+            % Verify the NumNulls property is read-only.
+
+            data = tc.MatlabArrayFcn([true false true false]);
+            array = tc.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            tc.verifyError(fcn, "MATLAB:class:SetProhibited");            
+        end
     end
 end
diff --git a/matlab/test/arrow/array/tDate32Array.m 
b/matlab/test/arrow/array/tDate32Array.m
index e696efdb63..d4b1ae1b99 100644
--- a/matlab/test/arrow/array/tDate32Array.m
+++ b/matlab/test/arrow/array/tDate32Array.m
@@ -53,6 +53,28 @@ classdef tDate32Array < matlab.unittest.TestCase
             array = testCase.ArrowArrayConstructorFcn(dates);
             testCase.verifyEqual(array.NumElements, int64(10));
         end
+    
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            dates = datetime(2023, 1, 1) + days(1:5)';
+            array1 = testCase.ArrowArrayConstructorFcn(dates);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 3 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+            testCase.verifyEqual(array2.NumNulls, int64(3));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data =  datetime(2023, 1, 1) + days(1:5)';
+            array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
 
         function TestToMATLAB(testCase)
             % Verify toMATLAB() round-trips the original datetime array.
diff --git a/matlab/test/arrow/array/tDate64Array.m 
b/matlab/test/arrow/array/tDate64Array.m
index 124bae50a9..48075cc276 100644
--- a/matlab/test/arrow/array/tDate64Array.m
+++ b/matlab/test/arrow/array/tDate64Array.m
@@ -54,6 +54,28 @@ classdef tDate64Array < matlab.unittest.TestCase
             testCase.verifyEqual(array.NumElements, int64(10));
         end
 
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            dates = datetime(2023, 1, 1) + days(1:5)';
+            array1 = testCase.ArrowArrayConstructorFcn(dates);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 3 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+            testCase.verifyEqual(array2.NumNulls, int64(3));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data =  datetime(2023, 1, 1) + days(1:5)';
+            array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
+
         function TestToMATLAB(testCase)
             % Verify toMATLAB() round-trips the original datetime array.
             dates = testCase.UnixEpoch + days(1:10);
diff --git a/matlab/test/arrow/array/tListArray.m 
b/matlab/test/arrow/array/tListArray.m
index 0b5a74313f..a26b10fab8 100644
--- a/matlab/test/arrow/array/tListArray.m
+++ b/matlab/test/arrow/array/tListArray.m
@@ -32,6 +32,7 @@ classdef tListArray < matlab.unittest.TestCase
             %% Empty (zero-element) list (List<Float64>)
             Type = arrow.list(arrow.float64());
             NumElements = int64(0);
+            NumNulls = int64(0);
             Valid = logical.empty(0, 1);
             Offsets = arrow.array(int32(0));
             Values = arrow.array([]);
@@ -44,6 +45,7 @@ classdef tListArray < matlab.unittest.TestCase
                 Properties=struct(...
                     Type=Type, ...
                     NumElements=NumElements, ...
+                    NumNulls=NumNulls, ...
                     Valid=Valid, ...
                     Offsets=Offsets, ...
                     Values=Values ...
@@ -53,6 +55,7 @@ classdef tListArray < matlab.unittest.TestCase
             %% List with NULLs (List<String>)
             Type = arrow.list(arrow.string());
             NumElements = int64(4);
+            NumNulls = int64(2);
             Valid = [true, false, true, false];
             Offsets = arrow.array(int32([0, 1, 4, 6, 7]));
             Values = arrow.array(["A", missing, "C", "D", "E", missing, "G"]);
@@ -65,6 +68,7 @@ classdef tListArray < matlab.unittest.TestCase
                 Properties=struct(...
                     Type=Type, ...
                     NumElements=NumElements, ...
+                    NumNulls=NumNulls, ...
                     Valid=Valid, ...
                     Offsets=Offsets, ...
                     Values=Values ...
@@ -74,6 +78,7 @@ classdef tListArray < matlab.unittest.TestCase
             %% Single-level list (List<Float64>)
             Type = arrow.list(arrow.float64());
             NumElements = int64(3);
+            NumNulls = int64(0);
             Valid = true(1, NumElements);
             Offsets = arrow.array(int32([0, 2, 5, 9]));
             Values = arrow.array([1, 2, 3, 4, 5, 6, 7, 8, 9]);
@@ -86,6 +91,7 @@ classdef tListArray < matlab.unittest.TestCase
                 Properties=struct(...
                     Type=Type, ...
                     NumElements=NumElements, ...
+                    NumNulls=NumNulls, ...
                     Valid=Valid, ...
                     Offsets=Offsets, ...
                     Values=Values ...
@@ -95,6 +101,7 @@ classdef tListArray < matlab.unittest.TestCase
             %% Multi-level list (List<List<Float64>>)
             Type = arrow.list(arrow.list(arrow.float64()));
             NumElements = int64(2);
+            NumNulls = int64(0);
             Valid = true(1, NumElements);
             Offsets = arrow.array(int32([0, 1, 3]));
             Values = TestArrowArray.SingleLevelList.ArrowArray;
@@ -107,6 +114,7 @@ classdef tListArray < matlab.unittest.TestCase
                 Properties=struct(...
                     Type=Type, ...
                     NumElements=NumElements, ...
+                    NumNulls=NumNulls, ...
                     Valid=Valid, ...
                     Offsets=Offsets, ...
                     Values=Values ...
diff --git a/matlab/test/arrow/array/tStringArray.m 
b/matlab/test/arrow/array/tStringArray.m
index 5933b5a522..4c32148076 100644
--- a/matlab/test/arrow/array/tStringArray.m
+++ b/matlab/test/arrow/array/tStringArray.m
@@ -279,5 +279,27 @@ classdef tStringArray < matlab.unittest.TestCase
             % Test supplying more than two arrays to isequal
             tc.verifyFalse(isequal(array1, array1, array3, array4, array5)); 
         end
+
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            data1 = ["A"; "B"; "C"; "D"; "E"; "F"];
+            array1 = testCase.ArrowArrayConstructorFcn(data1);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 2 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(data1, Valid=[1 2 3 4]);
+            testCase.verifyEqual(array2.NumNulls, int64(2));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data = ["A"; "B"; "C"; missing; "D"; "E"; "F"];
+            array = testCase.ArrowArrayConstructorFcn(data);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
     end
 end
diff --git a/matlab/test/arrow/array/tStructArray.m 
b/matlab/test/arrow/array/tStructArray.m
index 5dd153d322..473169fb15 100644
--- a/matlab/test/arrow/array/tStructArray.m
+++ b/matlab/test/arrow/array/tStructArray.m
@@ -142,6 +142,26 @@ classdef tStructArray < matlab.unittest.TestCase
             tc.verifyError(fcn, "MATLAB:class:SetProhibited");
         end
 
+        function NumNulls(tc)
+            % Verify the NumNulls property.
+            import arrow.array.StructArray
+
+            array1 = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
+            tc.verifyEqual(array1.NumNulls, int64(0));
+
+            array2 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, 
Valid=[1 3 5]);
+            tc.verifyEqual(array2.NumNulls, int64(2));
+        end
+
+        function NumNullsNoSetter(tc)
+            % Verify the NumNulls property is read-only.
+            import arrow.array.StructArray
+
+            array = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
+            fcn = @() setfield(array, "NumNulls", 1);
+            tc.verifyError(fcn, "MATLAB:class:SetProhibited");
+        end
+
         function Type(tc)
             % Verify the Type property is set to the expected value.
             import arrow.array.StructArray
diff --git a/matlab/test/arrow/array/tTime32Array.m 
b/matlab/test/arrow/array/tTime32Array.m
index 99d5839974..9a46d517b0 100644
--- a/matlab/test/arrow/array/tTime32Array.m
+++ b/matlab/test/arrow/array/tTime32Array.m
@@ -98,6 +98,28 @@ classdef tTime32Array < matlab.unittest.TestCase
             testCase.verifyEqual(array.NumElements, int64(5));
         end
 
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            dates = seconds(1:10);
+            array1 = testCase.ArrowArrayConstructorFcn(dates);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 8 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+            testCase.verifyEqual(array2.NumNulls, int64(8));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data =  seconds(1:10);
+            array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
+
         function TestToMATLAB(testCase, Unit)
             % Verify toMATLAB() round-trips the original duration array.
             times = seconds([100 200 355 400]);
diff --git a/matlab/test/arrow/array/tTime64Array.m 
b/matlab/test/arrow/array/tTime64Array.m
index 816b7acddf..566173b83c 100644
--- a/matlab/test/arrow/array/tTime64Array.m
+++ b/matlab/test/arrow/array/tTime64Array.m
@@ -109,6 +109,29 @@ classdef tTime64Array < matlab.unittest.TestCase
             testCase.verifyEqual(array.NumElements, int64(5));
         end
 
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property.
+            
+            % array1 has 0 null values.
+            dates = seconds(1:10);
+            array1 = testCase.ArrowArrayConstructorFcn(dates);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 8 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+            testCase.verifyEqual(array2.NumNulls, int64(8));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data =  seconds(1:10);
+            array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
+
+
         function TestToMATLAB(testCase, Unit)
             % Verify toMATLAB() round-trips the original duration array.
             times = seconds([100 200 355 400]);
diff --git a/matlab/test/arrow/array/tTimestampArray.m 
b/matlab/test/arrow/array/tTimestampArray.m
index daf77c03fc..31d5830d70 100644
--- a/matlab/test/arrow/array/tTimestampArray.m
+++ b/matlab/test/arrow/array/tTimestampArray.m
@@ -50,6 +50,28 @@ classdef tTimestampArray < matlab.unittest.TestCase
             arrowArray = testCase.ArrowArrayConstructorFcn(dates);
             testCase.verifyEqual(arrowArray.NumElements, int64(5));
         end
+        
+        function TestNumNulls(testCase)
+            % Verify the NumNulls property returns correct value.
+            
+            % array1 has 0 null values.
+            dates = datetime(2023, 1, 1) + days(1:5)';
+            array1 = testCase.ArrowArrayConstructorFcn(dates);
+            testCase.verifyEqual(array1.NumNulls, int64(0));
+
+            % array2 has 3 null values.
+            array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+            testCase.verifyEqual(array2.NumNulls, int64(3));
+        end
+
+        function TestNumNullsNoSetter(testCase)
+            % Verify the NumNulls property is read-only.
+
+            data =  datetime(2023, 1, 1) + days(1:5)';
+            array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+            fcn = @() setfield(array, "NumNulls", 1);
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");           
 
+        end
 
         function TestDefaultTimestampType(testCase, TimeZone)
         % Verify the TimestampArray's units is Microsecond by default and

Reply via email to