This is an automated email from the ASF dual-hosted git repository.
sgilmore pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new aabd3db3ca GH-38422: [MATLAB] Add `NumNulls` property to
`arrow.array.Array` class (#47116)
aabd3db3ca is described below
commit aabd3db3cab180ed55e61df0cc4ee8714f6fd87a
Author: Sarah Gilmore <[email protected]>
AuthorDate: Wed Jul 16 13:01:15 2025 -0400
GH-38422: [MATLAB] Add `NumNulls` property to `arrow.array.Array` class
(#47116)
### Rationale for this change
It would be nice if there was a `NumNulls` property on the
`arrow.array.Array` base class. Currently, the only way to figure out the
number of nulls is to count the number of `false` values in the `Valid` array:
```matlab
>> a = arrow.array([1 2 NaN 4 5 6 NaN 8 9 10 NaN]);
>> invalidValues = ~a.Valid;
>> numNulls = nnz(invalidValues)
numNulls =
3
```
It would be nice if `NumNulls` was already a property on the array class.
As @kou mentioned, we can use `arrow::Array::null_count()` to get the
number of nulls.
### What changes are included in this PR?
Added `NumNulls` as a property of the `arrow.array.Array` abstract class.
`NumNulls` is a scalar `int64` value that returns the number of null elements
in the array.
**Example Usage**
```matlab
>> a = arrow.array([1 2 NaN 3 4 NaN 5 6 NaN])
a =
Float64Array with 9 elements and 3 null values:
1 | 2 | null | ... | 5 | 6 | null
>> a.NumNulls
ans =
int64
3
```
### Are these changes tested?
Yes. Added test cases verifying the `NumNulls` property to these MATLAB
test classes: `hNumericArray`, `tBooleanArray`, `tTimestampArray`, `tTime32Array`,
`tTime64Array`, `tDate32Array`, `tDate64Array`, `tListArray`, `tStringArray`,
and `tStructArray`.
### Are there any user-facing changes?
Yes. Users can now use the `NumNulls` property to query the number of null
elements in an array.
### Future Changes
1. Add `NumNulls` as a property of `arrow.array.ChunkedArray`.
* GitHub Issue: #38422
Authored-by: Sarah Gilmore <[email protected]>
Signed-off-by: Sarah Gilmore <[email protected]>
---
matlab/src/cpp/arrow/matlab/array/proxy/array.cc | 7 +++++++
matlab/src/cpp/arrow/matlab/array/proxy/array.h | 2 ++
matlab/src/matlab/+arrow/+array/Array.m | 5 +++++
matlab/test/arrow/array/hNumericArray.m | 23 +++++++++++++++++++++++
matlab/test/arrow/array/tBooleanArray.m | 22 ++++++++++++++++++++++
matlab/test/arrow/array/tDate32Array.m | 22 ++++++++++++++++++++++
matlab/test/arrow/array/tDate64Array.m | 22 ++++++++++++++++++++++
matlab/test/arrow/array/tListArray.m | 8 ++++++++
matlab/test/arrow/array/tStringArray.m | 22 ++++++++++++++++++++++
matlab/test/arrow/array/tStructArray.m | 20 ++++++++++++++++++++
matlab/test/arrow/array/tTime32Array.m | 22 ++++++++++++++++++++++
matlab/test/arrow/array/tTime64Array.m | 23 +++++++++++++++++++++++
matlab/test/arrow/array/tTimestampArray.m | 22 ++++++++++++++++++++++
13 files changed, 220 insertions(+)
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index 753b15fe72..ad88949349 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -37,6 +37,7 @@ Array::Array(std::shared_ptr<arrow::Array> array) :
array{std::move(array)} {
// Register Proxy methods.
REGISTER_METHOD(Array, toString);
REGISTER_METHOD(Array, getNumElements);
+ REGISTER_METHOD(Array, getNumNulls);
REGISTER_METHOD(Array, getValid);
REGISTER_METHOD(Array, getType);
REGISTER_METHOD(Array, isEqual);
@@ -89,6 +90,12 @@ void
Array::getNumElements(libmexclass::proxy::method::Context& context) {
context.outputs[0] = length_mda;
}
+void Array::getNumNulls(libmexclass::proxy::method::Context& context) {
+ ::matlab::data::ArrayFactory factory;
+ auto num_nulls_mda = factory.createScalar(array->null_count());
+ context.outputs[0] = num_nulls_mda;
+}
+
void Array::getValid(libmexclass::proxy::method::Context& context) {
auto array_length = static_cast<size_t>(array->length());
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
index 189fd2fea0..e518212931 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -37,6 +37,8 @@ class Array : public libmexclass::proxy::Proxy {
void getNumElements(libmexclass::proxy::method::Context& context);
+ void getNumNulls(libmexclass::proxy::method::Context& context);
+
void getValid(libmexclass::proxy::method::Context& context);
void getType(libmexclass::proxy::method::Context& context);
diff --git a/matlab/src/matlab/+arrow/+array/Array.m
b/matlab/src/matlab/+arrow/+array/Array.m
index ffd9ec8bd9..09932abec7 100644
--- a/matlab/src/matlab/+arrow/+array/Array.m
+++ b/matlab/src/matlab/+arrow/+array/Array.m
@@ -23,6 +23,7 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
properties(Dependent, SetAccess=private, GetAccess=public)
NumElements
+ NumNulls
Valid % Validity bitmap
Type(1, 1) arrow.type.Type
end
@@ -39,6 +40,10 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
numElements = obj.Proxy.getNumElements();
end
+ function numNulls = get.NumNulls(obj)
+ numNulls = obj.Proxy.getNumNulls();
+ end
+
function validElements = get.Valid(obj)
validElements = obj.Proxy.getValid();
end
diff --git a/matlab/test/arrow/array/hNumericArray.m
b/matlab/test/arrow/array/hNumericArray.m
index 233a090bae..6a9f33f496 100644
--- a/matlab/test/arrow/array/hNumericArray.m
+++ b/matlab/test/arrow/array/hNumericArray.m
@@ -208,5 +208,28 @@ classdef hNumericArray < matlab.unittest.TestCase
% Test supplying more than two arrays to isequal
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
end
+
+ function TestNumNulls(tc)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ data1 = tc.MatlabArrayFcn(1:10);
+ array1 = tc.ArrowArrayConstructorFcn(data1);
+ tc.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 8 null values.
+ array2 = tc.ArrowArrayConstructorFcn(data1, Valid=[1 4]);
+ tc.verifyEqual(array2.NumNulls, int64(8));
+ end
+
+ function TestNumNullsNoSetter(tc)
+ % Verify the NumNulls property is read-only.
+
+ data = tc.MatlabArrayFcn(1:10);
+ array = tc.ArrowArrayConstructorFcn(data);
+ fcn = @() setfield(array, "NumNulls", 1);
+ tc.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
+
end
end
diff --git a/matlab/test/arrow/array/tBooleanArray.m
b/matlab/test/arrow/array/tBooleanArray.m
index 1af184d61c..4dca6ee7a5 100644
--- a/matlab/test/arrow/array/tBooleanArray.m
+++ b/matlab/test/arrow/array/tBooleanArray.m
@@ -216,5 +216,27 @@ classdef tBooleanArray < matlab.unittest.TestCase
% Test supplying more than two arrays to isequal
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
end
+
+ function TestNumNulls(tc)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ data1 = tc.MatlabArrayFcn([true false true false]);
+ array1 = tc.ArrowArrayConstructorFcn(data1);
+ tc.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 3 null values.
+ array2 = tc.ArrowArrayConstructorFcn(data1, Valid=3);
+ tc.verifyEqual(array2.NumNulls, int64(3));
+ end
+
+ function TestNumNullsNoSetter(tc)
+ % Verify the NumNulls property is read-only.
+
+ data = tc.MatlabArrayFcn([true false true false]);
+ array = tc.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ tc.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
end
end
diff --git a/matlab/test/arrow/array/tDate32Array.m
b/matlab/test/arrow/array/tDate32Array.m
index e696efdb63..d4b1ae1b99 100644
--- a/matlab/test/arrow/array/tDate32Array.m
+++ b/matlab/test/arrow/array/tDate32Array.m
@@ -53,6 +53,28 @@ classdef tDate32Array < matlab.unittest.TestCase
array = testCase.ArrowArrayConstructorFcn(dates);
testCase.verifyEqual(array.NumElements, int64(10));
end
+
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ dates = datetime(2023, 1, 1) + days(1:5)';
+ array1 = testCase.ArrowArrayConstructorFcn(dates);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 3 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+ testCase.verifyEqual(array2.NumNulls, int64(3));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = datetime(2023, 1, 1) + days(1:5)';
+ array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
function TestToMATLAB(testCase)
% Verify toMATLAB() round-trips the original datetime array.
diff --git a/matlab/test/arrow/array/tDate64Array.m
b/matlab/test/arrow/array/tDate64Array.m
index 124bae50a9..48075cc276 100644
--- a/matlab/test/arrow/array/tDate64Array.m
+++ b/matlab/test/arrow/array/tDate64Array.m
@@ -54,6 +54,28 @@ classdef tDate64Array < matlab.unittest.TestCase
testCase.verifyEqual(array.NumElements, int64(10));
end
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ dates = datetime(2023, 1, 1) + days(1:5)';
+ array1 = testCase.ArrowArrayConstructorFcn(dates);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 3 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+ testCase.verifyEqual(array2.NumNulls, int64(3));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = datetime(2023, 1, 1) + days(1:5)';
+ array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
+
function TestToMATLAB(testCase)
% Verify toMATLAB() round-trips the original datetime array.
dates = testCase.UnixEpoch + days(1:10);
diff --git a/matlab/test/arrow/array/tListArray.m
b/matlab/test/arrow/array/tListArray.m
index 0b5a74313f..a26b10fab8 100644
--- a/matlab/test/arrow/array/tListArray.m
+++ b/matlab/test/arrow/array/tListArray.m
@@ -32,6 +32,7 @@ classdef tListArray < matlab.unittest.TestCase
%% Empty (zero-element) list (List<Float64>)
Type = arrow.list(arrow.float64());
NumElements = int64(0);
+ NumNulls = int64(0);
Valid = logical.empty(0, 1);
Offsets = arrow.array(int32(0));
Values = arrow.array([]);
@@ -44,6 +45,7 @@ classdef tListArray < matlab.unittest.TestCase
Properties=struct(...
Type=Type, ...
NumElements=NumElements, ...
+ NumNulls=NumNulls, ...
Valid=Valid, ...
Offsets=Offsets, ...
Values=Values ...
@@ -53,6 +55,7 @@ classdef tListArray < matlab.unittest.TestCase
%% List with NULLs (List<String>)
Type = arrow.list(arrow.string());
NumElements = int64(4);
+ NumNulls = int64(2);
Valid = [true, false, true, false];
Offsets = arrow.array(int32([0, 1, 4, 6, 7]));
Values = arrow.array(["A", missing, "C", "D", "E", missing, "G"]);
@@ -65,6 +68,7 @@ classdef tListArray < matlab.unittest.TestCase
Properties=struct(...
Type=Type, ...
NumElements=NumElements, ...
+ NumNulls=NumNulls, ...
Valid=Valid, ...
Offsets=Offsets, ...
Values=Values ...
@@ -74,6 +78,7 @@ classdef tListArray < matlab.unittest.TestCase
%% Single-level list (List<Float64>)
Type = arrow.list(arrow.float64());
NumElements = int64(3);
+ NumNulls = int64(0);
Valid = true(1, NumElements);
Offsets = arrow.array(int32([0, 2, 5, 9]));
Values = arrow.array([1, 2, 3, 4, 5, 6, 7, 8, 9]);
@@ -86,6 +91,7 @@ classdef tListArray < matlab.unittest.TestCase
Properties=struct(...
Type=Type, ...
NumElements=NumElements, ...
+ NumNulls=NumNulls, ...
Valid=Valid, ...
Offsets=Offsets, ...
Values=Values ...
@@ -95,6 +101,7 @@ classdef tListArray < matlab.unittest.TestCase
%% Multi-level list (List<List<Float64>>)
Type = arrow.list(arrow.list(arrow.float64()));
NumElements = int64(2);
+ NumNulls = int64(0);
Valid = true(1, NumElements);
Offsets = arrow.array(int32([0, 1, 3]));
Values = TestArrowArray.SingleLevelList.ArrowArray;
@@ -107,6 +114,7 @@ classdef tListArray < matlab.unittest.TestCase
Properties=struct(...
Type=Type, ...
NumElements=NumElements, ...
+ NumNulls=NumNulls, ...
Valid=Valid, ...
Offsets=Offsets, ...
Values=Values ...
diff --git a/matlab/test/arrow/array/tStringArray.m
b/matlab/test/arrow/array/tStringArray.m
index 5933b5a522..4c32148076 100644
--- a/matlab/test/arrow/array/tStringArray.m
+++ b/matlab/test/arrow/array/tStringArray.m
@@ -279,5 +279,27 @@ classdef tStringArray < matlab.unittest.TestCase
% Test supplying more than two arrays to isequal
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
end
+
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ data1 = ["A"; "B"; "C"; "D"; "E"; "F"];
+ array1 = testCase.ArrowArrayConstructorFcn(data1);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 2 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(data1, Valid=[1 2 3 4]);
+ testCase.verifyEqual(array2.NumNulls, int64(2));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = ["A"; "B"; "C"; missing; "D"; "E"; "F"];
+ array = testCase.ArrowArrayConstructorFcn(data);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
end
end
diff --git a/matlab/test/arrow/array/tStructArray.m
b/matlab/test/arrow/array/tStructArray.m
index 5dd153d322..473169fb15 100644
--- a/matlab/test/arrow/array/tStructArray.m
+++ b/matlab/test/arrow/array/tStructArray.m
@@ -142,6 +142,26 @@ classdef tStructArray < matlab.unittest.TestCase
tc.verifyError(fcn, "MATLAB:class:SetProhibited");
end
+ function NumNulls(tc)
+ % Verify the NumNulls property.
+ import arrow.array.StructArray
+
+ array1 = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
+ tc.verifyEqual(array1.NumNulls, int64(0));
+
+ array2 = StructArray.fromArrays(tc.Float64Array, tc.StringArray,
Valid=[1 3 5]);
+ tc.verifyEqual(array2.NumNulls, int64(2));
+ end
+
+ function NumNullsNoSetter(tc)
+ % Verify the NumNulls property is read-only.
+ import arrow.array.StructArray
+
+ array = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
+ fcn = @() setfield(array, "NumNulls", 1);
+ tc.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
+
function Type(tc)
% Verify the Type property is set to the expected value.
import arrow.array.StructArray
diff --git a/matlab/test/arrow/array/tTime32Array.m
b/matlab/test/arrow/array/tTime32Array.m
index 99d5839974..9a46d517b0 100644
--- a/matlab/test/arrow/array/tTime32Array.m
+++ b/matlab/test/arrow/array/tTime32Array.m
@@ -98,6 +98,28 @@ classdef tTime32Array < matlab.unittest.TestCase
testCase.verifyEqual(array.NumElements, int64(5));
end
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ dates = seconds(1:10);
+ array1 = testCase.ArrowArrayConstructorFcn(dates);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 8 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+ testCase.verifyEqual(array2.NumNulls, int64(8));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = seconds(1:10);
+ array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
+
function TestToMATLAB(testCase, Unit)
% Verify toMATLAB() round-trips the original duration array.
times = seconds([100 200 355 400]);
diff --git a/matlab/test/arrow/array/tTime64Array.m
b/matlab/test/arrow/array/tTime64Array.m
index 816b7acddf..566173b83c 100644
--- a/matlab/test/arrow/array/tTime64Array.m
+++ b/matlab/test/arrow/array/tTime64Array.m
@@ -109,6 +109,29 @@ classdef tTime64Array < matlab.unittest.TestCase
testCase.verifyEqual(array.NumElements, int64(5));
end
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property.
+
+ % array1 has 0 null values.
+ dates = seconds(1:10);
+ array1 = testCase.ArrowArrayConstructorFcn(dates);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 8 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+ testCase.verifyEqual(array2.NumNulls, int64(8));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = seconds(1:10);
+ array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
+
+
function TestToMATLAB(testCase, Unit)
% Verify toMATLAB() round-trips the original duration array.
times = seconds([100 200 355 400]);
diff --git a/matlab/test/arrow/array/tTimestampArray.m
b/matlab/test/arrow/array/tTimestampArray.m
index daf77c03fc..31d5830d70 100644
--- a/matlab/test/arrow/array/tTimestampArray.m
+++ b/matlab/test/arrow/array/tTimestampArray.m
@@ -50,6 +50,28 @@ classdef tTimestampArray < matlab.unittest.TestCase
arrowArray = testCase.ArrowArrayConstructorFcn(dates);
testCase.verifyEqual(arrowArray.NumElements, int64(5));
end
+
+ function TestNumNulls(testCase)
+ % Verify the NumNulls property returns correct value.
+
+ % array1 has 0 null values.
+ dates = datetime(2023, 1, 1) + days(1:5)';
+ array1 = testCase.ArrowArrayConstructorFcn(dates);
+ testCase.verifyEqual(array1.NumNulls, int64(0));
+
+ % array2 has 3 null values.
+ array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
+ testCase.verifyEqual(array2.NumNulls, int64(3));
+ end
+
+ function TestNumNullsNoSetter(testCase)
+ % Verify the NumNulls property is read-only.
+
+ data = datetime(2023, 1, 1) + days(1:5)';
+ array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
+ fcn = @() setfield(array, "NumNulls", 1);
+ testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+ end
function TestDefaultTimestampType(testCase, TimeZone)
% Verify the TimestampArray's units is Microsecond by default and