Copilot commented on code in PR #302:
URL: https://github.com/apache/arrow-dotnet/pull/302#discussion_r3012744179
##########
src/Apache.Arrow/Arrays/LargeListArray.cs:
##########
@@ -14,12 +14,96 @@
// limitations under the License.
using System;
+using Apache.Arrow.Memory;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
public class LargeListArray : Array
{
+ public class Builder : IArrowArrayBuilder<LargeListArray, Builder>
+ {
+ public IArrowArrayBuilder<IArrowArray,
IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }
+
+ public int Length => ValueOffsetsBufferBuilder.Length;
+
+ private ArrowBuffer.Builder<long> ValueOffsetsBufferBuilder { get;
}
+
+ private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }
+
+ public int NullCount { get; protected set; }
+
+ private IArrowType DataType { get; }
+
+ public Builder(IArrowType valueDataType) : this(new
LargeListType(valueDataType))
+ {
+ }
+
+ public Builder(Field valueField) : this(new
LargeListType(valueField))
+ {
+ }
+
+ internal Builder(LargeListType dataType)
+ {
+ ValueBuilder =
ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
+ ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<long>();
+ ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
+ DataType = dataType;
+ }
+
+ public Builder Append()
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+ ValidityBufferBuilder.Append(true);
+
+ return this;
+ }
+
+ public Builder AppendNull()
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+ ValidityBufferBuilder.Append(false);
+ NullCount++;
+
+ return this;
+ }
+
+ public LargeListArray Build(MemoryAllocator allocator = default)
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+
+ ArrowBuffer validityBuffer = NullCount > 0
+ ?
ValidityBufferBuilder.Build(allocator)
+ : ArrowBuffer.Empty;
+
+ return new LargeListArray(DataType, Length - 1,
+ ValueOffsetsBufferBuilder.Build(allocator),
ValueBuilder.Build(allocator),
+ validityBuffer, NullCount, 0);
+ }
+
+ public Builder Reserve(int capacity)
+ {
+ ValueOffsetsBufferBuilder.Reserve(capacity + 1);
+ ValidityBufferBuilder.Reserve(capacity);
+ return this;
+ }
+
+ public Builder Resize(int length)
+ {
+ ValueOffsetsBufferBuilder.Resize(length + 1);
+ ValidityBufferBuilder.Resize(length);
+ return this;
+ }
+
+ public Builder Clear()
+ {
+ ValueOffsetsBufferBuilder.Clear();
+ ValueBuilder.Clear();
+ ValidityBufferBuilder.Clear();
Review Comment:
`Clear()` resets the offset/value/validity builders, but it does not reset
`NullCount`. If the builder had previously appended nulls, subsequent `Build()`
calls after `Clear()` will report an incorrect `nullCount` in `ArrayData` (and
`NullCount` on the resulting array), which can break downstream consumers
relying on `ArrayData.NullCount`. Reset `NullCount` to 0 as part of `Clear()`
(and consider aligning behavior with `ListArray.Builder`).
```suggestion
ValidityBufferBuilder.Clear();
NullCount = 0;
```
##########
src/Apache.Arrow/Arrays/LargeListArray.cs:
##########
@@ -14,12 +14,96 @@
// limitations under the License.
using System;
+using Apache.Arrow.Memory;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
public class LargeListArray : Array
{
+ public class Builder : IArrowArrayBuilder<LargeListArray, Builder>
+ {
+ public IArrowArrayBuilder<IArrowArray,
IArrowArrayBuilder<IArrowArray>> ValueBuilder { get; }
+
+ public int Length => ValueOffsetsBufferBuilder.Length;
+
+ private ArrowBuffer.Builder<long> ValueOffsetsBufferBuilder { get;
}
+
+ private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; }
+
+ public int NullCount { get; protected set; }
+
+ private IArrowType DataType { get; }
+
+ public Builder(IArrowType valueDataType) : this(new
LargeListType(valueDataType))
+ {
+ }
+
+ public Builder(Field valueField) : this(new
LargeListType(valueField))
+ {
+ }
+
+ internal Builder(LargeListType dataType)
+ {
+ ValueBuilder =
ArrowArrayBuilderFactory.Build(dataType.ValueDataType);
+ ValueOffsetsBufferBuilder = new ArrowBuffer.Builder<long>();
+ ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder();
+ DataType = dataType;
+ }
+
+ public Builder Append()
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+ ValidityBufferBuilder.Append(true);
+
+ return this;
+ }
+
+ public Builder AppendNull()
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+ ValidityBufferBuilder.Append(false);
+ NullCount++;
+
+ return this;
+ }
+
+ public LargeListArray Build(MemoryAllocator allocator = default)
+ {
+ ValueOffsetsBufferBuilder.Append(ValueBuilder.Length);
+
+ ArrowBuffer validityBuffer = NullCount > 0
+ ?
ValidityBufferBuilder.Build(allocator)
+ : ArrowBuffer.Empty;
+
+ return new LargeListArray(DataType, Length - 1,
+ ValueOffsetsBufferBuilder.Build(allocator),
ValueBuilder.Build(allocator),
+ validityBuffer, NullCount, 0);
+ }
Review Comment:
`Build()` appends an extra trailing offset into `ValueOffsetsBufferBuilder`
every time it is called. This mutates the builder state and makes a second
`Build()` (or further `Append*/Build()` usage) produce an incorrect offset
buffer length vs. validity buffer length (and can yield incorrect array
lengths). Consider making `Build()` non-mutating (e.g., build a temporary
offsets buffer with the final offset appended, or remove the appended offset
after building).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]