This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-js.git


The following commit(s) were added to refs/heads/main by this push:
     new 5ebbe36  fix: Write zero-filled values buffer for fully-null Bool 
columns in IPC (#392)
5ebbe36 is described below

commit 5ebbe36a75c94304e2451f96cf7d6899c9c06af5
Author: Alon Spivack <[email protected]>
AuthorDate: Tue Mar 3 02:43:24 2026 +0200

    fix: Write zero-filled values buffer for fully-null Bool columns in IPC 
(#392)
    
    ## Summary
    
    When a Bool column is fully null (`nullCount >= length`),
    `assembleBoolVector` in `VectorAssembler` previously returned early after
    adding only an empty (0-byte) placeholder values buffer, producing an IPC
    stream with 0 bytes for buffer `#1`.
    
    This violates the Arrow IPC specification, which requires a data buffer
    of `ceil(length / 8)` bytes for Bool arrays regardless of null count. Other
    implementations (PyArrow, arrow-rs) reject these streams with:
    
    > Buffer `#1` too small in array of type Bool. Expected at least 1
    > byte(s), got 0
    
    ## Fix
    
    Write a zero-filled `Uint8Array` of the correct byte length,
    `(data.length + 7) >> 3` (i.e. `ceil(length / 8)`), when all values are
    null, instead of returning early with an empty buffer.
    
    ## Tests
    
    Added 4 round-trip tests for fully-null Bool columns through
    `tableToIPC` / `tableFromIPC`:
    - Single-row fully-null Bool
    - 2-row fully-null Bool (file format)
    - 10-row fully-null Bool (crosses byte boundary)
    - Mixed table with normal Int32 + fully-null Bool columns
    
    Closes #68
    
    Co-authored-by: Alon Spivack <[email protected]>
---
 src/visitor/vectorassembler.ts       |  4 +--
 test/unit/ipc/serialization-tests.ts | 51 ++++++++++++++++++++++++++++++++++--
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/visitor/vectorassembler.ts b/src/visitor/vectorassembler.ts
index 2ac6f8f..ae4b712 100644
--- a/src/visitor/vectorassembler.ts
+++ b/src/visitor/vectorassembler.ts
@@ -186,8 +186,8 @@ function assembleBoolVector<T extends Bool>(this: 
VectorAssembler, data: Data<T>
     // Bool vector is a special case of FlatVector, as its data buffer needs 
to stay packed
     let values: Uint8Array;
     if (data.nullCount >= data.length) {
-        // If all values are null, just insert a placeholder empty data buffer 
(fastest path)
-        return addBuffer.call(this, new Uint8Array(0));
+        // If all values are null, write a zero-filled data buffer of the 
correct byte length
+        return addBuffer.call(this, new Uint8Array((data.length + 7) >> 3));
     } else if ((values = data.values) instanceof Uint8Array) {
         // If values is already a Uint8Array, slice the bitmap (fast path)
         return addBuffer.call(this, truncateBitmap(data.offset, data.length, 
values));
diff --git a/test/unit/ipc/serialization-tests.ts 
b/test/unit/ipc/serialization-tests.ts
index f31a242..300e02c 100644
--- a/test/unit/ipc/serialization-tests.ts
+++ b/test/unit/ipc/serialization-tests.ts
@@ -19,8 +19,8 @@ import '../../jest-extensions.js';
 import * as generate from '../../generate-test-data.js';
 
 import {
-    Table, Schema, Field, DataType, TypeMap, Dictionary, Int32, Float32, 
Uint8, Utf8, Null,
-    makeVector,
+    Table, Schema, Field, DataType, TypeMap, Dictionary, Int32, Float32, 
Uint8, Utf8, Null, Bool,
+    makeVector, vectorFromArray,
     tableFromIPC, tableToIPC, RecordBatchReader, RecordBatchStreamWriter
 } from 'apache-arrow';
 
@@ -121,6 +121,53 @@ describe('tableToIPC()', () => {
         expect(result).toEqualTable(source);
     });
 
+    test(`single-row fully-null Bool column round-trips`, () => {
+        const source = new Table({
+            a: vectorFromArray([null], new Bool()),
+        });
+        const buffer = tableToIPC(source);
+        const result = tableFromIPC(buffer);
+        expect(result.numRows).toBe(1);
+        expect(result.getChild('a')!.get(0)).toBeNull();
+    });
+
+    test(`fully-null Bool column round-trips through IPC file format`, () => {
+        const source = new Table({
+            a: vectorFromArray([null, null], new Bool()),
+        });
+        const buffer = tableToIPC(source, 'file');
+        const result = tableFromIPC(buffer);
+        expect(result.numRows).toBe(2);
+        expect(result.getChild('a')!.get(0)).toBeNull();
+        expect(result.getChild('a')!.get(1)).toBeNull();
+    });
+
+    test(`fully-null Bool column with length > 8 round-trips through 
serialization`, () => {
+        const source = new Table({
+            a: vectorFromArray(new Array(10).fill(null), new Bool()),
+        });
+        const buffer = tableToIPC(source);
+        const result = tableFromIPC(buffer);
+        expect(result.numRows).toBe(10);
+        for (let i = 0; i < 10; i++) {
+            expect(result.getChild('a')!.get(i)).toBeNull();
+        }
+    });
+
+    test(`mixed table with normal column and fully-null Bool column 
round-trips through serialization`, () => {
+        const source = new Table({
+            a: makeVector(new Int32Array([1, 2, 3])),
+            b: vectorFromArray([null, null, null], new Bool()),
+        });
+        const buffer = tableToIPC(source);
+        const result = tableFromIPC(buffer);
+        expect(result.numRows).toBe(3);
+        expect(result.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2, 
3]));
+        for (let i = 0; i < 3; i++) {
+            expect(result.getChild('b')!.get(i)).toBeNull();
+        }
+    });
+
     const chunkLengths = [] as number[];
     const table = <T extends TypeMap = any>(schema: Schema<T>) => 
createTable(schema, chunkLengths);
     for (let i = -1; ++i < 3;) {

Reply via email to