This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 64d02708 ipc: clear variadicCounts in recordEncoder.reset() (#631)
64d02708 is described below
commit 64d02708e6b0d80592dd001c5a1e4a8922a39971
Author: Alfonso Subiotto Marqués <[email protected]>
AuthorDate: Wed Jan 14 17:10:20 2026 +0100
ipc: clear variadicCounts in recordEncoder.reset() (#631)
The reset() function did not clear variadicCounts, causing it to
accumulate across batch encodings. This produces malformed IPC where
each DictionaryBatch contains variadic counts from all previous batches,
which other IPC reader implementations (e.g. arrow-rs) do not expect.
### Rationale for this change
arrow-rs was returning errors when reading IPC files written by arrow-go.
### What changes are included in this PR?
recordEncoder.reset() now sets variadicCounts to nil so that it no longer carries over between encodes.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
Signed-off-by: Alfonso Subiotto Marques <[email protected]>
---
arrow/ipc/writer.go | 1 +
arrow/ipc/writer_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/arrow/ipc/writer.go b/arrow/ipc/writer.go
index 9cd10e79..29646ae2 100644
--- a/arrow/ipc/writer.go
+++ b/arrow/ipc/writer.go
@@ -366,6 +366,7 @@ func (w *recordEncoder) shouldCompress(uncompressed, compressed int) bool {
func (w *recordEncoder) reset() {
w.start = 0
w.fields = make([]fieldMetadata, 0)
+ w.variadicCounts = nil
}
func (w *recordEncoder) getCompressor(id int) compressor {
diff --git a/arrow/ipc/writer_test.go b/arrow/ipc/writer_test.go
index c91661a1..07996e3b 100644
--- a/arrow/ipc/writer_test.go
+++ b/arrow/ipc/writer_test.go
@@ -25,13 +25,14 @@ import (
"strings"
"testing"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/bitutil"
"github.com/apache/arrow-go/v18/arrow/internal/flatbuf"
"github.com/apache/arrow-go/v18/arrow/memory"
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/require"
)
// reproducer from ARROW-13529
@@ -357,3 +358,49 @@ func TestWritePayload(t *testing.T) {
require.NoError(t, err)
require.True(t, msg.Type() == MessageRecordBatch)
}
+
+// TestVariadicCountsNotAccumulatedAcrossEncode verifies that variadicCounts
+// does not accumulate across encode calls separated by reset(). Without this,
+// each batch's variadic counts would include counts from previous batches,
+// producing malformed IPC that other implementations (e.g., arrow-rs) cannot
+// read.
+func TestVariadicCountsNotAccumulatedAcrossEncode(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(t, 0)
+
+ enc := newRecordEncoder(
+ mem, 0,
+ kMaxNestingDepth,
+ false,
+ -1,
+ 1,
+ 0,
+ nil,
+ )
+
+ // Create a StringView array with a long string (>12 bytes uses out-of-line
+ // storage, which adds to variadicCounts).
+ bldr := array.NewStringViewBuilder(mem)
+ bldr.Append("this_is_a_long_string_value")
+ arr := bldr.NewArray()
+ bldr.Release()
+ defer arr.Release()
+
+ schema := arrow.NewSchema(
+ []arrow.Field{
+ {Name: "sv", Type: arrow.BinaryTypes.StringView},
+ }, nil,
+ )
+ rec := array.NewRecordBatch(schema, []arrow.Array{arr}, 1)
+ defer rec.Release()
+
+ expectedCounts := []int64{1}
+ for range 2 {
+ enc.reset()
+
+ var p Payload
+ require.NoError(t, enc.encode(&p, rec))
+ require.Equal(t, expectedCounts, enc.variadicCounts)
+ p.Release()
+ }
+}