shaoting-huang opened a new issue, #43276:
URL: https://github.com/apache/arrow/issues/43276
### Describe the bug, including details regarding any error messages,
version, and platform.
Reading a Parquet file panics when a nullable column written with delta binary
packed encoding contains more rows than the reader's batch size.
To reproduce:
```go
func TestDeltaBinaryPackedEncodingWithNull(t *testing.T) {
t.Run("test", func(t *testing.T) {
size := 10
buf := new(bytes.Buffer)
mem := memory.NewGoAllocator()
// Define the schema for the test data
fields := []arrow.Field{
{Name: "int64", Type: arrow.PrimitiveTypes.Int64,
Nullable: true},
}
schema := arrow.NewSchema(fields, nil)
// Create a record batch with the test data
b := array.NewRecordBuilder(mem, schema)
defer b.Release()
for i := 0; i < size; i++ {
b.Field(0).(*array.Int64Builder).Append(int64(i))
}
rec := b.NewRecord()
defer rec.Release()
// Write the data to Parquet using the file writer
props := parquet.NewWriterProperties(
parquet.WithCompression(compress.Codecs.Zstd),
parquet.WithCompressionLevel(3),
parquet.WithDictionaryDefault(false),
parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked))
writerProps := pqarrow.DefaultWriterProps()
pw, err := pqarrow.NewFileWriter(schema, buf, props,
writerProps)
assert.NoError(t, err)
pw.Write(rec)
pw.Close()
// Read the data back from the Parquet file
reader, err :=
file.NewParquetReader(bytes.NewReader(buf.Bytes()))
assert.NoError(t, err)
defer reader.Close()
pr, err := pqarrow.NewFileReader(reader,
pqarrow.ArrowReadProperties{BatchSize: 5}, memory.DefaultAllocator)
assert.NoError(t, err)
rr, err := pr.GetRecordReader(context.Background(), nil, nil)
assert.NoError(t, err)
totalRows := 0
for rr.Next() {
rec := rr.Record()
for i := 0; i < int(rec.NumRows()); i++ {
col := rec.Column(0).(*array.Int64)
val := col.Value(i)
assert.Equal(t, val, int64(totalRows+i))
}
totalRows += int(rec.NumRows())
}
if totalRows != size {
t.Fatalf("Expected %d rows, but got %d rows", size,
totalRows)
}
})
}
```
This causes the following error:
```
panic: runtime error: slice bounds out of range [4:0] [recovered]
panic: runtime error: slice bounds out of range [4:0]
goroutine 178 [running]:
testing.tRunner.func1.2({0x103f56e00, 0x14000aa40a8})
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1545 +0x1c4
testing.tRunner.func1()
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1548 +0x360
panic({0x103f56e00?, 0x14000aa40a8?})
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/runtime/panic.go:914 +0x218
github.com/apache/arrow/go/v12/parquet/internal/encoding.(*DeltaBitPackInt64Decoder).Decode(0x140008af618,
{0x140005e8680?, 0x0?, 0x0?})
github.com/apache/arrow/go/[email protected]/parquet/internal/encoding/delta_bit_packing.go:273
+0x240
github.com/apache/arrow/go/v12/parquet/internal/encoding.DeltaBitPackInt64Decoder.DecodeSpaced({0x1400001f1e0,
{0x0, 0x0, 0x0}}, {0x140005e8680, 0x5, 0x8}, 0x0, {0x140005e8700, 0x1, ...},
...)
github.com/apache/arrow/go/[email protected]/parquet/internal/encoding/delta_bit_packing.go:291
+0x74
github.com/apache/arrow/go/v12/parquet/file.(*primitiveRecordReader).ReadValuesSpaced(0x140005e8380?,
0x5, 0x20?)
github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:284 +0x2e4
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecordData(0x14000b8a9c0,
0x5)
github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:548 +0x288
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecords(0x14000b8a9c0,
0x5)
github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:574 +0x44
github.com/apache/arrow/go/v12/parquet/pqarrow.(*leafReader).LoadBatch(0x14000b8aa20,
0x5)
github.com/apache/arrow/go/[email protected]/parquet/pqarrow/column_readers.go:109
+0xe0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*ColumnReader).NextBatch(0x14000a9c160,
0x103e7de80?)
github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:131 +0x34
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next.func2(0x0,
0x0?)
github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:665 +0x40
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next(0x14000b8ab40)
github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:685 +0x1d0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).Next(0x14000b8ab40?)
github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:760 +0x74
github.com/milvus-io/milvus/internal/storage.Test.func1(0x14000185a00)
```
### Component(s)
Go
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]