This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new d715ac72 fix(parquet/internal/encoding): Fix typed dictionary encoding (#479)
d715ac72 is described below

commit d715ac72537d93d95e5e852f6f0706da524afa0d
Author: Travis Patterson <[email protected]>
AuthorDate: Thu Aug 28 09:24:10 2025 -0600

    fix(parquet/internal/encoding): Fix typed dictionary encoding (#479)
    
    ### Rationale for this change
    The typed dict encoder tries to type-assert its `memory.Allocator`
    (`enc.mem`) to `TypedMemoTable[T]`. This results in a panic in
    `PutDictionary`.
    
    ### What changes are included in this PR?
    This PR changes `enc.mem` to `enc.memo` in the encoder so that the
    `MemoTable` is the value type-asserted to `TypedMemoTable[T]`.
    
    ### Are these changes tested?
    I've tested this locally and panics no longer happen 🎉
---
 parquet/internal/encoding/typed_encoder.go      |  2 +-
 parquet/internal/encoding/typed_encoder_test.go | 45 +++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/parquet/internal/encoding/typed_encoder.go b/parquet/internal/encoding/typed_encoder.go
index cec8c55c..bbc468b8 100644
--- a/parquet/internal/encoding/typed_encoder.go
+++ b/parquet/internal/encoding/typed_encoder.go
@@ -164,7 +164,7 @@ func (enc *typedDictEncoder[T]) PutDictionary(values arrow.Array) error {
        enc.dictEncodedSize += values.Len() * int(unsafe.Sizeof(T(0)))
        data := values.(arrvalues[T]).Values()
 
-       typedMemo := enc.mem.(TypedMemoTable[T])
+       typedMemo := enc.memo.(TypedMemoTable[T])
        for _, val := range data {
                if _, _, err := typedMemo.InsertOrGet(val); err != nil {
                        return err
diff --git a/parquet/internal/encoding/typed_encoder_test.go b/parquet/internal/encoding/typed_encoder_test.go
new file mode 100644
index 00000000..7b12bd3d
--- /dev/null
+++ b/parquet/internal/encoding/typed_encoder_test.go
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoding
+
+import (
+       "testing"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/array"
+       "github.com/apache/arrow-go/v18/arrow/memory"
+       "github.com/apache/arrow-go/v18/parquet"
+       "github.com/apache/arrow-go/v18/parquet/schema"
+       "github.com/stretchr/testify/assert"
+)
+
+func TestPutDictionary(t *testing.T) {
+       exp := []int32{1, 2, 4, 8, 16}
+       ad := array.NewData(
+               arrow.PrimitiveTypes.Int32, len(exp),
+               []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(exp))},
+               nil, 0, 0,
+       )
+       arr := array.NewInt32Data(ad)
+
+       typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
+       descr := schema.NewColumn(typ, 0, 0)
+       enc := &typedDictEncoder[int32]{newDictEncoderBase(descr, NewDictionary[int32](), memory.DefaultAllocator)}
+
+       err := enc.PutDictionary(arr)
+       assert.NoError(t, err)
+}

Reply via email to