This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 637fc280f fix(go/adbc/driver/bigquery): fix parsing repeated records with nested fields (#3240)
637fc280f is described below

commit 637fc280f4a49275ff1e00e6143040b8d02899e9
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Wed Aug 6 21:28:52 2025 -0300

    fix(go/adbc/driver/bigquery): fix parsing repeated records with nested fields (#3240)
    
    Co-authored-by: Xuliang (Harry) Sun <[email protected]>
---
 go/adbc/driver/bigquery/connection.go      |  50 ++++-------
 go/adbc/driver/bigquery/connection_test.go | 140 +++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 31 deletions(-)

diff --git a/go/adbc/driver/bigquery/connection.go b/go/adbc/driver/bigquery/connection.go
index a8ec4fa65..d904ad49b 100644
--- a/go/adbc/driver/bigquery/connection.go
+++ b/go/adbc/driver/bigquery/connection.go
@@ -805,40 +805,23 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
        case bigquery.TimestampFieldType:
                field.Type = arrow.FixedWidthTypes.Timestamp_ms
        case bigquery.RecordFieldType:
-               if schema.Repeated {
-                       if len(schema.Schema) == 1 {
-                               arrayField, err := buildField(schema.Schema[0], 
level+1)
-                               if err != nil {
-                                       return arrow.Field{}, err
-                               }
-                               field.Type = arrow.ListOf(arrayField.Type)
-                               field.Metadata = arrayField.Metadata
-                               field.Nullable = arrayField.Nullable
-                       } else {
-                               return arrow.Field{}, adbc.Error{
-                                       Code: adbc.StatusInvalidArgument,
-                                       Msg:  fmt.Sprintf("Cannot create array 
schema for filed `%s`: len(schema.Schema) != 1", schema.Name),
-                               }
-                       }
-               } else {
-                       nestedFields := make([]arrow.Field, len(schema.Schema))
-                       for i, nestedSchema := range schema.Schema {
-                               f, err := buildField(nestedSchema, level+1)
-                               if err != nil {
-                                       return arrow.Field{}, err
-                               }
-                               nestedFields[i] = f
+               // create an Arrow struct for BigQuery Record fields
+               nestedFields := make([]arrow.Field, len(schema.Schema))
+               for i, nestedFieldSchema := range schema.Schema {
+                       f, err := buildField(nestedFieldSchema, level+1)
+                       if err != nil {
+                               return arrow.Field{}, err
                        }
-                       structType := arrow.StructOf(nestedFields...)
-                       if structType == nil {
-                               return arrow.Field{}, adbc.Error{
-                                       Code: adbc.StatusInvalidArgument,
-                                       Msg:  fmt.Sprintf("Cannot create a 
struct schema for record `%s`", schema.Name),
-                               }
+                       nestedFields[i] = f
+               }
+               structType := arrow.StructOf(nestedFields...)
+               if structType == nil {
+                       return arrow.Field{}, adbc.Error{
+                               Code: adbc.StatusInvalidArgument,
+                               Msg:  fmt.Sprintf("Cannot create a struct 
schema for record `%s`", schema.Name),
                        }
-                       field.Type = structType
                }
-
+               field.Type = structType
        case bigquery.DateFieldType:
                field.Type = arrow.FixedWidthTypes.Date32
        case bigquery.TimeFieldType:
@@ -870,6 +853,11 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
                }
        }
 
+       // if the field is repeated, then it's a list of the type we just built
+       if schema.Repeated {
+               field.Type = arrow.ListOf(field.Type)
+       }
+
        if level == 0 {
                metadata["DefaultValueExpression"] = 
schema.DefaultValueExpression
        }
diff --git a/go/adbc/driver/bigquery/connection_test.go b/go/adbc/driver/bigquery/connection_test.go
new file mode 100644
index 000000000..e2bc22d2d
--- /dev/null
+++ b/go/adbc/driver/bigquery/connection_test.go
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package bigquery
+
+import (
+       "testing"
+
+       "cloud.google.com/go/bigquery"
+)
+
+func TestBuildField(t *testing.T) {
+       tests := []struct {
+               name            string
+               schema          *bigquery.FieldSchema
+               expectedTypeStr string
+               expectError     bool
+       }{
+               {
+                       name: "ArrayOfScalar",
+                       schema: &bigquery.FieldSchema{
+                               Name:        "test_array_scalar_field",
+                               Type:        bigquery.IntegerFieldType,
+                               Repeated:    true,
+                               Required:    false,
+                               Description: "Test array field with scalar 
type",
+                               Schema:      nil,
+                       },
+                       expectedTypeStr: "list<item: int64, nullable>",
+                       expectError:     false,
+               },
+               {
+                       name: "ArrayOfRecordWithMultipleFields",
+                       schema: &bigquery.FieldSchema{
+                               Name:        "test_array_field",
+                               Type:        bigquery.RecordFieldType,
+                               Repeated:    true,
+                               Required:    false,
+                               Description: "Test array field with multiple 
nested fields",
+                               Schema: []*bigquery.FieldSchema{
+                                       {
+                                               Name:     "field1",
+                                               Type:     
bigquery.StringFieldType,
+                                               Required: false,
+                                       },
+                                       {
+                                               Name:     "field2",
+                                               Type:     
bigquery.IntegerFieldType,
+                                               Required: false,
+                                       },
+                               },
+                       },
+                       expectedTypeStr: "list<item: struct<field1: utf8, 
field2: int64>, nullable>",
+                       expectError:     false,
+               },
+               {
+                       name: "ArrayOfRecordWithSingleField",
+                       schema: &bigquery.FieldSchema{
+                               Name:        "test_single_array_field",
+                               Type:        bigquery.RecordFieldType,
+                               Repeated:    true,
+                               Required:    false,
+                               Description: "Test array field with single 
nested field",
+                               Schema: []*bigquery.FieldSchema{
+                                       {
+                                               Name:     "single_field",
+                                               Type:     
bigquery.StringFieldType,
+                                               Required: false,
+                                       },
+                               },
+                       },
+                       expectedTypeStr: "list<item: struct<single_field: 
utf8>, nullable>",
+                       expectError:     false,
+               },
+               {
+                       name: "NonRepeatedRecord",
+                       schema: &bigquery.FieldSchema{
+                               Name:        "test_struct_field",
+                               Type:        bigquery.RecordFieldType,
+                               Repeated:    false,
+                               Required:    false,
+                               Description: "Test struct field with multiple 
nested fields",
+                               Schema: []*bigquery.FieldSchema{
+                                       {
+                                               Name:     "nested_string",
+                                               Type:     
bigquery.StringFieldType,
+                                               Required: false,
+                                       },
+                                       {
+                                               Name:     "nested_int",
+                                               Type:     
bigquery.IntegerFieldType,
+                                               Required: true,
+                                       },
+                               },
+                       },
+                       expectedTypeStr: "struct<nested_string: utf8, 
nested_int: int64>",
+                       expectError:     false,
+               },
+       }
+
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       field, err := buildField(tt.schema, 0)
+
+                       if tt.expectError {
+                               if err == nil {
+                                       t.Fatalf("Expected error for test case 
%s, but got nil", tt.name)
+                               }
+                               return
+                       }
+
+                       if err != nil {
+                               t.Fatalf("Expected no error for test case %s, 
got: %v", tt.name, err)
+                       }
+
+                       if field.Name != tt.schema.Name {
+                               t.Errorf("Expected field name '%s', got '%s'", 
tt.schema.Name, field.Name)
+                       }
+
+                       typeStr := field.Type.String()
+                       if typeStr != tt.expectedTypeStr {
+                               t.Errorf("Expected field type string to be 
'%s', got '%s'", tt.expectedTypeStr, typeStr)
+                       }
+               })
+       }
+}

Reply via email to