This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 637fc280f fix(go/adbc/driver/bigquery): fix parsing repeated records with nested fields (#3240)
637fc280f is described below
commit 637fc280f4a49275ff1e00e6143040b8d02899e9
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Wed Aug 6 21:28:52 2025 -0300
fix(go/adbc/driver/bigquery): fix parsing repeated records with nested fields (#3240)
Co-authored-by: Xuliang (Harry) Sun <[email protected]>
---
go/adbc/driver/bigquery/connection.go | 50 ++++-------
go/adbc/driver/bigquery/connection_test.go | 140 +++++++++++++++++++++++++++++
2 files changed, 159 insertions(+), 31 deletions(-)
diff --git a/go/adbc/driver/bigquery/connection.go b/go/adbc/driver/bigquery/connection.go
index a8ec4fa65..d904ad49b 100644
--- a/go/adbc/driver/bigquery/connection.go
+++ b/go/adbc/driver/bigquery/connection.go
@@ -805,40 +805,23 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
case bigquery.TimestampFieldType:
field.Type = arrow.FixedWidthTypes.Timestamp_ms
case bigquery.RecordFieldType:
- if schema.Repeated {
- if len(schema.Schema) == 1 {
- arrayField, err := buildField(schema.Schema[0], level+1)
- if err != nil {
- return arrow.Field{}, err
- }
- field.Type = arrow.ListOf(arrayField.Type)
- field.Metadata = arrayField.Metadata
- field.Nullable = arrayField.Nullable
- } else {
- return arrow.Field{}, adbc.Error{
- Code: adbc.StatusInvalidArgument,
- Msg: fmt.Sprintf("Cannot create array schema for filed `%s`: len(schema.Schema) != 1", schema.Name),
- }
- }
- } else {
- nestedFields := make([]arrow.Field, len(schema.Schema))
- for i, nestedSchema := range schema.Schema {
- f, err := buildField(nestedSchema, level+1)
- if err != nil {
- return arrow.Field{}, err
- }
- nestedFields[i] = f
+ // create an Arrow struct for BigQuery Record fields
+ nestedFields := make([]arrow.Field, len(schema.Schema))
+ for i, nestedFieldSchema := range schema.Schema {
+ f, err := buildField(nestedFieldSchema, level+1)
+ if err != nil {
+ return arrow.Field{}, err
}
- structType := arrow.StructOf(nestedFields...)
- if structType == nil {
- return arrow.Field{}, adbc.Error{
- Code: adbc.StatusInvalidArgument,
- Msg: fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
- }
+ nestedFields[i] = f
+ }
+ structType := arrow.StructOf(nestedFields...)
+ if structType == nil {
+ return arrow.Field{}, adbc.Error{
+ Code: adbc.StatusInvalidArgument,
+ Msg: fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
}
- field.Type = structType
}
-
+ field.Type = structType
case bigquery.DateFieldType:
field.Type = arrow.FixedWidthTypes.Date32
case bigquery.TimeFieldType:
@@ -870,6 +853,11 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
}
}
+ // if the field is repeated, then it's a list of the type we just built
+ if schema.Repeated {
+ field.Type = arrow.ListOf(field.Type)
+ }
+
if level == 0 {
metadata["DefaultValueExpression"] = schema.DefaultValueExpression
}
diff --git a/go/adbc/driver/bigquery/connection_test.go b/go/adbc/driver/bigquery/connection_test.go
new file mode 100644
index 000000000..e2bc22d2d
--- /dev/null
+++ b/go/adbc/driver/bigquery/connection_test.go
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package bigquery
+
+import (
+ "testing"
+
+ "cloud.google.com/go/bigquery"
+)
+
+func TestBuildField(t *testing.T) {
+ tests := []struct {
+ name string
+ schema *bigquery.FieldSchema
+ expectedTypeStr string
+ expectError bool
+ }{
+ {
+ name: "ArrayOfScalar",
+ schema: &bigquery.FieldSchema{
+ Name: "test_array_scalar_field",
+ Type: bigquery.IntegerFieldType,
+ Repeated: true,
+ Required: false,
+ Description: "Test array field with scalar type",
+ Schema: nil,
+ },
+ expectedTypeStr: "list<item: int64, nullable>",
+ expectError: false,
+ },
+ {
+ name: "ArrayOfRecordWithMultipleFields",
+ schema: &bigquery.FieldSchema{
+ Name: "test_array_field",
+ Type: bigquery.RecordFieldType,
+ Repeated: true,
+ Required: false,
+ Description: "Test array field with multiple nested fields",
+ Schema: []*bigquery.FieldSchema{
+ {
+ Name: "field1",
+ Type: bigquery.StringFieldType,
+ Required: false,
+ },
+ {
+ Name: "field2",
+ Type: bigquery.IntegerFieldType,
+ Required: false,
+ },
+ },
+ },
+ expectedTypeStr: "list<item: struct<field1: utf8, field2: int64>, nullable>",
+ expectError: false,
+ },
+ {
+ name: "ArrayOfRecordWithSingleField",
+ schema: &bigquery.FieldSchema{
+ Name: "test_single_array_field",
+ Type: bigquery.RecordFieldType,
+ Repeated: true,
+ Required: false,
+ Description: "Test array field with single nested field",
+ Schema: []*bigquery.FieldSchema{
+ {
+ Name: "single_field",
+ Type: bigquery.StringFieldType,
+ Required: false,
+ },
+ },
+ },
+ expectedTypeStr: "list<item: struct<single_field: utf8>, nullable>",
+ expectError: false,
+ },
+ {
+ name: "NonRepeatedRecord",
+ schema: &bigquery.FieldSchema{
+ Name: "test_struct_field",
+ Type: bigquery.RecordFieldType,
+ Repeated: false,
+ Required: false,
+ Description: "Test struct field with multiple nested fields",
+ Schema: []*bigquery.FieldSchema{
+ {
+ Name: "nested_string",
+ Type: bigquery.StringFieldType,
+ Required: false,
+ },
+ {
+ Name: "nested_int",
+ Type: bigquery.IntegerFieldType,
+ Required: true,
+ },
+ },
+ },
+ expectedTypeStr: "struct<nested_string: utf8, nested_int: int64>",
+ expectError: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ field, err := buildField(tt.schema, 0)
+
+ if tt.expectError {
+ if err == nil {
+ t.Fatalf("Expected error for test case %s, but got nil", tt.name)
+ }
+ return
+ }
+
+ if err != nil {
+ t.Fatalf("Expected no error for test case %s, got: %v", tt.name, err)
+ }
+
+ if field.Name != tt.schema.Name {
+ t.Errorf("Expected field name '%s', got '%s'", tt.schema.Name, field.Name)
+ }
+
+ typeStr := field.Type.String()
+ if typeStr != tt.expectedTypeStr {
+ t.Errorf("Expected field type string to be '%s', got '%s'", tt.expectedTypeStr, typeStr)
+ }
+ })
+ }
+}