laskoviymishka commented on code in PR #493:
URL: https://github.com/apache/iceberg-go/pull/493#discussion_r2228970795
########## table/transaction_test.go: ##########
@@ -114,21 +137,143 @@ func (s *SparkIntegrationTestSuite) TestAddFile() {
 	tbl, err = tx.Commit(s.ctx)
 	s.Require().NoError(err)

-	spark, err := s.stack.ServiceContainer(s.T().Context(), "spark-iceberg")
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestAddedFile")
+	s.Require().NoError(err)
+}
+
+func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() {
+	icebergSchema := iceberg.NewSchema(0,
+		iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool},
+		iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 4, Name: "int", Type: iceberg.PrimitiveTypes.Int32},
+		iceberg.NestedField{ID: 5, Name: "long", Type: iceberg.PrimitiveTypes.Int64},
+		iceberg.NestedField{ID: 6, Name: "float", Type: iceberg.PrimitiveTypes.Float32},
+		iceberg.NestedField{ID: 7, Name: "double", Type: iceberg.PrimitiveTypes.Float64},
+		iceberg.NestedField{ID: 8, Name: "timestamp", Type: iceberg.PrimitiveTypes.Timestamp},
+		iceberg.NestedField{ID: 9, Name: "timestamptz", Type: iceberg.PrimitiveTypes.TimestampTz},
+		iceberg.NestedField{ID: 10, Name: "date", Type: iceberg.PrimitiveTypes.Date},
+		iceberg.NestedField{ID: 11, Name: "uuid", Type: iceberg.PrimitiveTypes.UUID},
+		iceberg.NestedField{ID: 12, Name: "binary", Type: iceberg.PrimitiveTypes.Binary},
+		iceberg.NestedField{ID: 13, Name: "fixed", Type: iceberg.FixedTypeOf(16)},
+		iceberg.NestedField{ID: 14, Name: "small_dec", Type: iceberg.DecimalTypeOf(8, 2)},
+		iceberg.NestedField{ID: 15, Name: "med_dec", Type: iceberg.DecimalTypeOf(16, 2)},
+		iceberg.NestedField{ID: 16, Name: "large_dec", Type: iceberg.DecimalTypeOf(24, 2)},
+		iceberg.NestedField{ID: 17, Name: "list", Type: &iceberg.ListType{
+			ElementID: 18,
+			Element:   iceberg.PrimitiveTypes.Int32},
+		},
+	)
+
+	arrowSchema, err := table.SchemaToArrowSchema(icebergSchema, nil, true, false)
+	s.Require().NoError(err)
+
+	arrTable, err := array.TableFromJSON(memory.DefaultAllocator, arrowSchema, []string{
+		`[
+			{
+				"bool": false,
+				"string": "a",
+				"string_long": "` + strings.Repeat("a", 22) + `",
+				"int": 1,
+				"long": 1,
+				"float": 0.0,
+				"double": 0.0,
+				"timestamp": "2023-01-01T19:25:00.000000+08:00",
+				"timestamptz": "2023-01-01T19:25:00.000000Z",
+				"date": "2023-01-01",
+				"uuid": "00000000-0000-0000-0000-000000000000",
+				"binary": "AQ==",
+				"fixed": "AAAAAAAAAAAAAAAAAAAAAA==",
+				"small_dec": "123456.78",
+				"med_dec": "12345678901234.56",
+				"large_dec": "1234567890123456789012.34",
+				"list": [1, 2, 3]
+			},
+			{
+				"bool": null,
+				"string": null,
+				"string_long": null,
+				"int": null,
+				"long": null,
+				"float": null,
+				"double": null,
+				"timestamp": null,
+				"timestamptz": null,
+				"date": null,
+				"uuid": null,
+				"binary": null,
+				"fixed": null,
+				"small_dec": null,
+				"med_dec": null,
+				"large_dec": null,
+				"list": null
+			},
+			{
+				"bool": true,
+				"string": "z",
+				"string_long": "` + strings.Repeat("z", 22) + `",
+				"int": 9,
+				"long": 9,
+				"float": 0.9,
+				"double": 0.9,
+				"timestamp": "2023-03-01T19:25:00.000000+08:00",
+				"timestamptz": "2023-03-01T19:25:00.000000Z",
+				"date": "2023-03-01",
+				"uuid": "11111111-1111-1111-1111-111111111111",
+				"binary": "Eg==",
+				"fixed": "EREREREREREREREREREREQ==",
+				"small_dec": "876543.21",
+				"med_dec": "65432109876543.21",
+				"large_dec": "4321098765432109876543.21",
+				"list": [-1, -2, -3]
+			}
+		]`,
+	})
+	s.Require().NoError(err)
+	defer arrTable.Release()
+
+	tbl, err := s.cat.CreateTable(s.ctx, catalog.ToIdentifier("default", "go_test_different_data_types"), icebergSchema)
+	s.Require().NoError(err)
+
+	tx := tbl.NewTransaction()
+	err = tx.AppendTable(s.ctx, arrTable, 1, nil)
+	s.Require().NoError(err)
+
+	_, err = tx.Commit(s.ctx)
 	s.Require().NoError(err)

-	_, stdout, err := spark.Exec(s.ctx, []string{"ipython", "./run_spark_count_sql.py"})
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestReadDifferentDataTypes")
+	s.Require().NoError(err)
+}
+
+func (s *SparkIntegrationTestSuite) TestUpdateSpec() {
+	icebergSchema := iceberg.NewSchema(0,
+		iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool},
+		iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32},
+	)
+
+	partitionSpec := iceberg.NewPartitionSpec(
+		iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.TruncateTransform{Width: 5}, Name: "bar_truncate"},
+	)
+
+	tbl, err := s.cat.CreateTable(
+		s.ctx,
+		catalog.ToIdentifier("default", "go_test_update_spec"),
+		icebergSchema,
+		catalog.WithPartitionSpec(&partitionSpec),
+	)
+	s.Require().NoError(err)
+
+	tx := tbl.NewTransaction()
+	err = tx.UpdateSpec(false).
+		AddField("baz", iceberg.BucketTransform{NumBuckets: 3}, "").
+		Commit()
 	s.Require().NoError(err)

+	_, err = tx.Commit(s.ctx)
-	output, err := io.ReadAll(stdout)
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestReadSpecUpdate")

Review Comment:
   This moves the actual assertions from the Go code into Python, which isn't ideal for debuggability. Can we move as much of the verification as possible into the Go tests themselves?
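   One possible shape for that, as a rough sketch rather than code from this PR: it assumes the suite fields (s.ctx, s.cat) and the arrTable/tx values from TestDifferentDataTypes above, and that iceberg-go's Scan().ToArrowTable and arrow-go's array.TableEqual behave as expected here.

       // Sketch: keep validation.py as a cross-engine smoke test, but do the
       // value assertions in Go, where a failure points at a test line.
       committed, err := tx.Commit(s.ctx)
       s.Require().NoError(err)

       // Read the committed snapshot back through the Go scan API.
       roundTrip, err := committed.Scan().ToArrowTable(s.ctx)
       s.Require().NoError(err)
       defer roundTrip.Release()

       // Assert the round trip directly instead of inside validation.py.
       s.Require().EqualValues(arrTable.NumRows(), roundTrip.NumRows())
       s.Require().True(array.TableEqual(arrTable, roundTrip),
           "scan result should match the appended Arrow table")

       // Spark then only has to confirm it can read the table at all.
       s.Require().NoError(recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestReadDifferentDataTypes"))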
########## table/transaction_test.go: ##########
@@ -114,21 +137,143 @@ func (s *SparkIntegrationTestSuite) TestAddFile() {
 	tbl, err = tx.Commit(s.ctx)
 	s.Require().NoError(err)

-	spark, err := s.stack.ServiceContainer(s.T().Context(), "spark-iceberg")
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestAddedFile")
+	s.Require().NoError(err)
+}
+
+func (s *SparkIntegrationTestSuite) TestDifferentDataTypes() {
+	icebergSchema := iceberg.NewSchema(0,
+		iceberg.NestedField{ID: 1, Name: "bool", Type: iceberg.PrimitiveTypes.Bool},
+		iceberg.NestedField{ID: 2, Name: "string", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 3, Name: "string_long", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 4, Name: "int", Type: iceberg.PrimitiveTypes.Int32},
+		iceberg.NestedField{ID: 5, Name: "long", Type: iceberg.PrimitiveTypes.Int64},
+		iceberg.NestedField{ID: 6, Name: "float", Type: iceberg.PrimitiveTypes.Float32},
+		iceberg.NestedField{ID: 7, Name: "double", Type: iceberg.PrimitiveTypes.Float64},
+		iceberg.NestedField{ID: 8, Name: "timestamp", Type: iceberg.PrimitiveTypes.Timestamp},
+		iceberg.NestedField{ID: 9, Name: "timestamptz", Type: iceberg.PrimitiveTypes.TimestampTz},
+		iceberg.NestedField{ID: 10, Name: "date", Type: iceberg.PrimitiveTypes.Date},
+		iceberg.NestedField{ID: 11, Name: "uuid", Type: iceberg.PrimitiveTypes.UUID},
+		iceberg.NestedField{ID: 12, Name: "binary", Type: iceberg.PrimitiveTypes.Binary},
+		iceberg.NestedField{ID: 13, Name: "fixed", Type: iceberg.FixedTypeOf(16)},
+		iceberg.NestedField{ID: 14, Name: "small_dec", Type: iceberg.DecimalTypeOf(8, 2)},
+		iceberg.NestedField{ID: 15, Name: "med_dec", Type: iceberg.DecimalTypeOf(16, 2)},
+		iceberg.NestedField{ID: 16, Name: "large_dec", Type: iceberg.DecimalTypeOf(24, 2)},
+		iceberg.NestedField{ID: 17, Name: "list", Type: &iceberg.ListType{
+			ElementID: 18,
+			Element:   iceberg.PrimitiveTypes.Int32},
+		},
+	)
+
+	arrowSchema, err := table.SchemaToArrowSchema(icebergSchema, nil, true, false)
+	s.Require().NoError(err)
+
+	arrTable, err := array.TableFromJSON(memory.DefaultAllocator, arrowSchema, []string{
+		`[
+			{
+				"bool": false,
+				"string": "a",
+				"string_long": "` + strings.Repeat("a", 22) + `",
+				"int": 1,
+				"long": 1,
+				"float": 0.0,
+				"double": 0.0,
+				"timestamp": "2023-01-01T19:25:00.000000+08:00",
+				"timestamptz": "2023-01-01T19:25:00.000000Z",
+				"date": "2023-01-01",
+				"uuid": "00000000-0000-0000-0000-000000000000",
+				"binary": "AQ==",
+				"fixed": "AAAAAAAAAAAAAAAAAAAAAA==",
+				"small_dec": "123456.78",
+				"med_dec": "12345678901234.56",
+				"large_dec": "1234567890123456789012.34",
+				"list": [1, 2, 3]
+			},
+			{
+				"bool": null,
+				"string": null,
+				"string_long": null,
+				"int": null,
+				"long": null,
+				"float": null,
+				"double": null,
+				"timestamp": null,
+				"timestamptz": null,
+				"date": null,
+				"uuid": null,
+				"binary": null,
+				"fixed": null,
+				"small_dec": null,
+				"med_dec": null,
+				"large_dec": null,
+				"list": null
+			},
+			{
+				"bool": true,
+				"string": "z",
+				"string_long": "` + strings.Repeat("z", 22) + `",
+				"int": 9,
+				"long": 9,
+				"float": 0.9,
+				"double": 0.9,
+				"timestamp": "2023-03-01T19:25:00.000000+08:00",
+				"timestamptz": "2023-03-01T19:25:00.000000Z",
+				"date": "2023-03-01",
+				"uuid": "11111111-1111-1111-1111-111111111111",
+				"binary": "Eg==",
+				"fixed": "EREREREREREREREREREREQ==",
+				"small_dec": "876543.21",
+				"med_dec": "65432109876543.21",
+				"large_dec": "4321098765432109876543.21",
+				"list": [-1, -2, -3]
+			}
+		]`,
+	})
+	s.Require().NoError(err)
+	defer arrTable.Release()
+
+	tbl, err := s.cat.CreateTable(s.ctx, catalog.ToIdentifier("default", "go_test_different_data_types"), icebergSchema)
+	s.Require().NoError(err)
+
+	tx := tbl.NewTransaction()
+	err = tx.AppendTable(s.ctx, arrTable, 1, nil)
+	s.Require().NoError(err)
+
+	_, err = tx.Commit(s.ctx)
 	s.Require().NoError(err)

-	_, stdout, err := spark.Exec(s.ctx, []string{"ipython", "./run_spark_count_sql.py"})
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestReadDifferentDataTypes")
+	s.Require().NoError(err)
+}
+
+func (s *SparkIntegrationTestSuite) TestUpdateSpec() {
+	icebergSchema := iceberg.NewSchema(0,
+		iceberg.NestedField{ID: 1, Name: "foo", Type: iceberg.PrimitiveTypes.Bool},
+		iceberg.NestedField{ID: 2, Name: "bar", Type: iceberg.PrimitiveTypes.String},
+		iceberg.NestedField{ID: 3, Name: "baz", Type: iceberg.PrimitiveTypes.Int32},
+	)
+
+	partitionSpec := iceberg.NewPartitionSpec(
+		iceberg.PartitionField{SourceID: 2, FieldID: 1000, Transform: iceberg.TruncateTransform{Width: 5}, Name: "bar_truncate"},
+	)
+
+	tbl, err := s.cat.CreateTable(
+		s.ctx,
+		catalog.ToIdentifier("default", "go_test_update_spec"),
+		icebergSchema,
+		catalog.WithPartitionSpec(&partitionSpec),
+	)
+	s.Require().NoError(err)
+
+	tx := tbl.NewTransaction()
+	err = tx.UpdateSpec(false).
+		AddField("baz", iceberg.BucketTransform{NumBuckets: 3}, "").
+		Commit()
 	s.Require().NoError(err)

+	_, err = tx.Commit(s.ctx)
-	output, err := io.ReadAll(stdout)
+	err = recipe.ExecuteSpark(s.T(), "./validation.py", "--test", "TestReadSpecUpdate")
 	s.Require().NoError(err)
-	strings.HasSuffix(string(output), `

Review Comment:
   Why not verify output?
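   A sketch of one way to do that, assuming recipe.ExecuteSpark could grow an output-returning variant; ExecuteSparkOutput below is a hypothetical name, not an existing helper, and the expected substring depends on what validation.py actually prints. Worth noting that the removed strings.HasSuffix(string(output), ...) call discarded its boolean result, so the old check could never fail the test anyway.

       // Hypothetical output-returning variant of recipe.ExecuteSpark.
       out, err := recipe.ExecuteSparkOutput(s.T(), "./validation.py", "--test", "TestReadSpecUpdate")
       s.Require().NoError(err)

       // Assert on the captured output instead of dropping it; unlike the
       // old bare strings.HasSuffix call, a mismatch now fails the test.
       // The success marker here is illustrative and would need to match
       // whatever validation.py prints on a passing run.
       s.Require().Contains(out, "OK", "validation.py should report success")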
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org