laskoviymishka commented on code in PR #984:
URL: https://github.com/apache/iceberg-go/pull/984#discussion_r3218034750
##########
types.go:
##########
@@ -777,6 +822,144 @@ func (UnknownType) primitive() {}
func (UnknownType) Type() string { return "unknown" }
func (UnknownType) String() string { return "unknown" }
+const defaultGeoCRS = "OGC:CRS84"
+
+type GeometryType struct {
+ crs string
+}
+
+func GeometryTypeOf(crs string) (GeometryType, error) {
+ if crs == "" {
+ return GeometryType{}, fmt.Errorf("%w: invalid CRS: (empty
string)", ErrInvalidTypeString)
+ }
+ if strings.EqualFold(crs, defaultGeoCRS) {
+ return GeometryType{}, nil
+ }
+
+ return GeometryType{crs: crs}, nil
+}
+
+func (g GeometryType) CRS() string {
+ if g.crs == "" {
+ return defaultGeoCRS
+ }
+
+ return g.crs
+}
+
+func (g GeometryType) Equals(other Type) bool {
+ rhs, ok := other.(GeometryType)
+ if !ok {
+ return false
+ }
+
+ return g.crs == rhs.crs
+}
+
+func (GeometryType) primitive() {}
+func (g GeometryType) Type() string {
+ if g.crs == "" {
+ return "geometry"
+ }
+
+ return fmt.Sprintf("geometry(%s)", g.crs)
+}
+
+func (g GeometryType) String() string {
+ return g.Type()
+}
+
+const defaultGeographyAlgorithm = string(geoarrow.EdgeSpherical)
+
+func toGeoArrowEdgeInterpolation(s string) (geoarrow.EdgeInterpolation, error)
{
+ algo :=
geoarrow.EdgeInterpolation(strings.ToLower(strings.TrimSpace(s)))
+ switch algo {
+ case geoarrow.EdgeSpherical, geoarrow.EdgeVincenty,
+ geoarrow.EdgeThomas, geoarrow.EdgeAndoyer, geoarrow.EdgeKarney,
+ geoarrow.EdgePlanar:
Review Comment:
`EdgePlanar` isn't in the Iceberg V3 spec's algorithm set
(`spherical|vincenty|thomas|andoyer|karney`). With it in the allow-list a Go
writer can emit `geography(crs, planar)` that Java and PyIceberg won't accept —
cross-client wire incompatibility baked in at the type-construction boundary.
Drop `geoarrow.EdgePlanar` from the case to stay within spec.
##########
types_test.go:
##########
@@ -479,3 +479,198 @@ func TestPropUInt(t *testing.T) {
assert.Equal(t, uint(77), iceberg.PropUInt(props, "garbage", 77),
"falls back on parse error")
assert.Equal(t, uint(5), iceberg.PropUInt(props, "missing", 5), "falls
back on missing key")
}
+
+func TestGeometryType(t *testing.T) {
+ t.Run("default CRS", func(t *testing.T) {
+ geom := iceberg.GeometryType{}
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+ assert.Equal(t, "geometry", geom.String())
+ assert.True(t, geom.Equals(iceberg.GeometryType{}))
+ })
+
+ t.Run("custom CRS", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:3857")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:3857", geom.CRS())
+ assert.Equal(t, "geometry(srid:3857)", geom.String())
+ })
+
+ t.Run("CRS normalization", func(t *testing.T) {
+ geom1, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geom2 := iceberg.GeometryType{}
+ assert.True(t, geom1.Equals(geom2))
+ assert.Equal(t, "geometry", geom1.String())
+ })
+
+ t.Run("empty CRS error", func(t *testing.T) {
+ _, err := iceberg.GeometryTypeOf("")
+ assert.ErrorContains(t, err, "invalid CRS: (empty string)")
+ })
+
+ t.Run("JSON parsing - default", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry",
"required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - custom CRS", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"geometry(srid:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - case insensitive", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"Geometry(SRID:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "SRID:4326", geom.CRS())
+ })
+
+ t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry(
srid:4326 )", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+ })
+
+ t.Run("rejects extra argument", func(t *testing.T) {
+ data := `{"id": 1, "name": "loc", "type": "geometry(srid:4326,
extra)", "required": false}`
+ var n iceberg.NestedField
+ err := json.Unmarshal([]byte(data), &n)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidTypeString)
+ })
+}
+
+func TestGeographyType(t *testing.T) {
+ t.Run("default CRS and algorithm", func(t *testing.T) {
+ geog := iceberg.GeographyType{}
+ assert.Equal(t, "OGC:CRS84", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography", geog.String())
+ assert.True(t, geog.Equals(iceberg.GeographyType{}))
+ })
+
+ t.Run("custom CRS only", func(t *testing.T) {
+ geog, err := iceberg.GeographyTypeOf("srid:4269", "spherical")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:4269", geog.CRS())
+ assert.Equal(t, "spherical", geog.Algorithm())
+ assert.Equal(t, "geography(srid:4269)", geog.String())
+ })
+
+ t.Run("default CRS with algorithm", func(t *testing.T) {
Review Comment:
Won't block this PR on it, but I'd add a one-liner here asserting
`GeographyTypeOf("OGC:CRS84", "spherical").Equals(GeographyType{})` is true.
The canonicalization reads correct, but it's unproven by the current tests, and
that's the contract that prevents fingerprint drift from Java.
##########
table/metadata_builder_internal_test.go:
##########
@@ -1840,6 +1844,110 @@ func TestUnknownTypeValidation(t *testing.T) {
})
}
+func TestGeometryGeographyNullOnlyDefaults(t *testing.T) {
+ testTypes := []struct {
+ name string
+ typ iceberg.Type
+ }{
+ {"geometry", iceberg.GeometryType{}},
+ {"geography", iceberg.GeographyType{}},
+ }
+
+ for _, tt := range testTypes {
+ t.Run(tt.name+" with non-null initial default", func(t
*testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ InitialDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "columns must default to
null")
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ })
+
+ t.Run(tt.name+" with non-null write default", func(t
*testing.T) {
+ defaultValue := "POINT(0 0)"
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ WriteDefault: &defaultValue,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.Error(t, err)
+ require.ErrorContains(t, err, "columns must default to
null")
+ require.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+ })
+
+ t.Run(tt.name+" with null defaults", func(t *testing.T) {
+ sc := iceberg.NewSchema(0,
+ iceberg.NestedField{
+ Type: tt.typ,
+ ID: 1,
+ Name: "location",
+ Required: false,
+ },
+ )
+
+ err := checkSchemaCompatibility(sc, 3)
+ require.NoError(t, err)
+ })
+
+ t.Run(tt.name+" in v2 with non-null initial default", func(t
*testing.T) {
Review Comment:
Nice-to-have, not blocking. This asserts `"is not supported until v3"` but
the v2 path fires both the type-unsupported error and the must-default-to-null
error — `ErrorContains` happens to match the first. The new exact-count test
below covers the duplicate-output concern, so the structural risk is gone.
Splitting this into two subtests (v2 type-unsupported, v3 must-default-to-null)
would still be cleaner, but the current shape isn't wrong.
##########
types_test.go:
##########
@@ -479,3 +479,198 @@ func TestPropUInt(t *testing.T) {
assert.Equal(t, uint(77), iceberg.PropUInt(props, "garbage", 77),
"falls back on parse error")
assert.Equal(t, uint(5), iceberg.PropUInt(props, "missing", 5), "falls
back on missing key")
}
+
+func TestGeometryType(t *testing.T) {
+ t.Run("default CRS", func(t *testing.T) {
+ geom := iceberg.GeometryType{}
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+ assert.Equal(t, "geometry", geom.String())
+ assert.True(t, geom.Equals(iceberg.GeometryType{}))
+ })
+
+ t.Run("custom CRS", func(t *testing.T) {
+ geom, err := iceberg.GeometryTypeOf("srid:3857")
+ require.NoError(t, err)
+ assert.Equal(t, "srid:3857", geom.CRS())
+ assert.Equal(t, "geometry(srid:3857)", geom.String())
+ })
+
+ t.Run("CRS normalization", func(t *testing.T) {
+ geom1, err := iceberg.GeometryTypeOf("OGC:CRS84")
+ require.NoError(t, err)
+ geom2 := iceberg.GeometryType{}
+ assert.True(t, geom1.Equals(geom2))
+ assert.Equal(t, "geometry", geom1.String())
+ })
+
+ t.Run("empty CRS error", func(t *testing.T) {
+ _, err := iceberg.GeometryTypeOf("")
+ assert.ErrorContains(t, err, "invalid CRS: (empty string)")
+ })
+
+ t.Run("JSON parsing - default", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry",
"required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "OGC:CRS84", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - custom CRS", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"geometry(srid:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+
+ out, err := json.Marshal(n)
+ require.NoError(t, err)
+ assert.JSONEq(t, data, string(out))
+ })
+
+ t.Run("JSON parsing - case insensitive", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type":
"Geometry(SRID:4326)", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "SRID:4326", geom.CRS())
+ })
+
+ t.Run("JSON parsing - whitespace tolerance", func(t *testing.T) {
+ data := `{"id": 1, "name": "location", "type": "geometry(
srid:4326 )", "required": false}`
+ var n iceberg.NestedField
+ require.NoError(t, json.Unmarshal([]byte(data), &n))
+ geom, ok := n.Type.(iceberg.GeometryType)
+ require.True(t, ok)
+ assert.Equal(t, "srid:4326", geom.CRS())
+ })
+
+ t.Run("rejects extra argument", func(t *testing.T) {
+ data := `{"id": 1, "name": "loc", "type": "geometry(srid:4326,
extra)", "required": false}`
+ var n iceberg.NestedField
+ err := json.Unmarshal([]byte(data), &n)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidTypeString)
+ })
+}
+
+func TestGeographyType(t *testing.T) {
Review Comment:
Won't block this PR, but worth a follow-up: a Java-emitted JSON fixture for
a geo schema with non-default CRS + algorithm, parsed here and re-marshaled
byte-compared, would lock down the wire compatibility contract before #991
builds on it. The internal Go round-trips we have today guard against
regressions in our own code but not against drift from Java's canonical form.
##########
table/substrait/substrait.go:
##########
@@ -169,6 +169,10 @@ func (convertToSubstrait) VisitUnknown() types.Type {
// Returning nil indicates this type cannot be converted to Substrait
return nil
}
+func (convertToSubstrait) VisitGeometry(iceberg.GeometryType) types.Type {
return &types.BinaryType{} }
Review Comment:
Pure style nit, won't block on it: `VisitGeometry` is one-line,
`VisitGeography` (line 173) is multi-line. Surrounding methods are uniformly
one-liners.
##########
go.mod:
##########
@@ -142,6 +142,7 @@ require (
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsevents v0.2.0 // indirect
github.com/fvbommel/sortorder v1.1.0 // indirect
+ github.com/geoarrow/geoarrow-go v0.0.0-20260403143023-f54751c3e3a1 //
indirect
Review Comment:
Pseudo-version pin (`v0.0.0-20260403143023-f54751c3e3a1`) is fragile — an
upstream rebase or force-push would break our build.
Worth pinning to a tagged release once `geoarrow-go` cuts one.
I'm ok with that as is, @zeroshade is this fine for you?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]