paleolimbot commented on code in PR #1138:
URL: https://github.com/apache/iceberg-go/pull/1138#discussion_r3382621252
##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx
context.Context, rootLocation string
}
}
}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+ if len(meta.CRS) == 0 && meta.CRSType == "" {
+ return "srid:0", nil
+ }
+
+ var crs string
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS metadata:
%w", err)
+ }
+ }
+
+ if crs == "OGC:CRS84" || crs == "EPSG:4326" {
Review Comment:
> EPSG:4326 and OGC:CRS84 aren't the same CRS
They are for the purposes of the GeoArrow and Parquet specifications, which
explicitly define the axis order for these cases
##########
table/arrow_utils_test.go:
##########
@@ -428,6 +429,237 @@ func TestVariantArrowConversion(t *testing.T) {
})
}
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage
arrow.DataType, want geoarrow.Metadata) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, want, wkb.Metadata())
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage
arrow.DataType, wantJSON string) {
+ t.Helper()
+
+ wkb, ok := dt.(*geoarrow.WKBType)
+ require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+ assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+ assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+ assert.Equal(t, wantJSON, wkb.Serialize())
+}
+
+func jsonCRS(s string) json.RawMessage {
+ raw, _ := json.Marshal(s)
+
+ return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+ geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+ require.NoError(t, err)
+ geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+ require.NoError(t, err)
+
+ // Note that these tests below are based on arrow-rs tests
(https://github.com/apache/arrow-rs/pull/10065)
+ geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+ require.NoError(t, err)
+ geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+ require.NoError(t, err)
+
+ geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+ require.NoError(t, err)
+ geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84",
"karney")
+ require.NoError(t, err)
+ geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+ require.NoError(t, err)
+ geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+ require.NoError(t, err)
+ geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+ require.NoError(t, err)
+ geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+ require.NoError(t, err)
+ geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+ require.NoError(t, err)
+
+ typeCases := []struct {
+ name string
+ ice iceberg.Type
+ wantMetaJSON string
+ }{
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ {
+ name: "geometry_default_crs",
+ ice: iceberg.GeometryType{},
+ wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+ },
Review Comment:
These test cases look good to me. Thanks!
There should be a separate set of cases for the opposite direction (like in
the PR that I linked), where you start with a metadata string and expect a
specific iceberg type.
##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx
context.Context, rootLocation string
}
}
}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+ if len(meta.CRS) == 0 && meta.CRSType == "" {
+ return "srid:0", nil
+ }
+
+ var crs string
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS metadata:
%w", err)
+ }
+ }
+
+ if crs == "OGC:CRS84" || crs == "EPSG:4326" {
+ return "OGC:CRS84", nil
+ }
+
+ switch meta.CRSType {
+ case geoarrow.CRSTypeSRID:
+ return "srid:" + crs, nil
+ case geoarrow.CRSTypePROJJSON:
+ return "", errors.New("geoarrow CRS type projjson not supported
yet")
+ default:
Review Comment:
The `CRSType`, if it's coming from JSON, is just a hint, is not required,
and is often absent. Here you probably want to:
- Check if crs is a JSON object. If it is, check the `id` member and paste
together `crs["id"]["authority"]`, `:`, and `crs["id"]["code"]`. If any of
those are missing, error for an unsupported CRS>
- Check if crs is a string. If it's shorter than 32 characters, let it
through verbatim. There's no official restriction on allowed characters in
authorities or codes but the length check should reject anything questionable.
##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx
context.Context, rootLocation string
}
}
}
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+ if len(meta.CRS) == 0 && meta.CRSType == "" {
+ return "srid:0", nil
+ }
+
+ var crs string
+ if len(meta.CRS) > 0 {
+ if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+ return "", fmt.Errorf("invalid geoarrow CRS metadata:
%w", err)
+ }
+ }
+
+ if crs == "OGC:CRS84" || crs == "EPSG:4326" {
+ return "OGC:CRS84", nil
+ }
+
+ switch meta.CRSType {
+ case geoarrow.CRSTypeSRID:
+ return "srid:" + crs, nil
+ case geoarrow.CRSTypePROJJSON:
+ return "", errors.New("geoarrow CRS type projjson not supported
yet")
+ default:
+ return crs, nil
+ }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type,
error) {
+ crs, err := geoArrowCRSToIcebergCRS(meta)
+ if err != nil {
+ return nil, err
+ }
+
+ switch meta.Edges {
+ case geoarrow.EdgePlanar:
+ return iceberg.GeometryTypeOf(crs)
+ case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas,
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+ return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+ default:
+ return nil, fmt.Errorf("unsupported geoarrow edges %q",
meta.Edges)
+ }
+}
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+ if strings.HasPrefix(strings.ToLower(crs), "srid:") {
+ id := crs[len("srid:"):]
+
+ if id == "0" {
+ return geoarrow.NewMetadata() // srid:0 maps to omitted
GeoArrow CRS
+ }
+
+ raw, _ := json.Marshal(id)
+
+ return geoarrow.Metadata{
+ CRS: raw,
+ CRSType: geoarrow.CRSTypeSRID,
+ }
+ }
+
+ if strings.HasPrefix(strings.ToLower(crs), "projjson:") {
+ panic(fmt.Errorf("%w: projjson CRS not supported yet",
iceberg.ErrInvalidSchema))
+ }
+
+ raw, _ := json.Marshal(crs)
+
+ return geoarrow.Metadata{
Review Comment:
`"crs_type": "authority_code"` is optional and no consumer actually requires
this, but it is polite to include it (I put this in the Arrow C++ export path).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]