paleolimbot commented on code in PR #1138:
URL: https://github.com/apache/iceberg-go/pull/1138#discussion_r3382621252


##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "srid:0", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       if crs == "OGC:CRS84" || crs == "EPSG:4326" {

Review Comment:
   > EPSG:4326 and OGC:CRS84 aren't the same CRS
   
   They are for the purposes of the GeoArrow and Parquet specifications, which 
explicitly define the axis order for these cases



##########
table/arrow_utils_test.go:
##########
@@ -428,6 +429,237 @@ func TestVariantArrowConversion(t *testing.T) {
        })
 }
 
+func assertGeoArrowWKB(t *testing.T, dt arrow.DataType, storage 
arrow.DataType, want geoarrow.Metadata) {
+       t.Helper()
+
+       wkb, ok := dt.(*geoarrow.WKBType)
+       require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+       assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+       assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+       assert.Equal(t, want, wkb.Metadata())
+}
+
+func assertGeoArrowWKBMetadataJSON(t *testing.T, dt arrow.DataType, storage 
arrow.DataType, wantJSON string) {
+       t.Helper()
+
+       wkb, ok := dt.(*geoarrow.WKBType)
+       require.True(t, ok, "expected *geoarrow.WKBType, got %T", dt)
+       assert.Equal(t, "geoarrow.wkb", wkb.ExtensionName())
+       assert.True(t, arrow.TypeEqual(storage, wkb.StorageType()))
+       assert.Equal(t, wantJSON, wkb.Serialize())
+}
+
+func jsonCRS(s string) json.RawMessage {
+       raw, _ := json.Marshal(s)
+
+       return raw
+}
+
+func TestIcebergGeoTypesToArrowSchema(t *testing.T) {
+       geomSRID, err := iceberg.GeometryTypeOf("srid:4326")
+       require.NoError(t, err)
+       geogKarney, err := iceberg.GeographyTypeOf("srid:4269", "karney")
+       require.NoError(t, err)
+
+       // Note that these tests below are based on arrow-rs tests 
(https://github.com/apache/arrow-rs/pull/10065)
+       geomSRID0, err := iceberg.GeometryTypeOf("srid:0")
+       require.NoError(t, err)
+       geomEPSG4267, err := iceberg.GeometryTypeOf("EPSG:4267")
+       require.NoError(t, err)
+
+       geogSpherical, err := iceberg.GeographyTypeOf("OGC:CRS84", "spherical")
+       require.NoError(t, err)
+       geogKarneyDefaultCRS, err := iceberg.GeographyTypeOf("OGC:CRS84", 
"karney")
+       require.NoError(t, err)
+       geogVincenty, err := iceberg.GeographyTypeOf("OGC:CRS84", "vincenty")
+       require.NoError(t, err)
+       geogAndoyer, err := iceberg.GeographyTypeOf("OGC:CRS84", "andoyer")
+       require.NoError(t, err)
+       geogThomas, err := iceberg.GeographyTypeOf("OGC:CRS84", "thomas")
+       require.NoError(t, err)
+       geogSRID0, err := iceberg.GeographyTypeOf("srid:0", "spherical")
+       require.NoError(t, err)
+       geogEPSG4267, err := iceberg.GeographyTypeOf("EPSG:4267", "spherical")
+       require.NoError(t, err)
+
+       typeCases := []struct {
+               name         string
+               ice          iceberg.Type
+               wantMetaJSON string
+       }{
+               // Geometry with default CRS (defaults to OGC:CRS84 per Parquet 
spec)
+               {
+                       name:         "geometry_default_crs",
+                       ice:          iceberg.GeometryType{},
+                       wantMetaJSON: `{"crs":"OGC:CRS84"}`,
+               },

Review Comment:
   These test cases look good to me. Thanks!
   
   There should be a separate set of cases for the opposite direction (like in 
the PR that I linked), where you start with a metadata string and expect a 
specific iceberg type.



##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "srid:0", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       if crs == "OGC:CRS84" || crs == "EPSG:4326" {
+               return "OGC:CRS84", nil
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypePROJJSON:
+               return "", errors.New("geoarrow CRS type projjson not supported 
yet")
+       default:

Review Comment:
   The `CRSType`, if it's coming from JSON, is just a hint, is not required, 
and is often absent. Here you probably want to:
   
   - Check if crs is a JSON object. If it is, check the `id` member and paste 
together `crs["id"]["authority"]`, `:`, and `crs["id"]["code"]`. If any of 
those are missing, error for an unsupported CRS>
   - Check if crs is a string. If it's shorter than 32 characters, let it 
through verbatim. There's no official restriction on allowed characters in 
authorities or codes but the length check should reject anything questionable.



##########
table/arrow_utils.go:
##########
@@ -1828,3 +1844,72 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "srid:0", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       if crs == "OGC:CRS84" || crs == "EPSG:4326" {
+               return "OGC:CRS84", nil
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypePROJJSON:
+               return "", errors.New("geoarrow CRS type projjson not supported 
yet")
+       default:
+               return crs, nil
+       }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type, 
error) {
+       crs, err := geoArrowCRSToIcebergCRS(meta)
+       if err != nil {
+               return nil, err
+       }
+
+       switch meta.Edges {
+       case geoarrow.EdgePlanar:
+               return iceberg.GeometryTypeOf(crs)
+       case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas, 
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+               return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+       default:
+               return nil, fmt.Errorf("unsupported geoarrow edges %q", 
meta.Edges)
+       }
+}
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+       if strings.HasPrefix(strings.ToLower(crs), "srid:") {
+               id := crs[len("srid:"):]
+
+               if id == "0" {
+                       return geoarrow.NewMetadata() // srid:0 maps to omitted 
GeoArrow CRS
+               }
+
+               raw, _ := json.Marshal(id)
+
+               return geoarrow.Metadata{
+                       CRS:     raw,
+                       CRSType: geoarrow.CRSTypeSRID,
+               }
+       }
+
+       if strings.HasPrefix(strings.ToLower(crs), "projjson:") {
+               panic(fmt.Errorf("%w: projjson CRS not supported yet", 
iceberg.ErrInvalidSchema))
+       }
+
+       raw, _ := json.Marshal(crs)
+
+       return geoarrow.Metadata{

Review Comment:
   `"crs_type": "authority_code"` is optional and no consumer actually requires 
this, but it is polite to include it (I put this in the Arrow C++ export path).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to