paleolimbot commented on code in PR #1138:
URL: https://github.com/apache/iceberg-go/pull/1138#discussion_r3375449928


##########
table/arrow_utils.go:
##########
@@ -1788,3 +1803,63 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "OGC:CRS84", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypeAuthorityCode:
+               return crs, nil
+       default:
+               return "", fmt.Errorf("unsupported geoarrow CRS type %q", 
meta.CRSType)
+       }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type, 
error) {
+       crs, err := geoArrowCRSToIcebergCRS(meta)
+       if err != nil {
+               return nil, err
+       }
+
+       switch meta.Edges {
+       case geoarrow.EdgePlanar:
+               return iceberg.GeometryTypeOf(crs)
+       case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas, 
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+               return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+       default:
+               return nil, fmt.Errorf("unsupported geoarrow edges %q", 
meta.Edges)
+       }
+}
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+       if crs == "OGC:CRS84" {
+               return geoarrow.NewMetadata()
+       }

Review Comment:
   This is reversed I think...if the iceberg CRS is `""`, the GeoArrow CRS is 
`"OGC:CRS84"` (unless `NewMetadata()`.



##########
table/arrow_utils.go:
##########
@@ -1788,3 +1803,63 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "OGC:CRS84", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypeAuthorityCode:
+               return crs, nil
+       default:
+               return "", fmt.Errorf("unsupported geoarrow CRS type %q", 
meta.CRSType)
+       }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type, 
error) {
+       crs, err := geoArrowCRSToIcebergCRS(meta)
+       if err != nil {
+               return nil, err
+       }
+
+       switch meta.Edges {
+       case geoarrow.EdgePlanar:
+               return iceberg.GeometryTypeOf(crs)
+       case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas, 
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+               return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+       default:
+               return nil, fmt.Errorf("unsupported geoarrow edges %q", 
meta.Edges)
+       }
+}
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+       if crs == "OGC:CRS84" {
+               return geoarrow.NewMetadata()
+       }
+
+       if strings.HasPrefix(strings.ToLower(crs), "srid:") {
+               id := crs[len("srid:"):]
+               raw, _ := json.Marshal(id)
+
+               return geoarrow.Metadata{
+                       CRS:     raw,
+                       CRSType: geoarrow.CRSTypeSRID,
+               }
+       }

Review Comment:
   There is one special case here: `"srid:0"` maps to an omitted GeoArrow CRS.



##########
table/arrow_utils.go:
##########
@@ -1788,3 +1803,63 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "OGC:CRS84", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypeAuthorityCode:
+               return crs, nil
+       default:
+               return "", fmt.Errorf("unsupported geoarrow CRS type %q", 
meta.CRSType)
+       }

Review Comment:
   If the CRSType is PROJJSON, this is a important case to handle (because most 
GeoArrow producers produce that). You can convert most of these to 
authority:code by looking for the `id` member that looks like this `"crs": 
{..., "id":{"authority": "OGC", "code": "CRS84"}` or this `"crs": {..., 
"id":{"authority": "EPSG", "code": 3857}`. It would also be a good idea to 
convert the two "lonlat" CRSes ("EPSG:4326" and "OGC:CRS84") to the Iceberg 
"default" canonically.
   
   When you can't extract an authority:code, this would need to be written to a 
table property and written to the CRS field as (`projjson:<the table property 
name>`). Probably easier to error for that case for now.



##########
table/arrow_utils.go:
##########
@@ -1788,3 +1803,63 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "OGC:CRS84", nil
+       }
+
+       var crs string
+       if len(meta.CRS) > 0 {
+               if err := json.Unmarshal(meta.CRS, &crs); err != nil {
+                       return "", fmt.Errorf("invalid geoarrow CRS metadata: 
%w", err)
+               }
+       }
+
+       switch meta.CRSType {
+       case geoarrow.CRSTypeSRID:
+               return "srid:" + crs, nil
+       case geoarrow.CRSTypeAuthorityCode:
+               return crs, nil
+       default:
+               return "", fmt.Errorf("unsupported geoarrow CRS type %q", 
meta.CRSType)
+       }
+}
+
+func geoArrowMetadataToIcebergType(meta geoarrow.Metadata) (iceberg.Type, 
error) {
+       crs, err := geoArrowCRSToIcebergCRS(meta)
+       if err != nil {
+               return nil, err
+       }
+
+       switch meta.Edges {
+       case geoarrow.EdgePlanar:
+               return iceberg.GeometryTypeOf(crs)
+       case geoarrow.EdgeVincenty, geoarrow.EdgeKarney, geoarrow.EdgeThomas, 
geoarrow.EdgeAndoyer, geoarrow.EdgeSpherical:
+               return iceberg.GeographyTypeOf(crs, string(meta.Edges))
+       default:
+               return nil, fmt.Errorf("unsupported geoarrow edges %q", 
meta.Edges)
+       }
+}
+
+func icebergCRSToGeoArrowMetadata(crs string) geoarrow.Metadata {
+       if crs == "OGC:CRS84" {
+               return geoarrow.NewMetadata()
+       }
+
+       if strings.HasPrefix(strings.ToLower(crs), "srid:") {
+               id := crs[len("srid:"):]
+               raw, _ := json.Marshal(id)
+
+               return geoarrow.Metadata{
+                       CRS:     raw,
+                       CRSType: geoarrow.CRSTypeSRID,
+               }
+       }
+       raw, _ := json.Marshal(crs)
+
+       return geoarrow.Metadata{
+               CRS:     raw,
+               CRSType: geoarrow.CRSTypeAuthorityCode,
+       }

Review Comment:
   I believe there is one special case here: if the Iceberg CRS is 
`projjson:<table property>`, the CRS is the JSON object in that table property 
field. You should probably either support that or error with a note saying that 
it's not supported.



##########
table/arrow_utils.go:
##########
@@ -1788,3 +1803,63 @@ func positionDeleteRecordsToDataFilesDV(ctx 
context.Context, rootLocation string
                }
        }
 }
+
+func geoArrowCRSToIcebergCRS(meta geoarrow.Metadata) (string, error) {
+       if len(meta.CRS) == 0 && meta.CRSType == "" {
+               return "OGC:CRS84", nil
+       }

Review Comment:
   If the GeoArrow CRS is omitted from the extension metadata, the Parquet 
equivalent is `"srid:0"`. If the GeoArrow CRS is `"OGC:CRS84"` or 
`"EPSG:4326"`, the canonical Parquet CRS is "omitted" (i.e., default). I 
believe the Parquet and Iceberg CRS definitions are in sync now but it's worth 
double checking.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to