szehon-ho commented on code in PR #12667: URL: https://github.com/apache/iceberg/pull/12667#discussion_r2049485177
########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialBound.java: ########## @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Comparator; +import java.util.Objects; + +/** + * Represents a geospatial bound (minimum or maximum) for Iceberg tables. + * + * <p>According to the <a href="https://iceberg.apache.org/spec/#bound-serialization">Bound + * serialization section of Iceberg Table spec</a>, geospatial bounds are serialized differently + * from the regular WKB representation. Geometry and geography bounds are single point encoded as a + * concatenation of 8-byte little-endian IEEE 754 coordinate values in the order X, Y, Z (optional), + * M (optional). 
+ * + * <p>The encoding varies based on which coordinates are present: + * + * <ul> + * <li>x:y (2 doubles) when both z and m are unset + * <li>x:y:z (3 doubles) when only m is unset + * <li>x:y:NaN:m (4 doubles) when only z is unset + * <li>x:y:z:m (4 doubles) when all coordinates are set + * </ul> + * + * <p>This class represents a lower or upper geospatial bound and handles serialization and + * deserialization of these bounds to/from byte arrays, conforming to the Iceberg specification. + */ +public class GeospatialBound implements Serializable, Comparable<GeospatialBound> { + /** + * Parses a geospatial bound from a byte buffer according to Iceberg spec. + * + * <p>Based on the buffer size, this method determines which coordinates are present: - 16 bytes + * (2 doubles): x and y only - 24 bytes (3 doubles): x, y, and z - 32 bytes (4 doubles): x, y, z + * (might be NaN), and m + * + * @param buffer the ByteBuffer containing the serialized geospatial bound + * @return a GeospatialBound object representing the parsed bound + * @throws IllegalArgumentException if the buffer has an invalid size + */ + public static GeospatialBound fromByteBuffer(ByteBuffer buffer) { + // Create a duplicate to avoid modifying the original buffer's position and byte order + ByteBuffer tmp = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); Review Comment: wondering, can we just set it back if not the same, in the end? just seems a little wasteful ########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; + +public class GeospatialPredicateEvaluators { + private GeospatialPredicateEvaluators() {} + + public interface GeospatialPredicateEvaluator { + /** + * Test whether this bounding box intersects with another. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if this box intersects the other box + */ + boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2); + } + + public static GeospatialPredicateEvaluator create(Type type) { + switch (type.typeId()) { + case GEOMETRY: + return new GeometryEvaluator(); + case GEOGRAPHY: + return new GeographyEvaluator(); + default: + throw new UnsupportedOperationException( + "Unsupported type for GeospatialBoundingBox: " + type); + } + } + + static class GeometryEvaluator implements GeospatialPredicateEvaluator { + @Override + public boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2) { + return intersectsWithWrapAround(bbox1, bbox2); + } + + static boolean intersectsWithWrapAround( + GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2) { + // Let's check y first, and if y does not intersect, we can return false + if (bbox1.min().y() > bbox2.max().y() || bbox1.max().y() < bbox2.min().y()) { + return false; + } + + // Now check x, need to take wrap-around into account + if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { + // 
No wrap-around + return bbox1.min().x() <= bbox2.max().x() && bbox1.max().x() >= bbox2.min().x(); + } else if (bbox1.min().x() > bbox1.max().x() && bbox2.min().x() <= bbox2.max().x()) { + // bbox1 wraps around the antimeridian, bbox2 does not + return bbox1.min().x() <= bbox2.max().x() || bbox1.max().x() >= bbox2.min().x(); + } else if (bbox1.min().x() <= bbox1.max().x() && bbox2.min().x() > bbox2.max().x()) { + // bbox2 wraps around the antimeridian, bbox1 does not + return intersectsWithWrapAround(bbox2, bbox1); + } else { + // Both wrap around the antimeridian, they must intersect + return true; + } + } + } + + static class GeographyEvaluator implements GeospatialPredicateEvaluator { + @Override + public boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2) { + validateBoundingBox(bbox1); + validateBoundingBox(bbox2); + return GeometryEvaluator.intersectsWithWrapAround(bbox1, bbox2); Review Comment: just checking, we can do intersect for Geography bbox and Geometry bbox the same way? ########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.geospatial; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; + +public class GeospatialPredicateEvaluators { + private GeospatialPredicateEvaluators() {} + + public interface GeospatialPredicateEvaluator { + /** + * Test whether this bounding box intersects with another. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if this box intersects the other box + */ + boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2); + } + + public static GeospatialPredicateEvaluator create(Type type) { + switch (type.typeId()) { + case GEOMETRY: + return new GeometryEvaluator(); + case GEOGRAPHY: + return new GeographyEvaluator(); + default: + throw new UnsupportedOperationException( + "Unsupported type for GeospatialBoundingBox: " + type); + } + } + + static class GeometryEvaluator implements GeospatialPredicateEvaluator { + @Override + public boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2) { + return intersectsWithWrapAround(bbox1, bbox2); + } + + static boolean intersectsWithWrapAround( Review Comment: probably worth a javadoc comment what this is about ########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialBoundingBox.java: ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.Objects; + +/** + * Represents a geospatial bounding box composed of minimum and maximum bounds. + * + * <p>A bounding box (also called a Minimum Bounding Rectangle or MBR) is defined by two points: the + * minimum and maximum coordinates that define the box's corners. This provides a simple + * approximation of a more complex geometry for efficient filtering and data skipping. + */ +public class GeospatialBoundingBox implements Serializable, Comparable<GeospatialBoundingBox> { + public static final GeospatialBoundingBox SANITIZED = Review Comment: I wonder, can't we keep this logic within the ExpressionUtil sanitize code, and not have it pollute this class (to keep the class basic)? ########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialBoundingBox.java: ########## @@ -0,0 +1,111 @@ +/* Review Comment: Nit: BoundingBox for simplicity? (package name already mentions geospatial). Same for the other classes in the package. ########## api/src/main/java/org/apache/iceberg/geospatial/GeospatialPredicateEvaluators.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.geospatial; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; + +public class GeospatialPredicateEvaluators { + private GeospatialPredicateEvaluators() {} + + public interface GeospatialPredicateEvaluator { + /** + * Test whether this bounding box intersects with another. + * + * @param bbox1 the first bounding box + * @param bbox2 the second bounding box + * @return true if this box intersects the other box + */ + boolean intersects(GeospatialBoundingBox bbox1, GeospatialBoundingBox bbox2); + } + + public static GeospatialPredicateEvaluator create(Type type) { Review Comment: Don't we need to sanity check for understood CRS and Edge parameters, to confirm that we can evaluate it? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org