This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new a41ebdd1954 branch-3.0: [fix](func) Fix precision loss in 
ST_GeometryFromWKB coordinate parsing #46661 (#47264)
a41ebdd1954 is described below

commit a41ebdd195452513f36b4d2bb0ce30e85eff4dae
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 20 14:38:05 2025 +0800

    branch-3.0: [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate 
parsing #46661 (#47264)
    
    Cherry-picked from #46661
    
    Co-authored-by: lw112 <131352377+felixw...@users.noreply.github.com>
---
 be/src/geo/geo_types.cpp                           |   2 +-
 be/src/geo/wkb_parse.cpp                           | 153 ++++++++++++++-------
 be/src/geo/wkb_parse.h                             |  14 +-
 .../data/correctness_p0/test_select_constant.out   | Bin 211 -> 214 bytes
 .../data/nereids_function_p0/scalar_function/S.out | Bin 53552 -> 53624 bytes
 .../data/nereids_p0/test_select_constant.out       | Bin 186 -> 189 bytes
 .../spatial_functions/test_gis_function.out        | Bin 1438 -> 1604 bytes
 .../data/query_p0/test_select_constant.out         | Bin 186 -> 189 bytes
 .../spatial_functions/test_gis_function.groovy     |   6 +
 9 files changed, 126 insertions(+), 49 deletions(-)

diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp
index bee6f69ba8e..dc27595da3b 100644
--- a/be/src/geo/geo_types.cpp
+++ b/be/src/geo/geo_types.cpp
@@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default;
 
 void print_s2point(std::ostream& os, const S2Point& point) {
     S2LatLng coord(point);
-    os << std::setprecision(12) << coord.lng().degrees() << " " << 
coord.lat().degrees();
+    os << std::setprecision(15) << coord.lng().degrees() << " " << 
coord.lat().degrees();
 }
 
 static inline bool is_valid_lng_lat(double lng, double lat) {
diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp
index e24328d7564..7b345929fc0 100644
--- a/be/src/geo/wkb_parse.cpp
+++ b/be/src/geo/wkb_parse.cpp
@@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, 
WkbParseContext* ctx) {
     auto size = is.tellg();
     is.seekg(0, std::ios::beg);
 
-    std::vector<unsigned char> buf(static_cast<size_t>(size));
-    is.read(reinterpret_cast<char*>(buf.data()), 
static_cast<std::streamsize>(size));
-
-    ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default 
to machine endian
+    // Check if size is valid
+    if (size <= 0) {
+        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+        return ctx;
+    }
 
-    ctx->shape = readGeometry(ctx).release();
+    std::vector<unsigned char> buf(static_cast<size_t>(size));
+    if (!is.read(reinterpret_cast<char*>(buf.data()), 
static_cast<std::streamsize>(size))) {
+        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+        return ctx;
+    }
 
-    if (!ctx->shape) {
+    // Ensure we have at least one byte for byte order
+    if (buf.empty()) {
         ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+        return ctx;
     }
-    return ctx;
-}
 
-std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
-    // determine byte order
-    unsigned char byteOrder = ctx->dis.readByte();
+    // First read the byte order using machine endian
+    auto byteOrder = buf[0];
 
-    // default is machine endian
+    // Create ByteOrderDataInStream with the correct byte order
     if (byteOrder == byteOrder::wkbNDR) {
+        ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
         ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
     } else if (byteOrder == byteOrder::wkbXDR) {
+        ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
         ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
+    } else {
+        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+        return ctx;
+    }
+
+    std::unique_ptr<GeoShape> shape = readGeometry(ctx);
+    if (!shape) {
+        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+        return ctx;
     }
 
-    uint32_t typeInt = ctx->dis.readUnsigned();
+    ctx->shape = shape.release();
+    return ctx;
+}
 
-    uint32_t geometryType = (typeInt & 0xffff) % 1000;
+std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
+    try {
+        // Ensure we have enough data to read
+        if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes 
for type
+            return nullptr;
+        }
 
-    std::unique_ptr<GeoShape> shape;
+        // Skip the byte order as we've already handled it
+        ctx->dis.readByte();
 
-    switch (geometryType) {
-    case wkbType::wkbPoint:
-        shape.reset(readPoint(ctx).release());
-        break;
-    case wkbType::wkbLine:
-        shape.reset(readLine(ctx).release());
-        break;
-    case wkbType::wkbPolygon:
-        shape.reset(readPolygon(ctx).release());
-        break;
-    default:
+        uint32_t typeInt = ctx->dis.readUnsigned();
+
+        // Check if geometry has SRID
+        bool has_srid = (typeInt & WKB_SRID_FLAG) != 0;
+
+        // Read SRID if present
+        if (has_srid) {
+            ctx->dis.readUnsigned(); // Read and store SRID if needed
+        }
+
+        // Get the base geometry type
+        uint32_t geometryType = typeInt & WKB_TYPE_MASK;
+
+        std::unique_ptr<GeoShape> shape;
+
+        switch (geometryType) {
+        case wkbType::wkbPoint:
+            shape = readPoint(ctx);
+            break;
+        case wkbType::wkbLine:
+            shape = readLine(ctx);
+            break;
+        case wkbType::wkbPolygon:
+            shape = readPolygon(ctx);
+            break;
+        default:
+            return nullptr;
+        }
+
+        return shape;
+    } catch (...) {
+        // Handle any exceptions from reading operations
         return nullptr;
     }
-    return shape;
 }
 
 std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
     GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx);
-    std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
+    if (coords.list.empty()) {
+        return nullptr;
+    }
 
-    if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) {
-        return point;
-    } else {
+    std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
+    if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) {
         return nullptr;
     }
+
+    return point;
 }
 
 std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
     uint32_t size = ctx->dis.readUnsigned();
-    minMemSize(wkbLine, size, ctx);
+    if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) {
+        return nullptr;
+    }
 
     GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx);
-    std::unique_ptr<GeoLine> line = GeoLine::create_unique();
+    if (coords.list.empty()) {
+        return nullptr;
+    }
 
-    if (line->from_coords(coords) == GEO_PARSE_OK) {
-        return line;
-    } else {
+    std::unique_ptr<GeoLine> line = GeoLine::create_unique();
+    if (!line || line->from_coords(coords) != GEO_PARSE_OK) {
         return nullptr;
     }
+
+    return line;
 }
 
 std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
     uint32_t num_loops = ctx->dis.readUnsigned();
-    minMemSize(wkbPolygon, num_loops, ctx);
+    if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) {
+        return nullptr;
+    }
+
     GeoCoordinateListList coordss;
-    for (int i = 0; i < num_loops; ++i) {
+    for (uint32_t i = 0; i < num_loops; ++i) {
         uint32_t size = ctx->dis.readUnsigned();
-        GeoCoordinateList* coords = new GeoCoordinateList();
+        if (size < 3) { // A polygon loop must have at least 3 points
+            return nullptr;
+        }
+
+        auto coords = std::make_unique<GeoCoordinateList>();
         *coords = WkbParse::readCoordinateList(size, ctx);
-        coordss.add(coords);
+        if (coords->list.empty()) {
+            return nullptr;
+        }
+        coordss.add(coords.release());
     }
 
     std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique();
-
-    if (polygon->from_coords(coordss) == GEO_PARSE_OK) {
-        return polygon;
-    } else {
+    if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) {
         return nullptr;
     }
+
+    return polygon;
 }
 
 GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* 
ctx) {
     GeoCoordinateList coords;
     for (uint32_t i = 0; i < size; i++) {
-        readCoordinate(ctx);
+        if (!readCoordinate(ctx)) {
+            return GeoCoordinateList();
+        }
         unsigned int j = 0;
         GeoCoordinate coord;
         coord.x = ctx->ordValues[j++];
         coord.y = ctx->ordValues[j++];
         coords.add(coord);
     }
-
     return coords;
 }
 
diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h
index e03dddf97a3..de6d0e9b2d9 100644
--- a/be/src/geo/wkb_parse.h
+++ b/be/src/geo/wkb_parse.h
@@ -17,8 +17,7 @@
 
 #pragma once
 
-#include <stdint.h>
-
+#include <cstdint>
 #include <iosfwd>
 #include <memory>
 
@@ -34,6 +33,17 @@ class GeoLine;
 class GeoPoint;
 class GeoPolygon;
 
+// WKB format constants
+// According to OpenGIS Implementation Specification:
+// The high bit of the type value is set to 1 if the WKB contains a SRID.
+// Reference: OpenGIS Implementation Specification for Geographic information 
- Simple feature access - Part 1: Common architecture
+// Bit mask to check if WKB contains SRID
+constexpr uint32_t WKB_SRID_FLAG = 0x20000000;
+
+// The geometry type is stored in the least significant byte of the type value
+// Bit mask to extract the base geometry type
+constexpr uint32_t WKB_TYPE_MASK = 0xFF;
+
 class WkbParse {
 public:
     static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape);
diff --git a/regression-test/data/correctness_p0/test_select_constant.out 
b/regression-test/data/correctness_p0/test_select_constant.out
index 33c56d3a22c..6737e373d16 100644
Binary files a/regression-test/data/correctness_p0/test_select_constant.out and 
b/regression-test/data/correctness_p0/test_select_constant.out differ
diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out 
b/regression-test/data/nereids_function_p0/scalar_function/S.out
index 53a5a1639a7..b346ae022a0 100644
Binary files a/regression-test/data/nereids_function_p0/scalar_function/S.out 
and b/regression-test/data/nereids_function_p0/scalar_function/S.out differ
diff --git a/regression-test/data/nereids_p0/test_select_constant.out 
b/regression-test/data/nereids_p0/test_select_constant.out
index cb391ffb121..6faa26295a1 100644
Binary files a/regression-test/data/nereids_p0/test_select_constant.out and 
b/regression-test/data/nereids_p0/test_select_constant.out differ
diff --git 
a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
 
b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
index db1b1ffcae5..59bc628249f 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
 and 
b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
 differ
diff --git a/regression-test/data/query_p0/test_select_constant.out 
b/regression-test/data/query_p0/test_select_constant.out
index cb391ffb121..6faa26295a1 100644
Binary files a/regression-test/data/query_p0/test_select_constant.out and 
b/regression-test/data/query_p0/test_select_constant.out differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
 
b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
index f76cb44cb4a..81eadfb0cc0 100644
--- 
a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
@@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") {
     qt_sql "SELECT 
ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 
2)\"))));"
     qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON 
((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 
22.539826,114.106203 22.542680,114.104486 22.547119))\"))));"
 
+    qt_sql "SELECT 
ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));"
+    qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));"
+    qt_sql "SELECT 
ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));"
+    qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));"
+    qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));"
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to