This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new a41ebdd1954 branch-3.0: [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate parsing #46661 (#47264) a41ebdd1954 is described below commit a41ebdd195452513f36b4d2bb0ce30e85eff4dae Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Thu Mar 20 14:38:05 2025 +0800 branch-3.0: [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate parsing #46661 (#47264) Cherry-picked from #46661 Co-authored-by: lw112 <131352377+felixw...@users.noreply.github.com> --- be/src/geo/geo_types.cpp | 2 +- be/src/geo/wkb_parse.cpp | 153 ++++++++++++++------- be/src/geo/wkb_parse.h | 14 +- .../data/correctness_p0/test_select_constant.out | Bin 211 -> 214 bytes .../data/nereids_function_p0/scalar_function/S.out | Bin 53552 -> 53624 bytes .../data/nereids_p0/test_select_constant.out | Bin 186 -> 189 bytes .../spatial_functions/test_gis_function.out | Bin 1438 -> 1604 bytes .../data/query_p0/test_select_constant.out | Bin 186 -> 189 bytes .../spatial_functions/test_gis_function.groovy | 6 + 9 files changed, 126 insertions(+), 49 deletions(-) diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp index bee6f69ba8e..dc27595da3b 100644 --- a/be/src/geo/geo_types.cpp +++ b/be/src/geo/geo_types.cpp @@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default; void print_s2point(std::ostream& os, const S2Point& point) { S2LatLng coord(point); - os << std::setprecision(12) << coord.lng().degrees() << " " << coord.lat().degrees(); + os << std::setprecision(15) << coord.lng().degrees() << " " << coord.lat().degrees(); } static inline bool is_valid_lng_lat(double lng, double lat) { diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp index e24328d7564..7b345929fc0 100644 --- a/be/src/geo/wkb_parse.cpp +++ b/be/src/geo/wkb_parse.cpp @@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { auto size = is.tellg(); is.seekg(0, std::ios::beg); - std::vector<unsigned char> buf(static_cast<size_t>(size)); - is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size)); - - ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian + // Check if size is valid + if (size <= 0) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - ctx->shape = readGeometry(ctx).release(); + std::vector<unsigned char> buf(static_cast<size_t>(size)); + if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } - if (!ctx->shape) { + // Ensure we have at least one byte for byte order + if (buf.empty()) { ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - return ctx; -} -std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) { - // determine byte order - unsigned char byteOrder = ctx->dis.readByte(); + // First read the byte order using machine endian + auto byteOrder = buf[0]; - // default is machine endian + // Create ByteOrderDataInStream with the correct byte order if (byteOrder == byteOrder::wkbNDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); } else if (byteOrder == byteOrder::wkbXDR) { + ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); + } else { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; + } + + std::unique_ptr<GeoShape> shape = readGeometry(ctx); + if (!shape) { + ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; + return ctx; } - uint32_t typeInt = ctx->dis.readUnsigned(); + ctx->shape = shape.release(); + return ctx; +} - uint32_t geometryType = (typeInt & 0xffff) % 1000; +std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) { + try { + // Ensure we have enough data to read + if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type + return nullptr; + } - std::unique_ptr<GeoShape> shape; + // Skip the byte order as we've already handled it + ctx->dis.readByte(); - switch (geometryType) { - case wkbType::wkbPoint: - shape.reset(readPoint(ctx).release()); - break; - case wkbType::wkbLine: - shape.reset(readLine(ctx).release()); - break; - case wkbType::wkbPolygon: - shape.reset(readPolygon(ctx).release()); - break; - default: + uint32_t typeInt = ctx->dis.readUnsigned(); + + // Check if geometry has SRID + bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; + + // Read SRID if present + if (has_srid) { + ctx->dis.readUnsigned(); // Read and store SRID if needed + } + + // Get the base geometry type + uint32_t geometryType = typeInt & WKB_TYPE_MASK; + + std::unique_ptr<GeoShape> shape; + + switch (geometryType) { + case wkbType::wkbPoint: + shape = readPoint(ctx); + break; + case wkbType::wkbLine: + shape = readLine(ctx); + break; + case wkbType::wkbPolygon: + shape = readPolygon(ctx); + break; + default: + return nullptr; + } + + return shape; + } catch (...) { + // Handle any exceptions from reading operations return nullptr; } - return shape; } std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) { GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); - std::unique_ptr<GeoPoint> point = GeoPoint::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) { - return point; - } else { + std::unique_ptr<GeoPoint> point = GeoPoint::create_unique(); + if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { return nullptr; } + + return point; } std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) { uint32_t size = ctx->dis.readUnsigned(); - minMemSize(wkbLine, size, ctx); + if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { + return nullptr; + } GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); - std::unique_ptr<GeoLine> line = GeoLine::create_unique(); + if (coords.list.empty()) { + return nullptr; + } - if (line->from_coords(coords) == GEO_PARSE_OK) { - return line; - } else { + std::unique_ptr<GeoLine> line = GeoLine::create_unique(); + if (!line || line->from_coords(coords) != GEO_PARSE_OK) { return nullptr; } + + return line; } std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) { uint32_t num_loops = ctx->dis.readUnsigned(); - minMemSize(wkbPolygon, num_loops, ctx); + if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { + return nullptr; + } + GeoCoordinateListList coordss; - for (int i = 0; i < num_loops; ++i) { + for (uint32_t i = 0; i < num_loops; ++i) { uint32_t size = ctx->dis.readUnsigned(); - GeoCoordinateList* coords = new GeoCoordinateList(); + if (size < 3) { // A polygon loop must have at least 3 points + return nullptr; + } + + auto coords = std::make_unique<GeoCoordinateList>(); *coords = WkbParse::readCoordinateList(size, ctx); - coordss.add(coords); + if (coords->list.empty()) { + return nullptr; + } + coordss.add(coords.release()); } std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique(); - - if (polygon->from_coords(coordss) == GEO_PARSE_OK) { - return polygon; - } else { + if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { return nullptr; } + + return polygon; } GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { GeoCoordinateList coords; for (uint32_t i = 0; i < size; i++) { - readCoordinate(ctx); + if (!readCoordinate(ctx)) { + return GeoCoordinateList(); + } unsigned int j = 0; GeoCoordinate coord; coord.x = ctx->ordValues[j++]; coord.y = ctx->ordValues[j++]; coords.add(coord); } - return coords; } diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h index e03dddf97a3..de6d0e9b2d9 100644 --- a/be/src/geo/wkb_parse.h +++ b/be/src/geo/wkb_parse.h @@ -17,8 +17,7 @@ #pragma once -#include <stdint.h> - +#include <cstdint> #include <iosfwd> #include <memory> @@ -34,6 +33,17 @@ class GeoLine; class GeoPoint; class GeoPolygon; +// WKB format constants +// According to OpenGIS Implementation Specification: +// The high bit of the type value is set to 1 if the WKB contains a SRID. +// Reference: OpenGIS Implementation Specification for Geographic information - Simple feature access - Part 1: Common architecture +// Bit mask to check if WKB contains SRID +constexpr uint32_t WKB_SRID_FLAG = 0x20000000; + +// The geometry type is stored in the least significant byte of the type value +// Bit mask to extract the base geometry type +constexpr uint32_t WKB_TYPE_MASK = 0xFF; + class WkbParse { public: static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape); diff --git a/regression-test/data/correctness_p0/test_select_constant.out b/regression-test/data/correctness_p0/test_select_constant.out index 33c56d3a22c..6737e373d16 100644 Binary files a/regression-test/data/correctness_p0/test_select_constant.out and b/regression-test/data/correctness_p0/test_select_constant.out differ diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out b/regression-test/data/nereids_function_p0/scalar_function/S.out index 53a5a1639a7..b346ae022a0 100644 Binary files a/regression-test/data/nereids_function_p0/scalar_function/S.out and b/regression-test/data/nereids_function_p0/scalar_function/S.out differ diff --git a/regression-test/data/nereids_p0/test_select_constant.out b/regression-test/data/nereids_p0/test_select_constant.out index cb391ffb121..6faa26295a1 100644 Binary files a/regression-test/data/nereids_p0/test_select_constant.out and b/regression-test/data/nereids_p0/test_select_constant.out differ diff --git a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out index db1b1ffcae5..59bc628249f 100644 Binary files a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out and b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out differ diff --git a/regression-test/data/query_p0/test_select_constant.out b/regression-test/data/query_p0/test_select_constant.out index cb391ffb121..6faa26295a1 100644 Binary files a/regression-test/data/query_p0/test_select_constant.out and b/regression-test/data/query_p0/test_select_constant.out differ diff --git a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy index f76cb44cb4a..81eadfb0cc0 100644 --- a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy @@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") { qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2 2)\"))));" qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON ((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229 22.539826,114.106203 22.542680,114.104486 22.547119))\"))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));" + qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));" + qt_sql "SELECT ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));" + qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));" + qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));" + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org