github-actions[bot] commented on code in PR #38277:
URL: https://github.com/apache/doris/pull/38277#discussion_r1689024588


##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -17,10 +17,12 @@
 
 #pragma once
 
+#include <gen_cpp/parquet_types.h>

Review Comment:
   warning: 'gen_cpp/parquet_types.h' file not found [clang-diagnostic-error]
   ```cpp
   #include <gen_cpp/parquet_types.h>
            ^
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -156,4 +158,303 @@
     size_t _num_filtered;
     size_t _read_index;
 };
-} // namespace doris::vectorized
+
+enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER };
+
+enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN };
+
+class ParsedVersion {
+public:
+    ParsedVersion(std::string application, std::optional<std::string> version,
+                  std::optional<std::string> appBuildHash)
+            : application(std::move(application)),
+              version(std::move(version)),
+              appBuildHash(std::move(appBuildHash)) {}
+
+    bool operator==(const ParsedVersion& other) const {
+        return application == other.application && version == other.version &&
+               appBuildHash == other.appBuildHash;
+    }
+
+    bool operator!=(const ParsedVersion& other) const { return !(*this == 
other); }
+
+    size_t hash() const {
+        std::hash<std::string> hasher;
+        return hasher(application) ^ (version ? hasher(*version) : 0) ^
+               (appBuildHash ? hasher(*appBuildHash) : 0);
+    }
+
+    std::string toString() const {
+        return "ParsedVersion(application=" + application +
+               ", semver=" + (version ? *version : "null") +
+               ", appBuildHash=" + (appBuildHash ? *appBuildHash : "null") + 
")";
+    }
+
+public:

Review Comment:
   warning: redundant access specifier has the same accessibility as the 
previous access specifier [readability-redundant-access-specifiers]
   
   ```suggestion
   
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/exec/format/parquet/parquet_common.h:166:** previously declared 
here
   ```cpp
   public:
   ^
   ```
   
   </details>
   



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -142,24 +141,65 @@
         break;
             FOR_REINTERPRET_TYPES(DISPATCH)
 #undef DISPATCH
+        case TYPE_FLOAT:
+            if constexpr (std::is_same_v<CppType, float>) {
+                if (col_schema->physical_type != tparquet::Type::FLOAT) return 
false;
+                min_value = *reinterpret_cast<const 
CppType*>(encoded_min.data());
+                max_value = *reinterpret_cast<const 
CppType*>(encoded_max.data());
+                if (std::isnan(min_value) || std::isnan(max_value)) {
+                    return false;
+                }
+                // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 
values would be skipped
+                if (std::signbit(min_value) == 0 && min_value == 0.0f) {
+                    min_value = -0.0f;
+                }
+                if (std::signbit(max_value) != 0 && max_value == -0.0f) {
+                    max_value = 0.0f;

Review Comment:
   warning: floating point literal has suffix 'f', which is not uppercase 
[readability-uppercase-literal-suffix]
   
   ```suggestion
                       max_value = 0.0F;
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -156,4 +158,303 @@
     size_t _num_filtered;
     size_t _read_index;
 };
-} // namespace doris::vectorized
+
+enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER };
+
+enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN };
+
+class ParsedVersion {
+public:
+    ParsedVersion(std::string application, std::optional<std::string> version,
+                  std::optional<std::string> appBuildHash)
+            : application(std::move(application)),
+              version(std::move(version)),
+              appBuildHash(std::move(appBuildHash)) {}
+
+    bool operator==(const ParsedVersion& other) const {
+        return application == other.application && version == other.version &&
+               appBuildHash == other.appBuildHash;
+    }
+
+    bool operator!=(const ParsedVersion& other) const { return !(*this == 
other); }
+
+    size_t hash() const {
+        std::hash<std::string> hasher;
+        return hasher(application) ^ (version ? hasher(*version) : 0) ^
+               (appBuildHash ? hasher(*appBuildHash) : 0);
+    }
+
+    std::string toString() const {
+        return "ParsedVersion(application=" + application +
+               ", semver=" + (version ? *version : "null") +
+               ", appBuildHash=" + (appBuildHash ? *appBuildHash : "null") + 
")";
+    }
+
+public:
+    std::string application;
+    std::optional<std::string> version;
+    std::optional<std::string> appBuildHash;
+};
+
+class VersionParser {
+public:
+    static Status parse(const std::string& createdBy,
+                        std::unique_ptr<ParsedVersion>* parsedVersion) {
+        static const std::string FORMAT =
+                
"(.*?)\\s+version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?";
+        static const std::regex PATTERN(FORMAT);
+
+        std::smatch matcher;
+        if (!std::regex_match(createdBy, matcher, PATTERN)) {
+            return Status::InternalError(fmt::format(
+                    "Could not parse created_by: {}, using format: {}", 
createdBy, FORMAT));
+        }
+
+        std::string application = matcher[1].str();
+        if (application.empty()) {
+            return Status::InternalError("application cannot be null or 
empty");
+        }
+        std::optional<std::string> semver = matcher[2].str().empty()
+                                                    ? std::nullopt
+                                                    : 
std::optional<std::string>(matcher[2].str());
+        std::optional<std::string> appBuildHash =
+                matcher[3].str().empty() ? std::nullopt
+                                         : 
std::optional<std::string>(matcher[3].str());
+        *parsedVersion = std::make_unique<ParsedVersion>(application, semver, 
appBuildHash);
+        return Status::OK();
+    }
+};
+
+class SemanticVersion {
+public:
+    SemanticVersion(int major, int minor, int patch)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(false),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+
+#ifdef BE_TEST
+    SemanticVersion(int major, int minor, int patch, bool has_unknown)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(has_unknown),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+#endif
+
+    SemanticVersion(int major, int minor, int patch, 
std::optional<std::string> unknown,
+                    std::optional<std::string> pre, std::optional<std::string> 
build_info)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(unknown.has_value() && !unknown.value().empty()),
+              _unknown(std::move(unknown)),
+              _pre(pre.has_value() ? 
std::optional<Prerelease>(Prerelease(std::move(pre.value())))
+                                   : std::nullopt),
+              _build_info(std::move(build_info)) {}
+
+    static Status parse(const std::string& version,
+                        std::unique_ptr<SemanticVersion>* semantic_version) {
+        static const std::regex pattern(
+                R"(^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$)");
+        std::smatch match;
+
+        if (!std::regex_match(version, match, pattern)) {
+            return Status::InternalError(version + " does not match format");
+        }
+
+        int major = std::stoi(match[1].str());
+        int minor = std::stoi(match[2].str());
+        int patch = std::stoi(match[3].str());
+        std::optional<std::string> unknown =
+                match[4].str().empty() ? std::nullopt : 
std::optional<std::string>(match[4].str());
+        std::optional<std::string> prerelease =
+                match[5].str().empty() ? std::nullopt : 
std::optional<std::string>(match[5].str());
+        std::optional<std::string> build_info =
+                match[6].str().empty() ? std::nullopt : 
std::optional<std::string>(match[6].str());
+        if (major < 0 || minor < 0 || patch < 0) {
+            return Status::InternalError("major({}), minor({}), and patch({}) 
must all be >= 0",
+                                         major, minor, patch);
+        }
+        *semantic_version = std::make_unique<SemanticVersion>(major, minor, 
patch, unknown,
+                                                              prerelease, 
build_info);
+        return Status::OK();
+    }
+
+    int compareTo(const SemanticVersion& other) const {
+        if (int cmp = compareIntegers(_major, other._major); cmp != 0) return 
cmp;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (int cmp = compareIntegers(_major, other._major); cmp != 0) {
               return cmp;
           }
   ```
   



##########
be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_common.h"
+
+namespace doris {
+namespace vectorized {

Review Comment:
   warning: nested namespaces can be concatenated 
[modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::vectorized {
   ```
   
   be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp:132:
   ```diff
   - } // namespace vectorized
   - } // namespace doris
   + } // namespace doris::vectorized
   ```
   



##########
be/test/vec/exec/parquet/parquet_statistics_test.cpp:
##########
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_pred_cmp.h"
+
+namespace doris {
+namespace vectorized {

Review Comment:
   warning: nested namespaces can be concatenated 
[modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::vectorized {
   ```
   
   be/test/vec/exec/parquet/parquet_statistics_test.cpp:-1:
   ```diff
   
   + }
   ```
   



##########
be/test/vec/exec/parquet/parquet_version_test.cpp:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_common.h"
+
+namespace doris {
+namespace vectorized {

Review Comment:
   warning: nested namespaces can be concatenated 
[modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::vectorized {
   ```
   
   be/test/vec/exec/parquet/parquet_version_test.cpp:219:
   ```diff
   - } // namespace vectorized
   - } // namespace doris
   + } // namespace doris::vectorized
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -156,4 +158,303 @@
     size_t _num_filtered;
     size_t _read_index;
 };
-} // namespace doris::vectorized
+
+enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER };
+
+enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN };
+
+class ParsedVersion {
+public:
+    ParsedVersion(std::string application, std::optional<std::string> version,
+                  std::optional<std::string> appBuildHash)
+            : application(std::move(application)),
+              version(std::move(version)),
+              appBuildHash(std::move(appBuildHash)) {}
+
+    bool operator==(const ParsedVersion& other) const {
+        return application == other.application && version == other.version &&
+               appBuildHash == other.appBuildHash;
+    }
+
+    bool operator!=(const ParsedVersion& other) const { return !(*this == 
other); }
+
+    size_t hash() const {
+        std::hash<std::string> hasher;
+        return hasher(application) ^ (version ? hasher(*version) : 0) ^
+               (appBuildHash ? hasher(*appBuildHash) : 0);
+    }
+
+    std::string toString() const {
+        return "ParsedVersion(application=" + application +
+               ", semver=" + (version ? *version : "null") +
+               ", appBuildHash=" + (appBuildHash ? *appBuildHash : "null") + 
")";
+    }
+
+public:
+    std::string application;
+    std::optional<std::string> version;
+    std::optional<std::string> appBuildHash;
+};
+
+class VersionParser {
+public:
+    static Status parse(const std::string& createdBy,
+                        std::unique_ptr<ParsedVersion>* parsedVersion) {
+        static const std::string FORMAT =
+                
"(.*?)\\s+version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?";
+        static const std::regex PATTERN(FORMAT);
+
+        std::smatch matcher;
+        if (!std::regex_match(createdBy, matcher, PATTERN)) {
+            return Status::InternalError(fmt::format(
+                    "Could not parse created_by: {}, using format: {}", 
createdBy, FORMAT));
+        }
+
+        std::string application = matcher[1].str();
+        if (application.empty()) {
+            return Status::InternalError("application cannot be null or 
empty");
+        }
+        std::optional<std::string> semver = matcher[2].str().empty()
+                                                    ? std::nullopt
+                                                    : 
std::optional<std::string>(matcher[2].str());
+        std::optional<std::string> appBuildHash =
+                matcher[3].str().empty() ? std::nullopt
+                                         : 
std::optional<std::string>(matcher[3].str());
+        *parsedVersion = std::make_unique<ParsedVersion>(application, semver, 
appBuildHash);
+        return Status::OK();
+    }
+};
+
+class SemanticVersion {
+public:
+    SemanticVersion(int major, int minor, int patch)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(false),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+
+#ifdef BE_TEST
+    SemanticVersion(int major, int minor, int patch, bool has_unknown)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(has_unknown),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+#endif
+
+    SemanticVersion(int major, int minor, int patch, 
std::optional<std::string> unknown,
+                    std::optional<std::string> pre, std::optional<std::string> 
build_info)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(unknown.has_value() && !unknown.value().empty()),
+              _unknown(std::move(unknown)),
+              _pre(pre.has_value() ? 
std::optional<Prerelease>(Prerelease(std::move(pre.value())))
+                                   : std::nullopt),
+              _build_info(std::move(build_info)) {}
+
+    static Status parse(const std::string& version,
+                        std::unique_ptr<SemanticVersion>* semantic_version) {
+        static const std::regex pattern(
+                R"(^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$)");
+        std::smatch match;
+
+        if (!std::regex_match(version, match, pattern)) {
+            return Status::InternalError(version + " does not match format");
+        }
+
+        int major = std::stoi(match[1].str());
+        int minor = std::stoi(match[2].str());
+        int patch = std::stoi(match[3].str());
+        std::optional<std::string> unknown =
+                match[4].str().empty() ? std::nullopt : 
std::optional<std::string>(match[4].str());
+        std::optional<std::string> prerelease =
+                match[5].str().empty() ? std::nullopt : 
std::optional<std::string>(match[5].str());
+        std::optional<std::string> build_info =
+                match[6].str().empty() ? std::nullopt : 
std::optional<std::string>(match[6].str());
+        if (major < 0 || minor < 0 || patch < 0) {
+            return Status::InternalError("major({}), minor({}), and patch({}) 
must all be >= 0",
+                                         major, minor, patch);
+        }
+        *semantic_version = std::make_unique<SemanticVersion>(major, minor, 
patch, unknown,
+                                                              prerelease, 
build_info);
+        return Status::OK();
+    }
+
+    int compareTo(const SemanticVersion& other) const {
+        if (int cmp = compareIntegers(_major, other._major); cmp != 0) return 
cmp;
+        if (int cmp = compareIntegers(_minor, other._minor); cmp != 0) return 
cmp;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (int cmp = compareIntegers(_minor, other._minor); cmp != 0) {
               return cmp;
           }
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -142,24 +141,65 @@ class ParquetPredicate {
         break;
             FOR_REINTERPRET_TYPES(DISPATCH)
 #undef DISPATCH
+        case TYPE_FLOAT:
+            if constexpr (std::is_same_v<CppType, float>) {
+                if (col_schema->physical_type != tparquet::Type::FLOAT) return 
false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
                   if (col_schema->physical_type != tparquet::Type::FLOAT) {
                       return false;
                   }
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -156,4 +158,303 @@
     size_t _num_filtered;
     size_t _read_index;
 };
-} // namespace doris::vectorized
+
+enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER };
+
+enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN };
+
+class ParsedVersion {
+public:
+    ParsedVersion(std::string application, std::optional<std::string> version,
+                  std::optional<std::string> appBuildHash)
+            : application(std::move(application)),
+              version(std::move(version)),
+              appBuildHash(std::move(appBuildHash)) {}
+
+    bool operator==(const ParsedVersion& other) const {
+        return application == other.application && version == other.version &&
+               appBuildHash == other.appBuildHash;
+    }
+
+    bool operator!=(const ParsedVersion& other) const { return !(*this == 
other); }
+
+    size_t hash() const {
+        std::hash<std::string> hasher;
+        return hasher(application) ^ (version ? hasher(*version) : 0) ^
+               (appBuildHash ? hasher(*appBuildHash) : 0);
+    }
+
+    std::string toString() const {
+        return "ParsedVersion(application=" + application +
+               ", semver=" + (version ? *version : "null") +
+               ", appBuildHash=" + (appBuildHash ? *appBuildHash : "null") + 
")";
+    }
+
+public:
+    std::string application;
+    std::optional<std::string> version;
+    std::optional<std::string> appBuildHash;
+};
+
+class VersionParser {
+public:
+    static Status parse(const std::string& createdBy,
+                        std::unique_ptr<ParsedVersion>* parsedVersion) {
+        static const std::string FORMAT =
+                
"(.*?)\\s+version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?";
+        static const std::regex PATTERN(FORMAT);
+
+        std::smatch matcher;
+        if (!std::regex_match(createdBy, matcher, PATTERN)) {
+            return Status::InternalError(fmt::format(
+                    "Could not parse created_by: {}, using format: {}", 
createdBy, FORMAT));
+        }
+
+        std::string application = matcher[1].str();
+        if (application.empty()) {
+            return Status::InternalError("application cannot be null or 
empty");
+        }
+        std::optional<std::string> semver = matcher[2].str().empty()
+                                                    ? std::nullopt
+                                                    : 
std::optional<std::string>(matcher[2].str());
+        std::optional<std::string> appBuildHash =
+                matcher[3].str().empty() ? std::nullopt
+                                         : 
std::optional<std::string>(matcher[3].str());
+        *parsedVersion = std::make_unique<ParsedVersion>(application, semver, 
appBuildHash);
+        return Status::OK();
+    }
+};
+
+class SemanticVersion {
+public:
+    SemanticVersion(int major, int minor, int patch)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(false),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+
+#ifdef BE_TEST
+    SemanticVersion(int major, int minor, int patch, bool has_unknown)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(has_unknown),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+#endif
+
+    SemanticVersion(int major, int minor, int patch, 
std::optional<std::string> unknown,
+                    std::optional<std::string> pre, std::optional<std::string> 
build_info)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(unknown.has_value() && !unknown.value().empty()),
+              _unknown(std::move(unknown)),
+              _pre(pre.has_value() ? 
std::optional<Prerelease>(Prerelease(std::move(pre.value())))
+                                   : std::nullopt),
+              _build_info(std::move(build_info)) {}
+
+    static Status parse(const std::string& version,
+                        std::unique_ptr<SemanticVersion>* semantic_version) {
+        static const std::regex pattern(
+                R"(^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$)");
+        std::smatch match;
+
+        if (!std::regex_match(version, match, pattern)) {
+            return Status::InternalError(version + " does not match format");
+        }
+
+        int major = std::stoi(match[1].str());
+        int minor = std::stoi(match[2].str());
+        int patch = std::stoi(match[3].str());
+        std::optional<std::string> unknown =
+                match[4].str().empty() ? std::nullopt : 
std::optional<std::string>(match[4].str());
+        std::optional<std::string> prerelease =
+                match[5].str().empty() ? std::nullopt : 
std::optional<std::string>(match[5].str());
+        std::optional<std::string> build_info =
+                match[6].str().empty() ? std::nullopt : 
std::optional<std::string>(match[6].str());
+        if (major < 0 || minor < 0 || patch < 0) {
+            return Status::InternalError("major({}), minor({}), and patch({}) 
must all be >= 0",
+                                         major, minor, patch);
+        }
+        *semantic_version = std::make_unique<SemanticVersion>(major, minor, 
patch, unknown,
+                                                              prerelease, 
build_info);
+        return Status::OK();
+    }
+
+    int compareTo(const SemanticVersion& other) const {
+        if (int cmp = compareIntegers(_major, other._major); cmp != 0) return 
cmp;
+        if (int cmp = compareIntegers(_minor, other._minor); cmp != 0) return 
cmp;
+        if (int cmp = compareIntegers(_patch, other._patch); cmp != 0) return 
cmp;
+        if (int cmp = compareBooleans(other._prerelease, _prerelease); cmp != 
0) return cmp;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (int cmp = compareBooleans(other._prerelease, _prerelease); cmp != 0) {
               return cmp;
           }
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -142,24 +141,65 @@
         break;
             FOR_REINTERPRET_TYPES(DISPATCH)
 #undef DISPATCH
+        case TYPE_FLOAT:
+            if constexpr (std::is_same_v<CppType, float>) {
+                if (col_schema->physical_type != tparquet::Type::FLOAT) return 
false;
+                min_value = *reinterpret_cast<const 
CppType*>(encoded_min.data());
+                max_value = *reinterpret_cast<const 
CppType*>(encoded_max.data());
+                if (std::isnan(min_value) || std::isnan(max_value)) {
+                    return false;
+                }
+                // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 
values would be skipped
+                if (std::signbit(min_value) == 0 && min_value == 0.0f) {
+                    min_value = -0.0f;

Review Comment:
   warning: floating point literal has suffix 'f', which is not uppercase 
[readability-uppercase-literal-suffix]
   
   ```suggestion
                       min_value = -0.0F;
   ```
   



##########
be/test/vec/exec/parquet/parquet_statistics_test.cpp:
##########
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>

Review Comment:
   warning: 'gtest/gtest.h' file not found [clang-diagnostic-error]
   ```cpp
   #include <gtest/gtest.h>
            ^
   ```
   



##########
be/src/vec/exec/format/parquet/parquet_common.h:
##########
@@ -156,4 +158,303 @@
     size_t _num_filtered;
     size_t _read_index;
 };
-} // namespace doris::vectorized
+
+enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER };
+
+enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN };
+
+class ParsedVersion {
+public:
+    ParsedVersion(std::string application, std::optional<std::string> version,
+                  std::optional<std::string> appBuildHash)
+            : application(std::move(application)),
+              version(std::move(version)),
+              appBuildHash(std::move(appBuildHash)) {}
+
+    bool operator==(const ParsedVersion& other) const {
+        return application == other.application && version == other.version &&
+               appBuildHash == other.appBuildHash;
+    }
+
+    bool operator!=(const ParsedVersion& other) const { return !(*this == 
other); }
+
+    size_t hash() const {
+        std::hash<std::string> hasher;
+        return hasher(application) ^ (version ? hasher(*version) : 0) ^
+               (appBuildHash ? hasher(*appBuildHash) : 0);
+    }
+
+    std::string toString() const {
+        return "ParsedVersion(application=" + application +
+               ", semver=" + (version ? *version : "null") +
+               ", appBuildHash=" + (appBuildHash ? *appBuildHash : "null") + 
")";
+    }
+
+public:
+    std::string application;
+    std::optional<std::string> version;
+    std::optional<std::string> appBuildHash;
+};
+
+class VersionParser {
+public:
+    static Status parse(const std::string& createdBy,
+                        std::unique_ptr<ParsedVersion>* parsedVersion) {
+        static const std::string FORMAT =
+                
"(.*?)\\s+version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?";
+        static const std::regex PATTERN(FORMAT);
+
+        std::smatch matcher;
+        if (!std::regex_match(createdBy, matcher, PATTERN)) {
+            return Status::InternalError(fmt::format(
+                    "Could not parse created_by: {}, using format: {}", 
createdBy, FORMAT));
+        }
+
+        std::string application = matcher[1].str();
+        if (application.empty()) {
+            return Status::InternalError("application cannot be null or 
empty");
+        }
+        std::optional<std::string> semver = matcher[2].str().empty()
+                                                    ? std::nullopt
+                                                    : 
std::optional<std::string>(matcher[2].str());
+        std::optional<std::string> appBuildHash =
+                matcher[3].str().empty() ? std::nullopt
+                                         : 
std::optional<std::string>(matcher[3].str());
+        *parsedVersion = std::make_unique<ParsedVersion>(application, semver, 
appBuildHash);
+        return Status::OK();
+    }
+};
+
+class SemanticVersion {
+public:
+    SemanticVersion(int major, int minor, int patch)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(false),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+
+#ifdef BE_TEST
+    SemanticVersion(int major, int minor, int patch, bool has_unknown)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(has_unknown),
+              _unknown(std::nullopt),
+              _pre(std::nullopt),
+              _build_info(std::nullopt) {}
+#endif
+
+    SemanticVersion(int major, int minor, int patch, 
std::optional<std::string> unknown,
+                    std::optional<std::string> pre, std::optional<std::string> 
build_info)
+            : _major(major),
+              _minor(minor),
+              _patch(patch),
+              _prerelease(unknown.has_value() && !unknown.value().empty()),
+              _unknown(std::move(unknown)),
+              _pre(pre.has_value() ? 
std::optional<Prerelease>(Prerelease(std::move(pre.value())))
+                                   : std::nullopt),
+              _build_info(std::move(build_info)) {}
+
+    static Status parse(const std::string& version,
+                        std::unique_ptr<SemanticVersion>* semantic_version) {
+        static const std::regex pattern(
+                R"(^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$)");
+        std::smatch match;
+
+        if (!std::regex_match(version, match, pattern)) {
+            return Status::InternalError(version + " does not match format");
+        }
+
+        int major = std::stoi(match[1].str());
+        int minor = std::stoi(match[2].str());
+        int patch = std::stoi(match[3].str());
+        std::optional<std::string> unknown =
+                match[4].str().empty() ? std::nullopt : 
std::optional<std::string>(match[4].str());
+        std::optional<std::string> prerelease =
+                match[5].str().empty() ? std::nullopt : 
std::optional<std::string>(match[5].str());
+        std::optional<std::string> build_info =
+                match[6].str().empty() ? std::nullopt : 
std::optional<std::string>(match[6].str());
+        if (major < 0 || minor < 0 || patch < 0) {
+            return Status::InternalError("major({}), minor({}), and patch({}) 
must all be >= 0",
+                                         major, minor, patch);
+        }
+        *semantic_version = std::make_unique<SemanticVersion>(major, minor, 
patch, unknown,
+                                                              prerelease, 
build_info);
+        return Status::OK();
+    }
+
+    int compareTo(const SemanticVersion& other) const {
+        if (int cmp = compareIntegers(_major, other._major); cmp != 0) return 
cmp;
+        if (int cmp = compareIntegers(_minor, other._minor); cmp != 0) return 
cmp;
+        if (int cmp = compareIntegers(_patch, other._patch); cmp != 0) return 
cmp;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (int cmp = compareIntegers(_patch, other._patch); cmp != 0) {
               return cmp;
           }
   ```
   



##########
be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>

Review Comment:
   warning: 'gtest/gtest.h' file not found [clang-diagnostic-error]
   ```cpp
   #include <gtest/gtest.h>
            ^
   ```
   



##########
be/test/vec/exec/parquet/parquet_statistics_test.cpp:
##########
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_pred_cmp.h"
+
+namespace doris {
+namespace vectorized {
+class ParquetStatisticsTest : public testing::Test {
+public:
+    ParquetStatisticsTest() {}
+};
+
+TEST_F(ParquetStatisticsTest, test_try_read_old_utf8_stats) {

Review Comment:
   warning: function 'TEST_F' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   TEST_F(ParquetStatisticsTest, test_try_read_old_utf8_stats) {
   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/exec/parquet/parquet_statistics_test.cpp:30:** 121 lines including whitespace and comments (threshold 80)
   ```cpp
   TEST_F(ParquetStatisticsTest, test_try_read_old_utf8_stats) {
   ^
   ```
   
   </details>
   



##########
be/src/vec/exec/format/parquet/parquet_pred_cmp.h:
##########
@@ -142,24 +141,65 @@
         break;
             FOR_REINTERPRET_TYPES(DISPATCH)
 #undef DISPATCH
+        case TYPE_FLOAT:
+            if constexpr (std::is_same_v<CppType, float>) {
+                if (col_schema->physical_type != tparquet::Type::FLOAT) return 
false;
+                min_value = *reinterpret_cast<const 
CppType*>(encoded_min.data());
+                max_value = *reinterpret_cast<const 
CppType*>(encoded_max.data());
+                if (std::isnan(min_value) || std::isnan(max_value)) {
+                    return false;
+                }
+                // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 
values would be skipped
+                if (std::signbit(min_value) == 0 && min_value == 0.0f) {
+                    min_value = -0.0f;
+                }
+                if (std::signbit(max_value) != 0 && max_value == -0.0f) {
+                    max_value = 0.0f;
+                }
+                break;
+            } else {
+                return false;
+            }
+        case TYPE_DOUBLE:
+            if constexpr (std::is_same_v<CppType, float>) {
+                if (col_schema->physical_type != tparquet::Type::DOUBLE) 
return false;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
                   if (col_schema->physical_type != tparquet::Type::DOUBLE) { return false;
   }
   ```
   



##########
be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp:
##########
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_common.h"
+
+namespace doris {
+namespace vectorized {
+class ParquetCorruptStatisticsTest : public testing::Test {
+public:
+    ParquetCorruptStatisticsTest() {}

Review Comment:
   warning: use '= default' to define a trivial default constructor [modernize-use-equals-default]
   
   ```suggestion
       ParquetCorruptStatisticsTest() = default;
   ```
   



##########
be/test/vec/exec/parquet/parquet_statistics_test.cpp:
##########
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_pred_cmp.h"
+
+namespace doris {
+namespace vectorized {
+class ParquetStatisticsTest : public testing::Test {
+public:
+    ParquetStatisticsTest() {}

Review Comment:
   warning: use '= default' to define a trivial default constructor [modernize-use-equals-default]
   
   ```suggestion
       ParquetStatisticsTest() = default;
   ```
   



##########
be/test/vec/exec/parquet/parquet_version_test.cpp:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_common.h"
+
+namespace doris {
+namespace vectorized {
+class ParquetVersionTest : public testing::Test {
+public:
+    ParquetVersionTest() {}

Review Comment:
   warning: use '= default' to define a trivial default constructor [modernize-use-equals-default]
   
   ```suggestion
       ParquetVersionTest() = default;
   ```
   



##########
be/test/vec/exec/parquet/parquet_version_test.cpp:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>

Review Comment:
   warning: 'gtest/gtest.h' file not found [clang-diagnostic-error]
   ```cpp
   #include <gtest/gtest.h>
            ^
   ```
   



##########
be/test/vec/exec/parquet/parquet_version_test.cpp:
##########
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <regex>
+
+#include "vec/exec/format/parquet/parquet_common.h"
+
+namespace doris {
+namespace vectorized {
+class ParquetVersionTest : public testing::Test {
+public:
+    ParquetVersionTest() {}
+};
+
+TEST_F(ParquetVersionTest, test_version_parser) {

Review Comment:
   warning: function 'TEST_F' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   TEST_F(ParquetVersionTest, test_version_parser) {
   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/exec/parquet/parquet_version_test.cpp:30:** 91 lines including whitespace and comments (threshold 80)
   ```cpp
   TEST_F(ParquetVersionTest, test_version_parser) {
   ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to