Copilot commented on code in PR #60296:
URL: https://github.com/apache/doris/pull/60296#discussion_r2735118370


##########
be/CMakeLists.txt:
##########
@@ -838,12 +939,6 @@ endif()
 option(BUILD_FILE_CACHE_MICROBENCH_TOOL "Build file cache mirobench Tool" OFF)
 if (BUILD_FILE_CACHE_MICROBENCH_TOOL)
     add_subdirectory(${SRC_DIR}/io/tools)

Review Comment:
   The removal of the install block for start_file_cache_microbench.sh appears 
unrelated to paimon-cpp integration. This script file exists in 
bin/start_file_cache_microbench.sh but will no longer be installed when 
BUILD_FILE_CACHE_MICROBENCH_TOOL is enabled. This appears to be an accidental 
deletion that should be reverted.
   ```suggestion
       add_subdirectory(${SRC_DIR}/io/tools)
       if (NOT BUILD_BENCHMARK)
           install(FILES
               ${BASE_DIR}/../bin/start_file_cache_microbench.sh
               PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
               GROUP_READ GROUP_WRITE GROUP_EXECUTE
               WORLD_READ WORLD_EXECUTE
               DESTINATION ${OUTPUT_DIR}/bin)
       endif()
   ```



##########
thirdparty/patches/paimon-cpp-b1ffd6f73e5edf57aac24ec2eaf6d2ef9e9a9850.patch:
##########
@@ -0,0 +1,266 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index f942510..ef5b68c 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -48,7 +48,12 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+ option(PAIMON_USE_ASAN "Use Address Sanitizer" OFF)
+ option(PAIMON_USE_UBSAN "Use Undefined Behavior Sanitizer" OFF)
+ option(PAIMON_ENABLE_AVRO "Whether to enable avro file format" ON)
+-option(PAIMON_ENABLE_ORC "Whether to enable orc file format" ON)
++option(PAIMON_ENABLE_ORC "Whether to enable orc file format" ON)
++option(PAIMON_USE_EXTERNAL_GLOG "Use external glog library instead of bundled 
glog" OFF)
++set(GLOG_INCLUDE_DIR "" CACHE PATH "External glog include directory")
++set(GLOG_STATIC_LIB "" CACHE FILEPATH "External glog static library")
++set(GFLAGS_INCLUDE_DIR "" CACHE PATH "External gflags include directory")
++set(GFLAGS_STATIC_LIB "" CACHE FILEPATH "External gflags static library")
+ option(PAIMON_ENABLE_LANCE "Whether to enable lance file format" OFF)
+ option(PAIMON_ENABLE_JINDO "Whether to enable jindo file system" OFF)
+ option(PAIMON_ENABLE_LUMINA "Whether to enable lumina vector index" ON)
+diff --git a/cmake_modules/BuildUtils.cmake b/cmake_modules/BuildUtils.cmake
+index c63d17f..73db743 100644
+--- a/cmake_modules/BuildUtils.cmake
++++ b/cmake_modules/BuildUtils.cmake
+@@ -55,7 +55,18 @@ function(add_paimon_lib LIB_NAME)
+     # Necessary to make static linking into other shared libraries work 
properly
+     set_property(TARGET ${LIB_NAME}_objlib PROPERTY POSITION_INDEPENDENT_CODE 
1)
+     if(ARG_DEPENDENCIES)
+-        add_dependencies(${LIB_NAME}_objlib ${ARG_DEPENDENCIES})
++        # Avoid add_dependencies on non-existent targets (e.g. when building 
static only).
++        set(_paimon_objlib_deps)
++        foreach(_paimon_dep IN LISTS ARG_DEPENDENCIES)
++            if(TARGET ${_paimon_dep})
++                list(APPEND _paimon_objlib_deps ${_paimon_dep})
++            endif()
++        endforeach()
++        if(_paimon_objlib_deps)
++            add_dependencies(${LIB_NAME}_objlib ${_paimon_objlib_deps})
++        endif()
++        unset(_paimon_objlib_deps)
++        unset(_paimon_dep)
+     endif()
+     set(LIB_DEPS $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
+     set(LIB_INCLUDES)
+@@ -72,3 +72,3 @@ function(add_paimon_lib LIB_NAME)
+     if(ARG_EXTRA_INCLUDES)
+-        target_include_directories(${LIB_NAME}_objlib SYSTEM PUBLIC 
${ARG_EXTRA_INCLUDES})
++        target_include_directories(${LIB_NAME}_objlib SYSTEM BEFORE PUBLIC 
${ARG_EXTRA_INCLUDES})
+     endif()
+diff --git a/cmake_modules/DefineOptions.cmake 
b/cmake_modules/DefineOptions.cmake
+index 0211f40..c4a6b2f 100644
+--- a/cmake_modules/DefineOptions.cmake
++++ b/cmake_modules/DefineOptions.cmake
+@@ -95,6 +95,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL 
"${CMAKE_CURRENT_SOURCE_DIR}")
+     define_option(PAIMON_BUILD_STATIC "Build static libraries" OFF)
+ 
+     define_option(PAIMON_BUILD_SHARED "Build shared libraries" ON)
++    define_option(PAIMON_USE_EXTERNAL_RAPIDJSON "Use external RapidJSON 
headers" OFF)
+     #----------------------------------------------------------------------
+     set_option_category("Test")
+ 
+diff --git a/cmake_modules/SetupCxxFlags.cmake 
b/cmake_modules/SetupCxxFlags.cmake
+index e4b2710..2d705ba 100644
+--- a/cmake_modules/SetupCxxFlags.cmake
++++ b/cmake_modules/SetupCxxFlags.cmake
+@@ -41,6 +41,15 @@ set(CMAKE_CXX_EXTENSIONS OFF)
+ # shared libraries
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ 
++# Use global-dynamic TLS model to avoid relocation issues when linking
++# with other libraries that may use different TLS models (e.g., when
++# rapidjson templates are instantiated in both this library and the
++# consuming application)
++if(NOT MSVC)
++    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftls-model=global-dynamic")
++    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ftls-model=global-dynamic")
++endif()
++ 
+ string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)
+ 
+ set(UNKNOWN_COMPILER_MESSAGE
+diff --git a/cmake_modules/ThirdpartyToolchain.cmake 
b/cmake_modules/ThirdpartyToolchain.cmake
+index 3f07104..dfe2ba3 100644
+--- a/cmake_modules/ThirdpartyToolchain.cmake
++++ b/cmake_modules/ThirdpartyToolchain.cmake
+@@ -1011,6 +1011,17 @@ endmacro()
+ 
+ build_fmt()
+-build_rapidjson()
++if(PAIMON_USE_EXTERNAL_RAPIDJSON)
++    if(NOT RAPIDJSON_INCLUDE_DIR)
++        message(FATAL_ERROR
++                "PAIMON_USE_EXTERNAL_RAPIDJSON=ON but RAPIDJSON_INCLUDE_DIR 
is not set")
++    endif()
++ 
++    file(MAKE_DIRECTORY "${RAPIDJSON_INCLUDE_DIR}")
++    add_library(RapidJSON INTERFACE IMPORTED)
++    target_include_directories(RapidJSON SYSTEM INTERFACE 
"${RAPIDJSON_INCLUDE_DIR}")
++else()
++    build_rapidjson()
++endif()
+ build_snappy()
+ build_zstd()
+ build_zlib()
+@@ -1018,10 +1018,34 @@ build_zlib()
+ build_lz4()
+ build_arrow()
+ build_tbb()
+-build_glog()
++if(PAIMON_USE_EXTERNAL_GLOG)
++    if(NOT GLOG_INCLUDE_DIR OR NOT GLOG_STATIC_LIB)
++        message(FATAL_ERROR
++                "PAIMON_USE_EXTERNAL_GLOG=ON but GLOG_INCLUDE_DIR or 
GLOG_STATIC_LIB is not set")
++    endif()
++    add_library(glog STATIC IMPORTED)
++    set(_glog_interface_includes "${GLOG_INCLUDE_DIR}")
++    set(_glog_interface_libs "")
++    if(GFLAGS_STATIC_LIB)
++        list(APPEND _glog_interface_libs "${GFLAGS_STATIC_LIB}")
++        if(GFLAGS_INCLUDE_DIR)
++            list(APPEND _glog_interface_includes "${GFLAGS_INCLUDE_DIR}")
++        endif()
++    endif()
++    set_target_properties(glog
++                          PROPERTIES IMPORTED_LOCATION "${GLOG_STATIC_LIB}"
++                                     INTERFACE_SYSTEM_INCLUDE_DIRECTORIES
++                                     "${_glog_interface_includes}"
++                                     INTERFACE_LINK_LIBRARIES 
"${_glog_interface_libs}"
++                                     INTERFACE_COMPILE_DEFINITIONS 
"GLOG_USE_GLOG_EXPORT")
++    unset(_glog_interface_includes)
++    unset(_glog_interface_libs)
++else()
++    build_glog()
++endif()
+ 
+ if(PAIMON_ENABLE_AVRO)
+     build_avro()
+ endif()
+ if(PAIMON_ENABLE_JINDO)
+     build_jindosdk_c()
+diff --git a/cmake_modules/arrow.diff b/cmake_modules/arrow.diff
+index 997cb6b..c7b1cf4 100644
+--- a/cmake_modules/arrow.diff
++++ b/cmake_modules/arrow.diff
+@@ -194,5 +194,22 @@ index 4d3acb491e..3906ff3c59 100644
+    int64_t max_row_group_length_;
+ +  int64_t max_row_group_size_;
+    int64_t pagesize_;
+-   ParquetDataPageVersion parquet_data_page_version_;
+-   ParquetVersion::type parquet_version_;
++  ParquetDataPageVersion parquet_data_page_version_;
++  ParquetVersion::type parquet_version_;
++ 
++diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
++--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
++@@ -1789,8 +1789,11 @@ if(ARROW_WITH_THRIFT)
++                      REQUIRED_VERSION
++                      0.11.0)
++ 
++-  string(REPLACE "." ";" Thrift_VERSION_LIST ${Thrift_VERSION})
+++  if(NOT Thrift_VERSION)
+++    set(Thrift_VERSION "${ARROW_THRIFT_BUILD_VERSION}")
+++  endif()
+++  string(REPLACE "." ";" Thrift_VERSION_LIST "${Thrift_VERSION}")
++   list(GET Thrift_VERSION_LIST 0 Thrift_VERSION_MAJOR)
++   list(GET Thrift_VERSION_LIST 1 Thrift_VERSION_MINOR)
++   list(GET Thrift_VERSION_LIST 2 Thrift_VERSION_PATCH)
++ endif()
+diff --git a/src/paimon/common/logging/logging.cpp 
b/src/paimon/common/logging/logging.cpp
+index 5892436..ce599f1 100644
+--- a/src/paimon/common/logging/logging.cpp
++++ b/src/paimon/common/logging/logging.cpp
+@@ -22,8 +22,8 @@
+ #include <optional>
+ #include <shared_mutex>
+ 
+-#include "glog/log_severity.h"
+ #include "glog/logging.h"
++#include "glog/log_severity.h"
+ #include "glog/raw_logging.h"
+ 
+ namespace paimon {
+diff --git a/src/paimon/common/memory/bytes.cpp 
b/src/paimon/common/memory/bytes.cpp
+index 6b2b6c9..8a5f6d7 100644
+--- a/src/paimon/common/memory/bytes.cpp
++++ b/src/paimon/common/memory/bytes.cpp
+@@ -59,9 +59,16 @@ Bytes& Bytes::operator=(Bytes&& other) noexcept {
+     if (&other == this) {
+         return *this;
+     }
+-    this->~Bytes();
+-    std::memcpy(this, &other, sizeof(*this));
+-    new (&other) Bytes();
++    if (data_ != nullptr) {
++        assert(pool_);
++        pool_->Free(data_, size_);
++    }
++    pool_ = other.pool_;
++    data_ = other.data_;
++    size_ = other.size_;
++    other.pool_ = nullptr;
++    other.data_ = nullptr;
++    other.size_ = 0;
+     return *this;
+ }
+
+diff --git a/cmake_modules/orc.diff b/cmake_modules/orc.diff
+--- a/cmake_modules/orc.diff
++++ b/cmake_modules/orc.diff
+@@ -339,5 +339,57 @@ set(ORC_INSTALL_INTERFACE_TARGETS)
+ +set(BUILD_POSITION_INDEPENDENT_LIB ON)
+ +
+  set(ORC_FORMAT_VERSION "1.0.0")
+  set(LZ4_VERSION "1.10.0")
+  set(SNAPPY_VERSION "1.2.1")
++ 
++diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
++--- a/c++/src/CMakeLists.txt
+++++ b/c++/src/CMakeLists.txt
++@@ -141,12 +141,18 @@
++-add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
++-   COMMAND ${PROTOBUF_EXECUTABLE}
++-        -I 
../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto
++-        --cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
++-        
../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto/orc_proto.proto
++-)
+++add_custom_command(OUTPUT paimon_orc_proto.pb.h paimon_orc_proto.pb.cc
+++   COMMAND ${CMAKE_COMMAND} -E copy_if_different
+++        
../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto/orc_proto.proto
+++        "${CMAKE_CURRENT_BINARY_DIR}/paimon_orc_proto.proto"
+++   COMMAND sed -i
+++        "s/package orc\\.proto/package paimon_orc.proto/"
+++        "${CMAKE_CURRENT_BINARY_DIR}/paimon_orc_proto.proto"

Review Comment:
   The patch modifies paimon-cpp's ORC build to use `sed -i` for namespace 
renaming (line 230), which is not portable to macOS. On macOS, `sed -i` 
requires an empty string argument like `sed -i ''`. This could cause build 
failures on macOS systems. Consider updating the patch to handle both Linux and 
macOS, or ensure paimon-cpp's build system handles this portability issue.
   ```suggestion
   ++   COMMAND /bin/sh -c "sed -i.bak 's/package orc\\.proto/package 
paimon_orc.proto/' paimon_orc_proto.proto && rm paimon_orc_proto.proto.bak"
   ```



##########
build.sh:
##########
@@ -634,6 +634,17 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
     echo "-- Build task executor simulator: ${BUILD_TASK_EXECUTOR_SIMULATOR}"
     echo "-- Build file cache lru tool: ${BUILD_FILE_CACHE_LRU_TOOL}"
 
+    if [[ -z "${ENABLE_PAIMON_CPP:-}" ]]; then
+        ENABLE_PAIMON_CPP=ON
+    fi
+    if [[ "${ENABLE_PAIMON_CPP}" == "ON" && -z "${PAIMON_HOME:-}" ]]; then
+        _paimon_default_home="${DORIS_THIRDPARTY}/installed/paimon-cpp"
+        if [[ -f "${_paimon_default_home}/lib/cmake/Paimon/PaimonConfig.cmake" 
]]; then
+            PAIMON_HOME="${_paimon_default_home}"
+        fi
+        unset _paimon_default_home
+    fi

Review Comment:
   Consider adding an echo statement to display the ENABLE_PAIMON_CPP status 
similar to other build options (e.g., lines 633-635). This would help users 
understand whether Paimon C++ integration is enabled during the build. Example: 
`echo "-- Enable Paimon C++: ${ENABLE_PAIMON_CPP}"`
   ```suggestion
       fi
       echo "-- Enable Paimon C++: ${ENABLE_PAIMON_CPP}"
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to