This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c3612374bb3 [chore](cloud) Update build and start script (#56031)
c3612374bb3 is described below
commit c3612374bb306fad284ecb61a24e7872c882f145
Author: Gavin Chou <[email protected]>
AuthorDate: Mon Sep 15 15:13:10 2025 +0800
[chore](cloud) Update build and start script (#56031)
* make HDFS storage optional when building in cloud mode (usage sketch below)
* optimize the meta-service start script
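A minimal usage sketch (assuming build.sh is driven the usual way for cloud
mode; the variable defaults to ON per the build.sh hunk below, so existing
builds are unaffected):

    # hypothetical invocation: build the cloud binaries without HDFS support
    ENABLE_HDFS_STORAGE_VAULT=OFF ./build.sh --cloud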
---
build.sh | 1 +
cloud/CMakeLists.txt | 33 +++++----
cloud/script/start.sh | 90 +++++++++++++-----------
cloud/src/meta-service/meta_service_resource.cpp | 11 +++
cloud/src/recycler/CMakeLists.txt | 4 ++
cloud/src/recycler/checker.cpp | 7 ++
cloud/src/recycler/recycler.cpp | 7 ++
cloud/test/CMakeLists.txt | 11 ++-
run-cloud-ut.sh | 1 +
9 files changed, 107 insertions(+), 58 deletions(-)
diff --git a/build.sh b/build.sh
index a93c98483cf..cfac0453ca3 100755
--- a/build.sh
+++ b/build.sh
@@ -656,6 +656,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
-DMAKE_TEST=OFF \
"${CMAKE_USE_CCACHE}" \
-DUSE_LIBCPP="${USE_LIBCPP}" \
+ -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
-DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
-DUSE_JEMALLOC="${USE_JEMALLOC}" \
-DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \
diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt
index 21cabcd72ec..d2da775e184 100644
--- a/cloud/CMakeLists.txt
+++ b/cloud/CMakeLists.txt
@@ -262,15 +262,19 @@ include_directories(
${GPERFTOOLS_HOME}/include
)
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
- set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+ add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+ if ("${DORIS_JAVA_HOME}" STREQUAL "")
+ set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
+ endif()
-include_directories(${DORIS_JAVA_HOME}/include)
-if (NOT OS_MACOSX)
- include_directories(${DORIS_JAVA_HOME}/include/linux)
-else()
- include_directories(${DORIS_JAVA_HOME}/include/darwin)
+ include_directories(${DORIS_JAVA_HOME}/include)
+ if (NOT OS_MACOSX)
+ include_directories(${DORIS_JAVA_HOME}/include/linux)
+ else()
+ include_directories(${DORIS_JAVA_HOME}/include/darwin)
+ endif()
endif()
set(WL_START_GROUP "-Wl,--start-group")
@@ -312,10 +316,6 @@ set(DORIS_DEPENDENCIES
message(STATUS "DORIS_DEPENDENCIES is ${DORIS_DEPENDENCIES}")
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
- set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
-
# Add all external dependencies. They should come after the project's libs.
# static link gcc's lib
set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
@@ -333,9 +333,14 @@ set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
-static-libstdc++
-static-libgcc
-lresolv
- -L${DORIS_JAVA_HOME}/lib/server
- -ljvm
)
+
+if (ENABLE_HDFS_STORAGE_VAULT)
+ set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
+ -L${DORIS_JAVA_HOME}/lib/server
+ -ljvm)
+endif()
+
if (NOT (USE_LIBCPP AND COMPILER_CLANG))
set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS} -lstdc++fs)
endif()
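When configuring the cloud tree with CMake directly, the same option can be
set on the command line; a sketch, with a hypothetical build directory:

    # configure cloud/ with the HDFS storage vault compiled out
    cmake -DENABLE_HDFS_STORAGE_VAULT=OFF -B build-cloud cloud/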
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index 506a279ad78..befe0a9f2e9 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -54,8 +54,15 @@ if [[ ${RUN_RECYCLYER} -eq 1 ]]; then
fi
# echo "$@" "daemonized=${daemonized}"}
-# export env variables from doris_cloud.conf
-# read from doris_cloud.conf
+custom_start="${DORIS_HOME}/bin/custom_start.sh"
+if [[ -f "${custom_start}" ]]; then
+ source "${custom_start}"
+fi
+enable_hdfs=${enable_hdfs:-1}
+process_name="${process_name:-doris_cloud}"
+
+# export env variables from ${process_name}.conf
+# read from ${process_name}.conf
while read -r line; do
envline="$(echo "${line}" |
sed 's/[[:blank:]]*=[[:blank:]]*/=/g' |
@@ -66,7 +73,7 @@ while read -r line; do
if [[ "${envline}" == *"="* ]]; then
eval 'export "${envline}"'
fi
-done <"${DORIS_HOME}/conf/doris_cloud.conf"
+done <"${DORIS_HOME}/conf/${process_name}.conf"
role=''
if [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 0 ]]; then
@@ -78,53 +85,59 @@ elif [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
elif [[ ${RUN_METASERVICE} -eq 1 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
role='MetaService and Recycler'
fi
-process=doris_cloud
-if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process}.pid" ]]; then
- pid=$(cat "${DORIS_HOME}/bin/${process}.pid")
+if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process_name}.pid" ]]; then
+ pid=$(cat "${DORIS_HOME}/bin/${process_name}.pid")
if [[ "${pid}" != "" ]]; then
- if kill -0 "$(cat "${DORIS_HOME}/bin/${process}.pid")" >/dev/null 2>&1; then
+ if kill -0 "$(cat "${DORIS_HOME}/bin/${process_name}.pid")" >/dev/null 2>&1; then
echo "pid file existed, ${role} have already started, pid=${pid}"
exit 1
fi
fi
echo "pid file existed but process not alive, remove it, pid=${pid}"
- rm -f "${DORIS_HOME}/bin/${process}.pid"
+ rm -f "${DORIS_HOME}/bin/${process_name}.pid"
fi
lib_path="${DORIS_HOME}/lib"
-bin="${DORIS_HOME}/lib/doris_cloud"
+bin="${DORIS_HOME}/lib/${process_name}"
export LD_LIBRARY_PATH="${lib_path}:${LD_LIBRARY_PATH}"
-chmod 550 "${DORIS_HOME}/lib/doris_cloud"
+chmod 550 "${DORIS_HOME}/lib/${process_name}"
-if [[ -z "${JAVA_HOME}" ]]; then
- echo "The JAVA_HOME environment variable is not defined correctly"
- echo "This environment variable is needed to run this program"
- echo "NB: JAVA_HOME should point to a JDK not a JRE"
- echo "You can set it in doris_cloud.conf"
- exit 1
-fi
+if [[ ${enable_hdfs} -eq 1 ]]; then
+ if [[ -z "${JAVA_HOME}" ]]; then
+ echo "The JAVA_HOME environment variable is not defined correctly"
+ echo "This environment variable is needed to run this program"
+ echo "NB: JAVA_HOME should point to a JDK not a JRE"
+ echo "You can set it in doris_cloud.conf"
+ exit 1
+ fi
-if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
- # add hadoop libs
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
- for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
- DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
- done
-fi
+ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+ # add hadoop libs
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+ DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+ DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+ DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+ done
+ for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+ DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+ done
+ fi
+
-export CLASSPATH="${DORIS_CLASSPATH}"
+ export CLASSPATH="${DORIS_CLASSPATH}"
-export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
+ export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
+ ## set libhdfs3 conf
+ if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
+ export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
+ fi
+fi
# filter known leak
export LSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/lsan_suppr.conf
@@ -136,13 +149,6 @@ export UBSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/ubsan_suppr.conf
export ASAN_OPTIONS=symbolize=1:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1:detect_container_overflow=0:check_malloc_usable_size=0:${ASAN_OPTIONS}
export UBSAN_OPTIONS=print_stacktrace=1:${UBSAN_OPTIONS}
-## set libhdfs3 conf
-if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
- export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
-fi
-
-# echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}"
-
# to enable dump jeprof heap stats prodigally, change `prof_active:false` to `prof_active:true` or curl http://be_host:be_webport/jeheap/prof/true
# to control the dump interval change `lg_prof_interval` to a specific value, it is pow/exponent of 2 in size of bytes, default 34 means 2 ** 34 = 16GB
# to control the dump path, change `prof_prefix` to a specific path, e.g. /doris_cloud/log/ms_, by default it dumps at the path where the start command called
@@ -155,7 +161,7 @@ fi
mkdir -p "${DORIS_HOME}/log"
echo "$(date +'%F %T') start with args: $*"
-out_file=${DORIS_HOME}/log/${process}.out
+out_file=${DORIS_HOME}/log/${process_name}.out
if [[ "${RUN_DAEMON}" -eq 1 ]]; then
# append 10 blank lines to ensure the following tail -n10 works correctly
printf "\n\n\n\n\n\n\n\n\n\n" >>"${out_file}"
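The custom_start.sh override sourced above is optional; a minimal sketch
(variable names and defaults come from this diff, the values shown are
illustrative):

    # ${DORIS_HOME}/bin/custom_start.sh -- sourced by start.sh when present
    enable_hdfs=0             # skip the JAVA_HOME check and hadoop classpath setup
    process_name=doris_cloud  # base name of the binary, .conf, .pid and .out files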
diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp
index dfa528010f4..1e1b30bcf7b 100644
--- a/cloud/src/meta-service/meta_service_resource.cpp
+++ b/cloud/src/meta-service/meta_service_resource.cpp
@@ -364,6 +364,17 @@ bool normalize_hdfs_fs_name(std::string& fs_name) {
static int add_hdfs_storage_vault(InstanceInfoPB& instance, Transaction* txn,
StorageVaultPB& hdfs_param, MetaServiceCode& code,
std::string& msg) {
+#ifndef ENABLE_HDFS_STORAGE_VAULT
+ code = MetaServiceCode::INVALID_ARGUMENT;
+ msg = fmt::format(
+ "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build
option), "
+ "but HDFS storage vaults were detected: {}",
+ hdfs_param.name());
+ LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+ << "but HDFS storage vaults were detected: " << hdfs_param.name();
+ return -1;
+#endif
+
if (!hdfs_param.has_hdfs_info()) {
code = MetaServiceCode::INVALID_ARGUMENT;
msg = fmt::format("vault_name={} passed invalid argument", hdfs_param.name());
diff --git a/cloud/src/recycler/CMakeLists.txt b/cloud/src/recycler/CMakeLists.txt
index 6dbb8a0d696..12dc7351853 100644
--- a/cloud/src/recycler/CMakeLists.txt
+++ b/cloud/src/recycler/CMakeLists.txt
@@ -9,6 +9,10 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR
file(GLOB_RECURSE SRC_LIST CONFIGURE_DEPENDS *.cpp)
+if (NOT ENABLE_HDFS_STORAGE_VAULT)
+ list(REMOVE_ITEM SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/hdfs_accessor.cpp)
+endif()
+
if(BUILD_AZURE STREQUAL "OFF")
list(REMOVE_ITEM SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/azure_obj_client.cpp")
endif()
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 6096551f6e0..a83d4725e27 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -52,7 +52,9 @@
#include "meta-store/keys.h"
#include "meta-store/txn_kv.h"
#include "meta-store/txn_kv_error.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
#include "recycler/hdfs_accessor.h"
+#endif
#include "recycler/s3_accessor.h"
#include "recycler/storage_vault_accessor.h"
#ifdef UNIT_TEST
@@ -471,6 +473,7 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
&accessor_map_, &vault);
if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
int ret = accessor->init();
if (ret != 0) {
@@ -480,6 +483,10 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
}
accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+ LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+ << "but HDFS storage vaults were detected";
+#endif
} else if (vault.has_obj_info()) {
#ifdef UNIT_TEST
auto accessor = std::make_shared<MockAccessor>();
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index 95c26385693..d5d368964a9 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -49,7 +49,9 @@
#include "meta-store/txn_kv_error.h"
#include "meta-store/versioned_value.h"
#include "recycler/checker.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
#include "recycler/hdfs_accessor.h"
+#endif
#include "recycler/s3_accessor.h"
#include "recycler/storage_vault_accessor.h"
#ifdef UNIT_TEST
@@ -600,6 +602,7 @@ int InstanceRecycler::init_storage_vault_accessors() {
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
&accessor_map_, &vault);
if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
int ret = accessor->init();
if (ret != 0) {
@@ -612,6 +615,10 @@ int InstanceRecycler::init_storage_vault_accessors() {
<< " resource_id=" << vault.id() << " name=" <<
vault.name()
<< " hdfs_vault=" <<
vault.hdfs_info().ShortDebugString();
accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+ LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+ << "but HDFS storage vaults were detected";
+#endif
} else if (vault.has_obj_info()) {
auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
if (!s3_conf) {
diff --git a/cloud/test/CMakeLists.txt b/cloud/test/CMakeLists.txt
index ffd768809b8..e1bfb42a626 100644
--- a/cloud/test/CMakeLists.txt
+++ b/cloud/test/CMakeLists.txt
@@ -63,7 +63,11 @@ add_executable(s3_accessor_test s3_accessor_test.cpp)
add_executable(s3_accessor_mock_test s3_accessor_mock_test.cpp)
-add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+ add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+ add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+endif()
add_executable(stopwatch_test stopwatch_test.cpp)
@@ -110,7 +114,10 @@ target_link_libraries(s3_accessor_test ${TEST_LINK_LIBS})
target_link_libraries(s3_accessor_mock_test ${TEST_LINK_LIBS})
-target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+ target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+endif()
target_link_libraries(stopwatch_test ${TEST_LINK_LIBS})
diff --git a/run-cloud-ut.sh b/run-cloud-ut.sh
index 2aa3fa80a56..fe9ea3de61c 100755
--- a/run-cloud-ut.sh
+++ b/run-cloud-ut.sh
@@ -187,6 +187,7 @@ find . -name "*.gcda" -exec rm {} \;
-DMAKE_TEST=ON \
-DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \
-DUSE_LIBCPP="${USE_LIBCPP}" \
+ -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
-DUSE_MEM_TRACKER=ON \
-DUSE_JEMALLOC=OFF \
-DSTRICT_MEMORY_USE=OFF \
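run-cloud-ut.sh forwards the same variable as build.sh, so a cloud unit-test
run without HDFS support becomes a one-liner (a sketch, assuming the script
is invoked from the repository root):

    # build and run cloud UTs with the HDFS storage vault compiled out
    ENABLE_HDFS_STORAGE_VAULT=OFF ./run-cloud-ut.sh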
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]