This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 7e61a85331 [refactor](libhdfs) introduce hadoop libhdfs (#18204)
7e61a85331 is described below

commit 7e61a8533151d80a903d2452119b77bca59b43bc
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri Mar 31 18:41:39 2023 +0800

    [refactor](libhdfs) introduce hadoop libhdfs (#18204)

    1. Introduce hadoop libhdfs
    2. For the Linux-X86 platform, use the hadoop libhdfs
    3. For other platforms, use libhdfs3, because we currently don't have a
       hadoop libhdfs binary for them

    Co-authored-by: adonis0147 <adonis0...@gmail.com>
---
 be/CMakeLists.txt                                  | 25 ++++++--
 be/src/common/config.h                             |  2 -
 be/src/io/fs/err_utils.cpp                         | 12 +++-
 be/src/{util/hdfs_util.h => io/fs/hdfs.h}          | 34 ++---------
 be/src/io/fs/hdfs_file_reader.cpp                  |  7 ++-
 be/src/io/fs/hdfs_file_system.cpp                  |  7 +++
 be/src/io/fs/hdfs_file_system.h                    |  4 +-
 be/src/io/hdfs_builder.cpp                         |  6 +-
 be/src/io/hdfs_builder.h                           |  8 ++-
 be/src/tools/meta_tool.cpp                         | 13 +++--
 be/src/util/hdfs_util.h                            |  3 +-
 be/src/util/jni-util.cpp                           | 68 ++++++++++++++--------
 be/src/util/jni-util.h                             |  5 ++
 be/src/util/libjvm_loader.cpp                      |  9 +++
 .../vec/exec/format/parquet/bool_rle_decoder.cpp   |  3 +
 .../parquet/vparquet_column_chunk_reader.cpp       |  3 +-
 bin/start_be.sh                                    | 52 ++++++++++++++---
 build.sh                                           |  4 ++
 conf/be.conf                                       |  3 +
 .../org/apache/doris/clone/BeLoadRebalancer.java   |  2 +-
 20 files changed, 181 insertions(+), 89 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 8bb1100c51..cc0b6bbdc1 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -420,9 +420,6 @@ set_target_properties(k5crypto PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/li
 add_library(gssapi_krb5 STATIC IMPORTED)
 set_target_properties(gssapi_krb5 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgssapi_krb5.a)
 
-add_library(hdfs3 STATIC IMPORTED)
-set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
-
 find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)
 
 if (OS_MACOSX)
@@ -762,12 +759,32 @@ set(COMMON_THIRDPARTY
     # put this after lz4 to avoid using lz4 lib in librdkafka
     librdkafka_cpp
     librdkafka
-    hdfs3
     xml2
     lzma
     simdjson
 )
 
+if (ARCH_AMD64 AND OS_LINUX)
+    add_library(hadoop_hdfs STATIC IMPORTED)
+    set_target_properties(hadoop_hdfs PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/hadoop_hdfs/native/libhdfs.a)
+
+    set(COMMON_THIRDPARTY
+        ${COMMON_THIRDPARTY}
+        hadoop_hdfs
+    )
+    add_definitions(-DUSE_HADOOP_HDFS)
+else()
+    add_library(hdfs3 STATIC IMPORTED)
+    set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
+
+    # TODO: use arm hadoop hdfs to replace this
+    set(COMMON_THIRDPARTY
+        ${COMMON_THIRDPARTY}
+        hdfs3
+    )
+    add_definitions(-DUSE_LIBHDFS3)
+endif()
+
 if (absl_FOUND)
     set(COMMON_THIRDPARTY
         ${COMMON_THIRDPARTY}
diff --git a/be/src/common/config.h b/be/src/common/config.h
index b59d9d42e3..8f34155280 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -879,8 +879,6 @@ CONF_Int32(segcompaction_threshold_segment_num, "10");
 // The segment whose row number above the threshold will be compacted during segcompaction
 CONF_Int32(segcompaction_small_threshold, "1048576");
 
-CONF_String(jvm_max_heap_size, "1024M");
-
 // enable java udf and jdbc scannode
 CONF_Bool(enable_java_support, "true");
 
diff --git a/be/src/io/fs/err_utils.cpp b/be/src/io/fs/err_utils.cpp
index 1e788165e5..d01c7e7488 100644
--- a/be/src/io/fs/err_utils.cpp
+++ b/be/src/io/fs/err_utils.cpp
@@ -18,10 +18,11 @@
 #include "io/fs/err_utils.h"
 
 #include <fmt/format.h>
-#include <hdfs/hdfs.h>
 #include <sstream>
 
+#include "io/fs/hdfs.h"
+
 namespace doris {
 namespace io {
 
@@ -37,8 +38,15 @@ std::string errcode_to_str(const std::error_code& ec) {
 
 std::string hdfs_error() {
     std::stringstream ss;
     char buf[1024];
-    ss << "(" << errno << "), " << strerror_r(errno, buf, 1024);
+    ss << "(" << errno << "), " << strerror_r(errno, buf, 1024) << ")";
+#ifdef USE_HADOOP_HDFS
+    char* root_cause = hdfsGetLastExceptionRootCause();
+    if (root_cause != nullptr) {
+        ss << ", reason: " << root_cause;
+    }
+#else
     ss << ", reason: " << hdfsGetLastError();
+#endif
     return ss.str();
 }
diff --git a/be/src/util/hdfs_util.h b/be/src/io/fs/hdfs.h
similarity index 59%
copy from be/src/util/hdfs_util.h
copy to be/src/io/fs/hdfs.h
index f98bdd5ab3..eb9e1b2c07 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/io/fs/hdfs.h
@@ -17,34 +17,8 @@
 
 #pragma once
 
+#ifdef USE_HADOOP_HDFS
+#include <hadoop_hdfs/hdfs.h>
+#else
 #include <hdfs/hdfs.h>
-
-#include <map>
-#include <memory>
-#include <string>
-
-#include "common/status.h"
-#include "io/fs/path.h"
-#include "io/hdfs_builder.h"
-
-namespace doris {
-namespace io {
-
-class HDFSHandle {
-public:
-    ~HDFSHandle() {}
-
-    static HDFSHandle& instance();
-
-    hdfsFS create_hdfs_fs(HDFSCommonBuilder& builder);
-
-private:
-    HDFSHandle() {}
-};
-
-// if the format of path is hdfs://ip:port/path, replace it to /path.
-// path like hdfs://ip:port/path can't be used by libhdfs3.
-Path convert_path(const Path& path, const std::string& namenode);
-
-} // namespace io
-} // namespace doris
+#endif
diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp
index 219410ac18..ddd035213b 100644
--- a/be/src/io/fs/hdfs_file_reader.cpp
+++ b/be/src/io/fs/hdfs_file_reader.cpp
@@ -17,9 +17,11 @@
 
 #include "io/fs/hdfs_file_reader.h"
 
+#include "io/fs/err_utils.h"
 #include "io/fs/hdfs_file_system.h"
 #include "service/backend_options.h"
 #include "util/doris_metrics.h"
+
 namespace doris {
 namespace io {
 HdfsFileReader::HdfsFileReader(Path path, size_t file_size, const std::string& name_node,
@@ -66,7 +68,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
     int res = hdfsSeek(handle->hdfs_fs, _hdfs_file, offset);
     if (res != 0) {
         return Status::InternalError("Seek to offset failed. (BE: {}) offset={}, err: {}",
-                                     BackendOptions::get_localhost(), offset, hdfsGetLastError());
+                                     BackendOptions::get_localhost(), offset, hdfs_error());
     }
 
     size_t bytes_req = result.size;
@@ -84,8 +86,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
         if (loop_read < 0) {
             return Status::InternalError(
                     "Read hdfs file failed. (BE: {}) namenode:{}, path:{}, err: {}",
-                    BackendOptions::get_localhost(), _name_node, _path.string(),
-                    hdfsGetLastError());
+                    BackendOptions::get_localhost(), _name_node, _path.string(), hdfs_error());
         }
         if (loop_read == 0) {
             break;
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 3eb5bea4c4..0d33bd30c9 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -68,6 +68,13 @@ private:
 
 Status HdfsFileSystem::create(const THdfsParams& hdfs_params, const std::string& path,
                               std::shared_ptr<HdfsFileSystem>* fs) {
+#ifdef USE_HADOOP_HDFS
+    if (!config::enable_java_support) {
+        return Status::InternalError(
+                "hdfs file system is not enabled, you can change be config enable_java_support to "
+                "true.");
+    }
+#endif
     (*fs).reset(new HdfsFileSystem(hdfs_params, path));
     return (*fs)->connect();
 }
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index 75663bf198..9cec56b86c 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -18,13 +18,13 @@
 #pragma once
 
 #include <gen_cpp/PlanNodes_types.h>
-#include <hdfs/hdfs.h>
 
 #include <atomic>
 
+#include "io/fs/hdfs.h"
 #include "io/fs/remote_file_system.h"
 
-namespace doris {
+namespace doris {
 namespace io {
 
 class HdfsFileSystemHandle {
diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp
index b08b973860..8647a7450a 100644
--- a/be/src/io/hdfs_builder.cpp
+++ b/be/src/io/hdfs_builder.cpp
@@ -26,6 +26,7 @@
 #include "util/string_util.h"
 #include "util/uid_util.h"
 #include "util/url_coding.h"
+
 namespace doris {
 
 Status HDFSCommonBuilder::init_hdfs_builder() {
@@ -35,6 +36,7 @@ Status HDFSCommonBuilder::init_hdfs_builder() {
         return Status::InternalError(
                 "failed to init HDFSCommonBuilder, please check check be/conf/hdfs-site.xml");
     }
+    hdfsBuilderSetForceNewInstance(hdfs_builder);
     return Status::OK();
 }
 
@@ -53,7 +55,10 @@ Status HDFSCommonBuilder::run_kinit() {
     if (!rc) {
         return Status::InternalError("Kinit failed, errMsg: " + msg);
     }
+#ifdef USE_LIBHDFS3
+    hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str());
     hdfsBuilderSetKerbTicketCachePath(hdfs_builder, ticket_path.c_str());
+#endif
     return Status::OK();
 }
 
@@ -100,7 +105,6 @@ Status createHDFSBuilder(const THdfsParams& hdfsParams, HDFSCommonBuilder* build
     if (hdfsParams.__isset.hdfs_kerberos_principal) {
         builder->need_kinit = true;
         builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal;
-        hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
     }
     if (hdfsParams.__isset.hdfs_kerberos_keytab) {
         builder->need_kinit = true;
diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h
index ecc08d5a71..7d448cb1cb 100644
--- a/be/src/io/hdfs_builder.h
+++ b/be/src/io/hdfs_builder.h
@@ -17,10 +17,9 @@
 
 #pragma once
 
-#include <hdfs/hdfs.h>
-
 #include "common/status.h"
 #include "gen_cpp/PlanNodes_types.h"
+#include "io/fs/hdfs.h"
 
 namespace doris {
 
@@ -38,9 +37,12 @@ class HDFSCommonBuilder {
 public:
     HDFSCommonBuilder() {}
     ~HDFSCommonBuilder() {
+#ifdef USE_LIBHDFS3
+        // for hadoop hdfs, the hdfs_builder will be freed in hdfsConnect
         if (hdfs_builder != nullptr) {
             hdfsFreeBuilder(hdfs_builder);
         }
+#endif
     }
 
     // Must call this to init hdfs_builder first.
@@ -51,7 +53,7 @@ public:
     Status run_kinit();
 
 private:
-    hdfsBuilder* hdfs_builder;
+    hdfsBuilder* hdfs_builder = nullptr;
     bool need_kinit {false};
     std::string hdfs_kerberos_keytab;
     std::string hdfs_kerberos_principal;
diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp
index 02dffcd9c8..452402852f 100644
--- a/be/src/tools/meta_tool.cpp
+++ b/be/src/tools/meta_tool.cpp
@@ -142,7 +142,7 @@ void delete_meta(DataDir* data_dir) {
 
 Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
     std::string root_path;
-    RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(dir, &root_path));
+    RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path));
     doris::StorePath path;
     auto res = parse_root_path(root_path, &path);
     if (!res.ok()) {
@@ -156,8 +156,8 @@ Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
         std::cout << "new data dir failed" << std::endl;
         return Status::InternalError("new data dir failed");
     }
-    st = p->init();
-    if (!st.ok()) {
+    res = p->init();
+    if (!res.ok()) {
         std::cout << "data_dir load failed" << std::endl;
         return Status::InternalError("data_dir load failed");
     }
@@ -188,7 +188,7 @@ void batch_delete_meta(const std::string& tablet_file) {
         }
         // 1. get dir
         std::string dir;
-        Status st = io::global_local_filesystem()->canonicalize(v[0], &dir);
+        Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir);
         if (!st.ok()) {
             std::cout << "invalid root dir in tablet_file: " << line << std::endl;
             err_num++;
@@ -295,7 +295,7 @@ Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* f
 
 void show_segment_footer(const std::string& file_name) {
     doris::io::FileReaderSPtr file_reader;
-    Status st = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
+    Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
     if (!status.ok()) {
         std::cout << "open file failed: " << status << std::endl;
         return;
@@ -327,7 +327,8 @@ int main(int argc, char** argv) {
         show_meta();
     } else if (FLAGS_operation == "batch_delete_meta") {
         std::string tablet_file;
-        Status st = io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
+        Status st =
+                doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
         if (!st.ok()) {
             std::cout << "invalid tablet file: " << FLAGS_tablet_file
                       << ", error: " << st.to_string() << std::endl;
diff --git a/be/src/util/hdfs_util.h b/be/src/util/hdfs_util.h
index f98bdd5ab3..2e56181df7 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/util/hdfs_util.h
@@ -17,13 +17,12 @@
 
 #pragma once
 
-#include <hdfs/hdfs.h>
-
 #include <map>
 #include <memory>
 #include <string>
 
 #include "common/status.h"
+#include "io/fs/hdfs.h"
 #include "io/fs/path.h"
 #include "io/hdfs_builder.h"
 
diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp
index e7a6eb8b8b..955efb78e7 100644
--- a/be/src/util/jni-util.cpp
+++ b/be/src/util/jni-util.cpp
@@ -25,11 +25,14 @@
 #include <filesystem>
 #include <mutex>
 #include <sstream>
+#include <string>
+#include <vector>
 
 #include "common/config.h"
 #include "gutil/strings/substitute.h"
-#include "jni_native_method.h"
-#include "libjvm_loader.h"
+#include "util/defer_op.h"
+#include "util/jni_native_method.h"
+#include "util/libjvm_loader.h"
 
 using std::string;
 
@@ -37,10 +40,10 @@ namespace doris {
 namespace {
 JavaVM* g_vm;
-std::once_flag g_vm_once;
+[[maybe_unused]] std::once_flag g_vm_once;
 
 const std::string GetDorisJNIClasspath() {
-    const auto* classpath = getenv("DORIS_JNI_CLASSPATH_PARAMETER");
+    const auto* classpath = getenv("DORIS_CLASSPATH");
     if (classpath) {
         return classpath;
     } else {
@@ -66,37 +69,47 @@ const std::string GetDorisJNIClasspath() {
     }
 }
 
-void FindOrCreateJavaVM() {
+// Only used on non-x86 platform
+[[maybe_unused]] void FindOrCreateJavaVM() {
     int num_vms;
-    int rv = LibJVMLoader::JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
+    int rv = JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
     if (rv == 0) {
-        auto classpath = GetDorisJNIClasspath();
-        std::string heap_size = fmt::format("-Xmx{}", config::jvm_max_heap_size);
-        std::string log_path = fmt::format("-DlogPath={}/log/udf-jdbc.log", getenv("DORIS_HOME"));
-        std::string jvm_name = fmt::format("-Dsun.java.command={}", "DorisBE");
-
-        JavaVMOption options[] = {
-                {const_cast<char*>(classpath.c_str()), nullptr},
-                {const_cast<char*>(heap_size.c_str()), nullptr},
-                {const_cast<char*>(log_path.c_str()), nullptr},
-                {const_cast<char*>(jvm_name.c_str()), nullptr},
+        std::vector<std::string> options;
+
+        char* java_opts = getenv("JAVA_OPTS");
+        if (java_opts == nullptr) {
+            options = {
+                    GetDorisJNIClasspath(), fmt::format("-Xmx{}", "1g"),
+                    fmt::format("-DlogPath={}/log/jni.log", getenv("DORIS_HOME")),
+                    fmt::format("-Dsun.java.command={}", "DorisBE"), "-XX:-CriticalJNINatives",
 #ifdef __APPLE__
-                // On macOS, we should disable MaxFDLimit, otherwise the RLIMIT_NOFILE
-                // will be assigned the minimum of OPEN_MAX (10240) and rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
-                // and it can not pass the check performed by storage engine.
-                // The newer JDK has fixed this issue.
-                {const_cast<char*>("-XX:-MaxFDLimit"), nullptr},
+                    // On macOS, we should disable MaxFDLimit, otherwise the RLIMIT_NOFILE
+                    // will be assigned the minimum of OPEN_MAX (10240) and rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
+                    // and it can not pass the check performed by storage engine.
+                    // The newer JDK has fixed this issue.
+                    "-XX:-MaxFDLimit"
 #endif
-        };
+            };
+        } else {
+            std::istringstream stream(java_opts);
+            options = std::vector<std::string>(std::istream_iterator<std::string> {stream},
+                                               std::istream_iterator<std::string>());
+            options.push_back(GetDorisJNIClasspath());
+        }
+        std::unique_ptr<JavaVMOption[]> jvm_options(new JavaVMOption[options.size()]);
+        for (int i = 0; i < options.size(); ++i) {
+            jvm_options[i] = {const_cast<char*>(options[i].c_str()), nullptr};
+        }
+
         JNIEnv* env;
         JavaVMInitArgs vm_args;
         vm_args.version = JNI_VERSION_1_8;
-        vm_args.options = options;
-        vm_args.nOptions = sizeof(options) / sizeof(JavaVMOption);
+        vm_args.options = jvm_options.get();
+        vm_args.nOptions = options.size();
         // Set it to JNI_FALSE because JNI_TRUE will let JVM ignore the max size config.
         vm_args.ignoreUnrecognized = JNI_FALSE;
-        jint res = LibJVMLoader::JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
+        jint res = JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
         if (JNI_OK != res) {
             DCHECK(false) << "Failed to create JVM, code= " << res;
         }
@@ -152,6 +165,7 @@ Status JniLocalFrame::push(JNIEnv* env, int max_local_ref) {
 
 Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
     DCHECK(!tls_env_) << "Call GetJNIEnv() fast path";
+#ifdef USE_LIBHDFS3
     std::call_once(g_vm_once, FindOrCreateJavaVM);
     int rc = g_vm->GetEnv(reinterpret_cast<void**>(&tls_env_), JNI_VERSION_1_8);
     if (rc == JNI_EDETACHED) {
@@ -160,6 +174,10 @@ Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
     if (rc != 0 || tls_env_ == nullptr) {
         return Status::InternalError("Unable to get JVM: {}", rc);
     }
+#else
+    // the hadoop libhdfs will do all the stuff
+    tls_env_ = getJNIEnv();
+#endif
     *env = tls_env_;
     return Status::OK();
 }
diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h
index 5aa8be9a1f..ec5f6abf6e 100644
--- a/be/src/util/jni-util.h
+++ b/be/src/util/jni-util.h
@@ -23,6 +23,11 @@
 #include "gutil/macros.h"
 #include "util/thrift_util.h"
 
+#ifdef USE_HADOOP_HDFS
+// defined in hadoop_hdfs/hdfs.h
+extern "C" JNIEnv* getJNIEnv(void);
+#endif
+
 namespace doris {
 
 #define RETURN_ERROR_IF_EXC(env) \
diff --git a/be/src/util/libjvm_loader.cpp b/be/src/util/libjvm_loader.cpp
index 127d28c2de..6175da6081 100644
--- a/be/src/util/libjvm_loader.cpp
+++ b/be/src/util/libjvm_loader.cpp
@@ -25,6 +25,15 @@
 
 #include "common/status.h"
 
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetCreatedJavaVMs(JavaVM** vm_buf, jsize bufLen,
+                                                          jsize* numVMs) {
+    return doris::LibJVMLoader::JNI_GetCreatedJavaVMs(vm_buf, bufLen, numVMs);
+}
+
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM** pvm, void** penv, void* args) {
+    return doris::LibJVMLoader::JNI_CreateJavaVM(pvm, penv, args);
+}
+
 namespace {
 
 #ifndef __APPLE__
diff --git a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
index 0856687bbf..c954f98b25 100644
--- a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
@@ -24,6 +24,7 @@ void BoolRLEDecoder::set_data(Slice* slice) {
     _data = slice;
     _num_bytes = slice->size;
     _offset = 0;
+
     if (_num_bytes < 4) {
         LOG(FATAL) << "Received invalid length : " + std::to_string(_num_bytes) +
                               " (corrupt data page?)";
@@ -62,6 +63,8 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
         case ColumnSelectVector::CONTENT: {
             bool value; // Can't use uint8_t directly, we should correct it.
             for (size_t i = 0; i < run_length; ++i) {
+                DCHECK(_current_value_idx < max_values)
+                        << _current_value_idx << " vs. " << max_values;
                 value = _values[_current_value_idx++];
                 column_data[data_index++] = (UInt8)value;
             }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
index b74d9c3db0..b08e316c22 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
@@ -233,7 +233,8 @@ void ColumnChunkReader::_reserve_decompress_buf(size_t size) {
 
 Status ColumnChunkReader::skip_values(size_t num_values, bool skip_data) {
     if (UNLIKELY(_remaining_num_values < num_values)) {
-        return Status::IOError("Skip too many values in current page");
+        return Status::IOError("Skip too many values in current page. {} vs. {}",
+                               _remaining_num_values, num_values);
     }
     _remaining_num_values -= num_values;
     if (skip_data) {
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 7204d65114..bbaea90c02 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -20,6 +20,7 @@ set -eo pipefail
 
 curdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
 
+MACHINE_OS=$(uname -s)
 if [[ "$(uname -s)" == 'Darwin' ]] && command -v brew &>/dev/null; then
     PATH="$(brew --prefix)/opt/gnu-getopt/bin:${PATH}"
     export PATH
@@ -70,16 +71,36 @@ if [[ "$(uname -s)" != 'Darwin' ]]; then
     fi
 fi
 
-# add libs to CLASSPATH
+# add java libs
 for f in "${DORIS_HOME}/lib"/*.jar; do
-    if [[ -z "${DORIS_JNI_CLASSPATH_PARAMETER}" ]]; then
-        export DORIS_JNI_CLASSPATH_PARAMETER="${f}"
+    if [[ -z "${DORIS_CLASSPATH}" ]]; then
+        export DORIS_CLASSPATH="${f}"
     else
-        export DORIS_JNI_CLASSPATH_PARAMETER="${f}:${DORIS_JNI_CLASSPATH_PARAMETER}"
+        export DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
     fi
 done
-# DORIS_JNI_CLASSPATH_PARAMETER is used to configure additional jar path to jvm. e.g. -Djava.class.path=$DORIS_HOME/lib/java-udf.jar
-export DORIS_JNI_CLASSPATH_PARAMETER="-Djava.class.path=${DORIS_JNI_CLASSPATH_PARAMETER}"
+
+if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+    # add hadoop libs
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+fi
+
+# the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs
+# and conf/ dir so that hadoop libhdfs can read .xml config file in conf/
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+# DORIS_CLASSPATH is for self-managed jni
+export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
 
 jdk_version() {
     local java_cmd="${1}"
@@ -230,11 +251,28 @@ set_tcmalloc_heap_limit() {
 
 # set_tcmalloc_heap_limit || exit 1
 
-## set hdfs conf
+## set hdfs3 conf
 if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
     export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
 fi
 
+if [[ -z ${JAVA_OPTS} ]]; then
+    # set default JAVA_OPTS
+    CUR_DATE=$(date +%Y%m%d-%H%M%S)
+    JAVA_OPTS="-Xmx1024m -DlogPath=${DORIS_HOME}/log/jni.log -Xloggc:${DORIS_HOME}/log/be.gc.log.${CUR_DATE} -Dsun.java.command=DorisBE -XX:-CriticalJNINatives"
+fi
+
+if [[ "${MACHINE_OS}" == "Darwin" ]]; then
+    JAVA_OPTS="${JAVA_OPTS} -XX:-MaxFDLimit"
+fi
+
+# set LIBHDFS_OPTS for hadoop libhdfs
+export LIBHDFS_OPTS="${JAVA_OPTS}"
+
+#echo "CLASSPATH: ${CLASSPATH}"
+#echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
+#echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}"
+
 # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
 export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
 
diff --git a/build.sh b/build.sh
index 8c5f18fa9a..e89d71ca72 100755
--- a/build.sh
+++ b/build.sh
@@ -547,6 +547,10 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
     cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
 
+    if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
+        cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/"
+    fi
+
     if [[ "${DISABLE_JAVA_UDF_IN_CONF}" -eq 1 ]]; then
         echo -e "\033[33;1mWARNNING: \033[37;1mDisable Java UDF support in be.conf due to the BE was built without Java UDF.\033[0m"
         cat >>"${DORIS_OUTPUT}/be/conf/be.conf" <<EOF
diff --git a/conf/be.conf b/conf/be.conf
index 30eee9e088..cc1b8f6c59 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -17,6 +17,9 @@
 
 PPROF_TMPDIR="$DORIS_HOME/log/"
 
+DATE = `date +%Y%m%d-%H%M%S`
+JAVA_OPTS="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives"
+
 # since 1.2, the JAVA_HOME need to be set to run BE process.
 # JAVA_HOME=/path/to/jdk/
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
index 1f5aa9bcd8..67b9a16d8e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
@@ -271,7 +271,7 @@ public class BeLoadRebalancer extends Rebalancer {
             if (lowBackend == null) {
                 continue;
             }
-            if (hosts.contains(lowBackend.getIp())) {
+            if (!Config.allow_replica_on_same_host && hosts.contains(lowBackend.getIp())) {
                 continue;
             }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org