This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7e61a85331 [refactor](libhdfs) introduce hadoop libhdfs (#18204)
7e61a85331 is described below

commit 7e61a8533151d80a903d2452119b77bca59b43bc
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri Mar 31 18:41:39 2023 +0800

    [refactor](libhdfs) introduce hadoop libhdfs (#18204)
    
    1. Introduce hadoop libhdfs
    2. For the Linux x86 platform, use hadoop libhdfs
    3. For other platforms, use libhdfs3, because we currently don't have a hadoop libhdfs binary for those platforms
    
    Co-authored-by: adonis0147 <adonis0...@gmail.com>
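    
    The switch is resolved at compile time: the new wrapper header
    io/fs/hdfs.h picks the backend, so call sites keep using the common
    libhdfs C API. A minimal sketch (connect_demo and name_node are
    hypothetical; not part of this commit):
    
        #include "io/fs/hdfs.h" // resolves to hadoop_hdfs/hdfs.h or hdfs/hdfs.h
        
        // Both backends expose the same builder-based C API, which is why
        // the rest of the code base compiles unchanged.
        hdfsFS connect_demo(const char* name_node) {
            hdfsBuilder* builder = hdfsNewBuilder();
            hdfsBuilderSetNameNode(builder, name_node);
            return hdfsBuilderConnect(builder);
        }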
---
 be/CMakeLists.txt                                  | 25 ++++++--
 be/src/common/config.h                             |  2 -
 be/src/io/fs/err_utils.cpp                         | 12 +++-
 be/src/{util/hdfs_util.h => io/fs/hdfs.h}          | 34 ++---------
 be/src/io/fs/hdfs_file_reader.cpp                  |  7 ++-
 be/src/io/fs/hdfs_file_system.cpp                  |  7 +++
 be/src/io/fs/hdfs_file_system.h                    |  4 +-
 be/src/io/hdfs_builder.cpp                         |  6 +-
 be/src/io/hdfs_builder.h                           |  8 ++-
 be/src/tools/meta_tool.cpp                         | 13 +++--
 be/src/util/hdfs_util.h                            |  3 +-
 be/src/util/jni-util.cpp                           | 68 ++++++++++++++--------
 be/src/util/jni-util.h                             |  5 ++
 be/src/util/libjvm_loader.cpp                      |  9 +++
 .../vec/exec/format/parquet/bool_rle_decoder.cpp   |  3 +
 .../parquet/vparquet_column_chunk_reader.cpp       |  3 +-
 bin/start_be.sh                                    | 52 ++++++++++++++---
 build.sh                                           |  4 ++
 conf/be.conf                                       |  3 +
 .../org/apache/doris/clone/BeLoadRebalancer.java   |  2 +-
 20 files changed, 181 insertions(+), 89 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 8bb1100c51..cc0b6bbdc1 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -420,9 +420,6 @@ set_target_properties(k5crypto PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/li
 add_library(gssapi_krb5 STATIC IMPORTED)
 set_target_properties(gssapi_krb5 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgssapi_krb5.a)
 
-add_library(hdfs3 STATIC IMPORTED)
-set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
-
 find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)
 
 if (OS_MACOSX)
@@ -762,12 +759,32 @@ set(COMMON_THIRDPARTY
     # put this after lz4 to avoid using lz4 lib in librdkafka
     librdkafka_cpp
     librdkafka
-    hdfs3
     xml2
     lzma
     simdjson
 )
 
+if (ARCH_AMD64 AND OS_LINUX)
+    add_library(hadoop_hdfs STATIC IMPORTED)
+    set_target_properties(hadoop_hdfs PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/hadoop_hdfs/native/libhdfs.a)
+
+    set(COMMON_THIRDPARTY
+        ${COMMON_THIRDPARTY}
+        hadoop_hdfs
+    )
+    add_definitions(-DUSE_HADOOP_HDFS)
+else()
+    add_library(hdfs3 STATIC IMPORTED)
+    set_target_properties(hdfs3 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libhdfs3.a)
+
+    # TODO: use arm hadoop hdfs to replace this
+    set(COMMON_THIRDPARTY
+        ${COMMON_THIRDPARTY}
+        hdfs3
+    )
+    add_definitions(-DUSE_LIBHDFS3)
+endif()
+
 if (absl_FOUND)
     set(COMMON_THIRDPARTY
         ${COMMON_THIRDPARTY}
diff --git a/be/src/common/config.h b/be/src/common/config.h
index b59d9d42e3..8f34155280 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -879,8 +879,6 @@ CONF_Int32(segcompaction_threshold_segment_num, "10");
 // The segment whose row number above the threshold will be compacted during segcompaction
 CONF_Int32(segcompaction_small_threshold, "1048576");
 
-CONF_String(jvm_max_heap_size, "1024M");
-
 // enable java udf and jdbc scannode
 CONF_Bool(enable_java_support, "true");
 
diff --git a/be/src/io/fs/err_utils.cpp b/be/src/io/fs/err_utils.cpp
index 1e788165e5..d01c7e7488 100644
--- a/be/src/io/fs/err_utils.cpp
+++ b/be/src/io/fs/err_utils.cpp
@@ -18,10 +18,11 @@
 #include "io/fs/err_utils.h"
 
 #include <fmt/format.h>
-#include <hdfs/hdfs.h>
 
 #include <sstream>
 
+#include "io/fs/hdfs.h"
+
 namespace doris {
 namespace io {
 
@@ -37,8 +38,15 @@ std::string errcode_to_str(const std::error_code& ec) {
 std::string hdfs_error() {
     std::stringstream ss;
     char buf[1024];
-    ss << "(" << errno << "), " << strerror_r(errno, buf, 1024);
+    ss << "(" << errno << "), " << strerror_r(errno, buf, 1024) << ")";
+#ifdef USE_HADOOP_HDFS
+    char* root_cause = hdfsGetLastExceptionRootCause();
+    if (root_cause != nullptr) {
+        ss << ", reason: " << root_cause;
+    }
+#else
     ss << ", reason: " << hdfsGetLastError();
+#endif
     return ss.str();
 }
 
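Note on the hdfs_error() change above: under USE_HADOOP_HDFS it appends the
root-cause Java exception from hdfsGetLastExceptionRootCause(). A minimal
usage sketch (illustrative only; the hdfsExists call and the fs/path
variables are assumed, not part of this commit):

    // Call io::hdfs_error() right after a failing libhdfs call, while errno
    // and the thread-local exception state are still fresh.
    if (hdfsExists(fs, path.c_str()) != 0) {
        return Status::InternalError("check hdfs path failed, err: {}", io::hdfs_error());
    }
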
diff --git a/be/src/util/hdfs_util.h b/be/src/io/fs/hdfs.h
similarity index 59%
copy from be/src/util/hdfs_util.h
copy to be/src/io/fs/hdfs.h
index f98bdd5ab3..eb9e1b2c07 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/io/fs/hdfs.h
@@ -17,34 +17,8 @@
 
 #pragma once
 
+#ifdef USE_HADOOP_HDFS
+#include <hadoop_hdfs/hdfs.h>
+#else
 #include <hdfs/hdfs.h>
-
-#include <map>
-#include <memory>
-#include <string>
-
-#include "common/status.h"
-#include "io/fs/path.h"
-#include "io/hdfs_builder.h"
-
-namespace doris {
-namespace io {
-
-class HDFSHandle {
-public:
-    ~HDFSHandle() {}
-
-    static HDFSHandle& instance();
-
-    hdfsFS create_hdfs_fs(HDFSCommonBuilder& builder);
-
-private:
-    HDFSHandle() {}
-};
-
-// if the format of path is hdfs://ip:port/path, replace it to /path.
-// path like hdfs://ip:port/path can't be used by libhdfs3.
-Path convert_path(const Path& path, const std::string& namenode);
-
-} // namespace io
-} // namespace doris
+#endif
diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp
index 219410ac18..ddd035213b 100644
--- a/be/src/io/fs/hdfs_file_reader.cpp
+++ b/be/src/io/fs/hdfs_file_reader.cpp
@@ -17,9 +17,11 @@
 
 #include "io/fs/hdfs_file_reader.h"
 
+#include "io/fs/err_utils.h"
 #include "io/fs/hdfs_file_system.h"
 #include "service/backend_options.h"
 #include "util/doris_metrics.h"
+
 namespace doris {
 namespace io {
 HdfsFileReader::HdfsFileReader(Path path, size_t file_size, const std::string& name_node,
@@ -66,7 +68,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
     int res = hdfsSeek(handle->hdfs_fs, _hdfs_file, offset);
     if (res != 0) {
         return Status::InternalError("Seek to offset failed. (BE: {}) offset={}, err: {}",
-                                     BackendOptions::get_localhost(), offset, hdfsGetLastError());
+                                     BackendOptions::get_localhost(), offset, hdfs_error());
     }
 
     size_t bytes_req = result.size;
@@ -84,8 +86,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r
         if (loop_read < 0) {
             return Status::InternalError(
                     "Read hdfs file failed. (BE: {}) namenode:{}, path:{}, err: {}",
-                    BackendOptions::get_localhost(), _name_node, _path.string(),
-                    hdfsGetLastError());
+                    BackendOptions::get_localhost(), _name_node, _path.string(), hdfs_error());
         }
         if (loop_read == 0) {
             break;
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 3eb5bea4c4..0d33bd30c9 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -68,6 +68,13 @@ private:
 
 Status HdfsFileSystem::create(const THdfsParams& hdfs_params, const std::string& path,
                               std::shared_ptr<HdfsFileSystem>* fs) {
+#ifdef USE_HADOOP_HDFS
+    if (!config::enable_java_support) {
+        return Status::InternalError(
+                "hdfs file system is not enabled, you can change be config 
enable_java_support to "
+                "true.");
+    }
+#endif
     (*fs).reset(new HdfsFileSystem(hdfs_params, path));
     return (*fs)->connect();
 }
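
Note: because hadoop libhdfs starts an in-process JVM, HdfsFileSystem::create
now fails fast when enable_java_support is false. A hedged caller-side sketch
(hdfs_params is assumed to be a populated THdfsParams; not part of this
commit):

    std::shared_ptr<io::HdfsFileSystem> fs;
    Status st = io::HdfsFileSystem::create(hdfs_params, "/user/doris/demo", &fs);
    if (!st.ok()) {
        // With USE_HADOOP_HDFS and enable_java_support=false this surfaces the
        // InternalError added above instead of failing later inside the JVM.
        LOG(WARNING) << "create hdfs file system failed: " << st;
    }
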
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index 75663bf198..9cec56b86c 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -18,13 +18,13 @@
 #pragma once
 
 #include <gen_cpp/PlanNodes_types.h>
-#include <hdfs/hdfs.h>
 
 #include <atomic>
 
+#include "io/fs/hdfs.h"
 #include "io/fs/remote_file_system.h"
-namespace doris {
 
+namespace doris {
 namespace io {
 
 class HdfsFileSystemHandle {
diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp
index b08b973860..8647a7450a 100644
--- a/be/src/io/hdfs_builder.cpp
+++ b/be/src/io/hdfs_builder.cpp
@@ -26,6 +26,7 @@
 #include "util/string_util.h"
 #include "util/uid_util.h"
 #include "util/url_coding.h"
+
 namespace doris {
 
 Status HDFSCommonBuilder::init_hdfs_builder() {
@@ -35,6 +36,7 @@ Status HDFSCommonBuilder::init_hdfs_builder() {
         return Status::InternalError(
                 "failed to init HDFSCommonBuilder, please check check 
be/conf/hdfs-site.xml");
     }
+    hdfsBuilderSetForceNewInstance(hdfs_builder);
     return Status::OK();
 }
 
@@ -53,7 +55,10 @@ Status HDFSCommonBuilder::run_kinit() {
     if (!rc) {
         return Status::InternalError("Kinit failed, errMsg: " + msg);
     }
+#ifdef USE_LIBHDFS3
+    hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str());
     hdfsBuilderSetKerbTicketCachePath(hdfs_builder, ticket_path.c_str());
+#endif
     return Status::OK();
 }
 
@@ -100,7 +105,6 @@ Status createHDFSBuilder(const THdfsParams& hdfsParams, HDFSCommonBuilder* build
     if (hdfsParams.__isset.hdfs_kerberos_principal) {
         builder->need_kinit = true;
         builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal;
-        hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
     }
     if (hdfsParams.__isset.hdfs_kerberos_keytab) {
         builder->need_kinit = true;
diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h
index ecc08d5a71..7d448cb1cb 100644
--- a/be/src/io/hdfs_builder.h
+++ b/be/src/io/hdfs_builder.h
@@ -17,10 +17,9 @@
 
 #pragma once
 
-#include <hdfs/hdfs.h>
-
 #include "common/status.h"
 #include "gen_cpp/PlanNodes_types.h"
+#include "io/fs/hdfs.h"
 
 namespace doris {
 
@@ -38,9 +37,12 @@ class HDFSCommonBuilder {
 public:
     HDFSCommonBuilder() {}
     ~HDFSCommonBuilder() {
+#ifdef USE_LIBHDFS3
+        // for hadoop hdfs, the hdfs_builder will be freed in hdfsConnect
         if (hdfs_builder != nullptr) {
             hdfsFreeBuilder(hdfs_builder);
         }
+#endif
     }
 
     // Must call this to init hdfs_builder first.
@@ -51,7 +53,7 @@ public:
     Status run_kinit();
 
 private:
-    hdfsBuilder* hdfs_builder;
+    hdfsBuilder* hdfs_builder = nullptr;
     bool need_kinit {false};
     std::string hdfs_kerberos_keytab;
     std::string hdfs_kerberos_principal;
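
Note on the destructor guard above: builder ownership differs between the two
backends. A sketch of the rule, based only on the comment in this commit (the
builder/fs names are illustrative):

    hdfsBuilder* builder = hdfsNewBuilder();
    hdfsFS fs = hdfsBuilderConnect(builder);
    #ifdef USE_LIBHDFS3
        // libhdfs3: the caller still owns the builder after connecting.
        hdfsFreeBuilder(builder);
    #else
        // hadoop libhdfs: hdfsBuilderConnect consumes the builder; freeing
        // it again here would be a double free.
    #endif
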
diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp
index 02dffcd9c8..452402852f 100644
--- a/be/src/tools/meta_tool.cpp
+++ b/be/src/tools/meta_tool.cpp
@@ -142,7 +142,7 @@ void delete_meta(DataDir* data_dir) {
 
 Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
     std::string root_path;
-    RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(dir, &root_path));
+    RETURN_IF_ERROR(doris::io::global_local_filesystem()->canonicalize(dir, &root_path));
     doris::StorePath path;
     auto res = parse_root_path(root_path, &path);
     if (!res.ok()) {
@@ -156,8 +156,8 @@ Status init_data_dir(const std::string& dir, std::unique_ptr<DataDir>* ret) {
         std::cout << "new data dir failed" << std::endl;
         return Status::InternalError("new data dir failed");
     }
-    st = p->init();
-    if (!st.ok()) {
+    res = p->init();
+    if (!res.ok()) {
         std::cout << "data_dir load failed" << std::endl;
         return Status::InternalError("data_dir load failed");
     }
@@ -188,7 +188,7 @@ void batch_delete_meta(const std::string& tablet_file) {
         }
         // 1. get dir
         std::string dir;
-        Status st = io::global_local_filesystem()->canonicalize(v[0], &dir);
+        Status st = doris::io::global_local_filesystem()->canonicalize(v[0], &dir);
         if (!st.ok()) {
             std::cout << "invalid root dir in tablet_file: " << line << std::endl;
             err_num++;
@@ -295,7 +295,7 @@ Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* f
 
 void show_segment_footer(const std::string& file_name) {
     doris::io::FileReaderSPtr file_reader;
-    Status st = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
+    Status status = doris::io::global_local_filesystem()->open_file(file_name, &file_reader);
     if (!status.ok()) {
         std::cout << "open file failed: " << status << std::endl;
         return;
@@ -327,7 +327,8 @@ int main(int argc, char** argv) {
         show_meta();
     } else if (FLAGS_operation == "batch_delete_meta") {
         std::string tablet_file;
-        Status st = io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
+        Status st =
+                doris::io::global_local_filesystem()->canonicalize(FLAGS_tablet_file, &tablet_file);
         if (!st.ok()) {
             std::cout << "invalid tablet file: " << FLAGS_tablet_file
                       << ", error: " << st.to_string() << std::endl;
diff --git a/be/src/util/hdfs_util.h b/be/src/util/hdfs_util.h
index f98bdd5ab3..2e56181df7 100644
--- a/be/src/util/hdfs_util.h
+++ b/be/src/util/hdfs_util.h
@@ -17,13 +17,12 @@
 
 #pragma once
 
-#include <hdfs/hdfs.h>
-
 #include <map>
 #include <memory>
 #include <string>
 
 #include "common/status.h"
+#include "io/fs/hdfs.h"
 #include "io/fs/path.h"
 #include "io/hdfs_builder.h"
 
diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp
index e7a6eb8b8b..955efb78e7 100644
--- a/be/src/util/jni-util.cpp
+++ b/be/src/util/jni-util.cpp
@@ -25,11 +25,14 @@
 #include <filesystem>
 #include <mutex>
 #include <sstream>
+#include <string>
+#include <vector>
 
 #include "common/config.h"
 #include "gutil/strings/substitute.h"
-#include "jni_native_method.h"
-#include "libjvm_loader.h"
+#include "util/defer_op.h"
+#include "util/jni_native_method.h"
+#include "util/libjvm_loader.h"
 
 using std::string;
 
@@ -37,10 +40,10 @@ namespace doris {
 
 namespace {
 JavaVM* g_vm;
-std::once_flag g_vm_once;
+[[maybe_unused]] std::once_flag g_vm_once;
 
 const std::string GetDorisJNIClasspath() {
-    const auto* classpath = getenv("DORIS_JNI_CLASSPATH_PARAMETER");
+    const auto* classpath = getenv("DORIS_CLASSPATH");
     if (classpath) {
         return classpath;
     } else {
@@ -66,37 +69,47 @@ const std::string GetDorisJNIClasspath() {
     }
 }
 
-void FindOrCreateJavaVM() {
+// Only used on non-x86 platform
+[[maybe_unused]] void FindOrCreateJavaVM() {
     int num_vms;
-    int rv = LibJVMLoader::JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
+    int rv = JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms);
     if (rv == 0) {
-        auto classpath = GetDorisJNIClasspath();
-        std::string heap_size = fmt::format("-Xmx{}", config::jvm_max_heap_size);
-        std::string log_path = fmt::format("-DlogPath={}/log/udf-jdbc.log", getenv("DORIS_HOME"));
-        std::string jvm_name = fmt::format("-Dsun.java.command={}", "DorisBE");
-
-        JavaVMOption options[] = {
-                {const_cast<char*>(classpath.c_str()), nullptr},
-                {const_cast<char*>(heap_size.c_str()), nullptr},
-                {const_cast<char*>(log_path.c_str()), nullptr},
-                {const_cast<char*>(jvm_name.c_str()), nullptr},
+        std::vector<std::string> options;
+
+        char* java_opts = getenv("JAVA_OPTS");
+        if (java_opts == nullptr) {
+            options = {
+                    GetDorisJNIClasspath(), fmt::format("-Xmx{}", "1g"),
+                    fmt::format("-DlogPath={}/log/jni.log", 
getenv("DORIS_HOME")),
+                    fmt::format("-Dsun.java.command={}", "DorisBE"), 
"-XX:-CriticalJNINatives",
 #ifdef __APPLE__
-                // On macOS, we should disable MaxFDLimit, otherwise the 
RLIMIT_NOFILE
-                // will be assigned the minimum of OPEN_MAX (10240) and 
rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
-                // and it can not pass the check performed by storage engine.
-                // The newer JDK has fixed this issue.
-                {const_cast<char*>("-XX:-MaxFDLimit"), nullptr},
+                    // On macOS, we should disable MaxFDLimit, otherwise the 
RLIMIT_NOFILE
+                    // will be assigned the minimum of OPEN_MAX (10240) and 
rlim_cur (See src/hotspot/os/bsd/os_bsd.cpp)
+                    // and it can not pass the check performed by storage 
engine.
+                    // The newer JDK has fixed this issue.
+                    "-XX:-MaxFDLimit"
 #endif
-        };
+            };
+        } else {
+            std::istringstream stream(java_opts);
+            options = std::vector<std::string>(std::istream_iterator<std::string> {stream},
+                                               std::istream_iterator<std::string>());
+            options.push_back(GetDorisJNIClasspath());
+        }
+        std::unique_ptr<JavaVMOption[]> jvm_options(new JavaVMOption[options.size()]);
+        for (int i = 0; i < options.size(); ++i) {
+            jvm_options[i] = {const_cast<char*>(options[i].c_str()), nullptr};
+        }
+
         JNIEnv* env;
         JavaVMInitArgs vm_args;
         vm_args.version = JNI_VERSION_1_8;
-        vm_args.options = options;
-        vm_args.nOptions = sizeof(options) / sizeof(JavaVMOption);
+        vm_args.options = jvm_options.get();
+        vm_args.nOptions = options.size();
         // Set it to JNI_FALSE because JNI_TRUE will let JVM ignore the max size config.
         vm_args.ignoreUnrecognized = JNI_FALSE;
 
-        jint res = LibJVMLoader::JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
+        jint res = JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args);
         if (JNI_OK != res) {
             DCHECK(false) << "Failed to create JVM, code= " << res;
         }
@@ -152,6 +165,7 @@ Status JniLocalFrame::push(JNIEnv* env, int max_local_ref) {
 Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
     DCHECK(!tls_env_) << "Call GetJNIEnv() fast path";
 
+#ifdef USE_LIBHDFS3
     std::call_once(g_vm_once, FindOrCreateJavaVM);
     int rc = g_vm->GetEnv(reinterpret_cast<void**>(&tls_env_), JNI_VERSION_1_8);
     if (rc == JNI_EDETACHED) {
@@ -160,6 +174,10 @@ Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) {
     if (rc != 0 || tls_env_ == nullptr) {
         return Status::InternalError("Unable to get JVM: {}", rc);
     }
+#else
+    // the hadoop libhdfs will do all the stuff
+    tls_env_ = getJNIEnv();
+#endif
     *env = tls_env_;
     return Status::OK();
 }
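
Note on the JAVA_OPTS handling above: the environment value is tokenized on
whitespace. A self-contained sketch of the idiom (split_java_opts is a
hypothetical name; it assumes no option contains quoted spaces):

    #include <iterator>
    #include <sstream>
    #include <string>
    #include <vector>

    std::vector<std::string> split_java_opts(const std::string& java_opts) {
        // std::istream_iterator skips any run of whitespace between tokens.
        std::istringstream stream(java_opts);
        return std::vector<std::string>(std::istream_iterator<std::string>(stream),
                                        std::istream_iterator<std::string>());
    }
    // "-Xmx1024m -XX:-CriticalJNINatives" -> {"-Xmx1024m", "-XX:-CriticalJNINatives"}
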
diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h
index 5aa8be9a1f..ec5f6abf6e 100644
--- a/be/src/util/jni-util.h
+++ b/be/src/util/jni-util.h
@@ -23,6 +23,11 @@
 #include "gutil/macros.h"
 #include "util/thrift_util.h"
 
+#ifdef USE_HADOOP_HDFS
+// defined in hadoop_hdfs/hdfs.h
+extern "C" JNIEnv* getJNIEnv(void);
+#endif
+
 namespace doris {
 
 #define RETURN_ERROR_IF_EXC(env)                                     \
diff --git a/be/src/util/libjvm_loader.cpp b/be/src/util/libjvm_loader.cpp
index 127d28c2de..6175da6081 100644
--- a/be/src/util/libjvm_loader.cpp
+++ b/be/src/util/libjvm_loader.cpp
@@ -25,6 +25,15 @@
 
 #include "common/status.h"
 
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetCreatedJavaVMs(JavaVM** vm_buf, jsize bufLen,
+                                                          jsize* numVMs) {
+    return doris::LibJVMLoader::JNI_GetCreatedJavaVMs(vm_buf, bufLen, numVMs);
+}
+
+_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM** pvm, void** penv, void* args) {
+    return doris::LibJVMLoader::JNI_CreateJavaVM(pvm, penv, args);
+}
+
 namespace {
 
 #ifndef __APPLE__
diff --git a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
index 0856687bbf..c954f98b25 100644
--- a/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/bool_rle_decoder.cpp
@@ -24,6 +24,7 @@ void BoolRLEDecoder::set_data(Slice* slice) {
     _data = slice;
     _num_bytes = slice->size;
     _offset = 0;
+
     if (_num_bytes < 4) {
         LOG(FATAL) << "Received invalid length : " + 
std::to_string(_num_bytes) +
                               " (corrupt data page?)";
@@ -62,6 +63,8 @@ Status BoolRLEDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr
         case ColumnSelectVector::CONTENT: {
             bool value; // Can't use uint8_t directly, we should correct it.
             for (size_t i = 0; i < run_length; ++i) {
+                DCHECK(_current_value_idx < max_values)
+                        << _current_value_idx << " vs. " << max_values;
                 value = _values[_current_value_idx++];
                 column_data[data_index++] = (UInt8)value;
             }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
index b74d9c3db0..b08e316c22 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
@@ -233,7 +233,8 @@ void ColumnChunkReader::_reserve_decompress_buf(size_t size) {
 
 Status ColumnChunkReader::skip_values(size_t num_values, bool skip_data) {
     if (UNLIKELY(_remaining_num_values < num_values)) {
-        return Status::IOError("Skip too many values in current page");
+        return Status::IOError("Skip too many values in current page. {} vs. 
{}",
+                               _remaining_num_values, num_values);
     }
     _remaining_num_values -= num_values;
     if (skip_data) {
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 7204d65114..bbaea90c02 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -20,6 +20,7 @@ set -eo pipefail
 
 curdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
 
+MACHINE_OS=$(uname -s)
 if [[ "$(uname -s)" == 'Darwin' ]] && command -v brew &>/dev/null; then
     PATH="$(brew --prefix)/opt/gnu-getopt/bin:${PATH}"
     export PATH
@@ -70,16 +71,36 @@ if [[ "$(uname -s)" != 'Darwin' ]]; then
     fi
 fi
 
-# add libs to CLASSPATH
+# add java libs
 for f in "${DORIS_HOME}/lib"/*.jar; do
-    if [[ -z "${DORIS_JNI_CLASSPATH_PARAMETER}" ]]; then
-        export DORIS_JNI_CLASSPATH_PARAMETER="${f}"
+    if [[ -z "${DORIS_CLASSPATH}" ]]; then
+        export DORIS_CLASSPATH="${f}"
     else
-        export DORIS_JNI_CLASSPATH_PARAMETER="${f}:${DORIS_JNI_CLASSPATH_PARAMETER}"
+        export DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
     fi
 done
-# DORIS_JNI_CLASSPATH_PARAMETER is used to configure additional jar path to jvm. e.g. -Djava.class.path=$DORIS_HOME/lib/java-udf.jar
-export DORIS_JNI_CLASSPATH_PARAMETER="-Djava.class.path=${DORIS_JNI_CLASSPATH_PARAMETER}"
+
+if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+    # add hadoop libs
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+        DORIS_CLASSPATH="${f}:${DORIS_CLASSPATH}"
+    done
+fi
+
+# the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs
+# and conf/ dir so that hadoop libhdfs can read .xml config file in conf/
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+# DORIS_CLASSPATH is for self-managed jni
+export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
 
 jdk_version() {
     local java_cmd="${1}"
@@ -230,11 +251,28 @@ set_tcmalloc_heap_limit() {
 
 # set_tcmalloc_heap_limit || exit 1
 
-## set hdfs conf
+## set hdfs3 conf
 if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
     export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
 fi
 
+if [[ -z ${JAVA_OPTS} ]]; then
+    # set default JAVA_OPTS
+    CUR_DATE=$(date +%Y%m%d-%H%M%S)
+    JAVA_OPTS="-Xmx1024m -DlogPath=${DORIS_HOME}/log/jni.log 
-Xloggc:${DORIS_HOME}/log/be.gc.log.${CUR_DATE} -Dsun.java.command=DorisBE 
-XX:-CriticalJNINatives"
+fi
+
+if [[ "${MACHINE_OS}" == "Darwin" ]]; then
+    JAVA_OPTS="${JAVA_OPTS} -XX:-MaxFDLimit"
+fi
+
+# set LIBHDFS_OPTS for hadoop libhdfs
+export LIBHDFS_OPTS="${JAVA_OPTS}"
+
+#echo "CLASSPATH: ${CLASSPATH}"
+#echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
+#echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}"
+
 # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
 export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
 
diff --git a/build.sh b/build.sh
index 8c5f18fa9a..e89d71ca72 100755
--- a/build.sh
+++ b/build.sh
@@ -547,6 +547,10 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
     cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
 
+    if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
+        cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_OUTPUT}/be/lib/"
+    fi
+
     if [[ "${DISABLE_JAVA_UDF_IN_CONF}" -eq 1 ]]; then
         echo -e "\033[33;1mWARNNING: \033[37;1mDisable Java UDF support in 
be.conf due to the BE was built without Java UDF.\033[0m"
         cat >>"${DORIS_OUTPUT}/be/conf/be.conf" <<EOF
diff --git a/conf/be.conf b/conf/be.conf
index 30eee9e088..cc1b8f6c59 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -17,6 +17,9 @@
 
 PPROF_TMPDIR="$DORIS_HOME/log/"
 
+CUR_DATE=`date +%Y%m%d-%H%M%S`
+JAVA_OPTS="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xloggc:$DORIS_HOME/log/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives"
+
 # since 1.2, the JAVA_HOME need to be set to run BE process.
 # JAVA_HOME=/path/to/jdk/
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
index 1f5aa9bcd8..67b9a16d8e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java
@@ -271,7 +271,7 @@ public class BeLoadRebalancer extends Rebalancer {
                 if (lowBackend == null) {
                     continue;
                 }
-                if (hosts.contains(lowBackend.getIp())) {
+                if (!Config.allow_replica_on_same_host && hosts.contains(lowBackend.getIp())) {
                     continue;
                 }
 

