This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 680199e855 [Fix](multi-catalog) Fix Hadoop short-circuit reading not being enabled in some environments. (#21548)
680199e855 is described below

commit 680199e8557de1a806321ce81981f838b2fb590f
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Thu Jul 6 13:44:55 2023 +0800

    [Fix](multi-catalog) Fix Hadoop short-circuit reading not being enabled in some environments. (#21548)
    
    Merge #21516 to branch-1.2-lts.
    
    Fix Hadoop short-circuit reading not being enabled in some environments.
    - Revert #21430 because it causes a performance degradation issue.
    - Add `$HADOOP_CONF_DIR` to `$CLASSPATH`.
    - Remove the empty `hdfs-site.xml`, because in some environments it prevents Hadoop short-circuit reading from being enabled.
    - Copy the Hadoop common native libs (taken from https://github.com/apache/doris-thirdparty/pull/98) and add them to `LD_LIBRARY_PATH`, because in some environments `LD_LIBRARY_PATH` does not contain the Hadoop common native libs, which prevents Hadoop short-circuit reading from being enabled.
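
A hedged aside (not part of the commit): the stock `hadoop checknative` tool can confirm the prerequisite this fix targets, namely that the Hadoop common native lib (libhadoop) is loadable. The conf path below is an assumed example.

export HADOOP_CONF_DIR=/etc/hadoop/conf   # assumed cluster conf location
# Reports whether libhadoop.so can be loaded; without it, HDFS silently
# falls back to remote (non-short-circuit) reads.
hadoop checknative -a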
---
 be/CMakeLists.txt                               |  1 -
 bin/start_be.sh                                 | 11 +++++++----
 bin/start_fe.sh                                 |  5 ++++-
 build.sh                                        |  2 --
 conf/hdfs-site.xml                              | 23 -----------------------
 docs/en/docs/lakehouse/multi-catalog/hive.md    | 13 +++++++------
 docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 17 +++++++++--------
 thirdparty/build-thirdparty.sh                  |  3 +++
 8 files changed, 30 insertions(+), 45 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 12621403fb..9307e018f0 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -951,7 +951,6 @@ install(FILES
     ${BASE_DIR}/../conf/be.conf
     ${BASE_DIR}/../conf/odbcinst.ini
     ${BASE_DIR}/../conf/asan_suppr.conf
-    ${BASE_DIR}/../conf/hdfs-site.xml
     DESTINATION ${OUTPUT_DIR}/conf)
 
 get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
diff --git a/bin/start_be.sh b/bin/start_be.sh
index e4bd71a940..a26241862b 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -102,15 +102,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     done
 fi
 
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+    export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}"
+fi
+
 # the CLASSPATH and LIBHDFS_OPTS are used for hadoop libhdfs
 # and the conf/ dir so that hadoop libhdfs can read the .xml config files in conf/
-if command -v hadoop >/dev/null 2>&1; then
-    HADOOP_SYSTEM_CLASSPATH="$(hadoop classpath --glob)"
-fi
-export CLASSPATH="${HADOOP_SYSTEM_CLASSPATH}:${DORIS_HOME}/conf/:${DORIS_CLASSPATH}"
+export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}"
 # DORIS_CLASSPATH is for self-managed jni
 export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}"
 
+export LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}"
+
 jdk_version() {
     local java_cmd="${1}"
     local result
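
A hedged check, with assumed paths, that the native libs this script now prepends to LD_LIBRARY_PATH are actually shipped in the deployed BE package:

# Assumes DORIS_HOME points at the deployed be/ directory.
ls "${DORIS_HOME}/lib/hadoop_hdfs/native/"
# Expect shared objects such as libhadoop.so* and libhdfs.so*; HDFS
# short-circuit reads need libhadoop at runtime.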
diff --git a/bin/start_fe.sh b/bin/start_fe.sh
index 5028baedaf..03b0e8e8d5 100755
--- a/bin/start_fe.sh
+++ b/bin/start_fe.sh
@@ -179,7 +179,10 @@ done
 # make sure the doris-fe.jar comes first, so that classes
 # with the same qualified name are loaded preferentially from doris-fe.jar
 CLASSPATH="${DORIS_FE_JAR}:${CLASSPATH}"
-export CLASSPATH="${CLASSPATH}:${DORIS_HOME}/lib:${DORIS_HOME}/conf"
+if [[ -n "${HADOOP_CONF_DIR}" ]]; then
+    CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}"
+fi
+export CLASSPATH="${DORIS_HOME}/conf:${CLASSPATH}:${DORIS_HOME}/lib"
 
 pidfile="${PID_DIR}/fe.pid"
 
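
Illustrative usage under the changed scripts (the conf path is an assumption): exporting HADOOP_CONF_DIR before start-up now puts the cluster's XML files ahead of the bundled jars on the classpath for both FE and BE:

export HADOOP_CONF_DIR=/etc/hadoop/conf   # assumed cluster conf dir
sh bin/start_fe.sh --daemon
sh bin/start_be.sh --daemon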
diff --git a/build.sh b/build.sh
index 531ba4ea13..a7f46b018d 100755
--- a/build.sh
+++ b/build.sh
@@ -507,7 +507,6 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
     cp -r -p "${DORIS_HOME}/bin"/*_fe.sh "${DORIS_OUTPUT}/fe/bin"/
     cp -r -p "${DORIS_HOME}/conf/fe.conf" "${DORIS_OUTPUT}/fe/conf"/
     cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/
-    cp -r -p "${DORIS_HOME}/conf"/*.xml "${DORIS_OUTPUT}/fe/conf"/
     cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate" 
"${DORIS_OUTPUT}/fe/"/
     rm -rf "${DORIS_OUTPUT}/fe/lib"/*
     cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/
@@ -541,7 +540,6 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
 
     if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
         cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" 
"${DORIS_OUTPUT}/be/lib/"
-        rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/"
     fi
 
     if [[ "${BUILD_JAVA_UDF}" -eq 0 ]]; then
diff --git a/conf/hdfs-site.xml b/conf/hdfs-site.xml
deleted file mode 100644
index 32235bf8bc..0000000000
--- a/conf/hdfs-site.xml
+++ /dev/null
@@ -1,23 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-</configuration>
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md
index f82df22ab7..1a269d6774 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -34,17 +34,18 @@ Besides Hive, many other systems, such as Iceberg and Hudi, use Hive Metastore t
 
 When connecting to Hive, Doris:
 
-1. Supports Hive version 1/2/3;
-2. Supports both Managed Table and External Table;
-3. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore;
-4. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`).
-5. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
+1. Place core-site.xml, hdfs-site.xml and hive-site.xml in the conf directories of FE and BE. The Hadoop configuration files in the conf directory are read first, followed by the configuration files under the path set in the environment variable `HADOOP_CONF_DIR`.
+2. Supports Hive version 1/2/3;
+3. Supports both Managed Table and External Table;
+4. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore;
+5. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`).
+6. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
    1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. copy core-site.xml and hdfs-site.xml from hive cluster to fe/conf/ and apache_hdfs_broker/conf
 
 <version since="dev">
 
-6. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
+7. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
    1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/
    2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog
 
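
A minimal sketch of the hdfs-site.xml that the doc change above asks operators to place in the FE/BE conf directories; the property values are assumptions and must match the DataNode configuration:

# Write a client-side hdfs-site.xml enabling short-circuit reads.
# dfs.domain.socket.path must equal the path configured on the DataNodes.
cat > fe/conf/hdfs-site.xml <<'EOF'
<configuration>
    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.domain.socket.path</name>
        <value>/var/lib/hadoop-hdfs/dn_socket</value>
    </property>
</configuration>
EOF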
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 98d32b8631..6cc16bdbe1 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -30,19 +30,20 @@ under the License.
 
 Besides Hive, many other systems also use Hive Metastore to store metadata. So through the Hive Catalog, we can access not only Hive but also systems that use Hive Metastore as their metadata store, such as Iceberg and Hudi.
 
-## Usage Limitations
-
-1. Hive versions 1/2/3 are supported.
-2. Managed Tables and External Tables are supported.
-3. Can identify hive, iceberg and hudi metadata stored in Hive Metastore.
-4. Supports hive tables with data stored on JuiceFS, used as follows (juicefs-hadoop-x.x.x.jar needs to be placed under fe/lib/ and apache_hdfs_broker/lib/).
-5. Supports hive tables with data stored on CHDFS. The environment needs to be configured as follows:
+## Usage Notes
+
+1. Place core-site.xml, hdfs-site.xml and hive-site.xml in the conf directories of FE and BE. The Hadoop configuration files in the conf directory are read first, followed by the configuration files under the path set in the environment variable `HADOOP_CONF_DIR`.
+2. Hive versions 1/2/3 are supported.
+3. Managed Tables and External Tables are supported.
+4. Can identify hive, iceberg and hudi metadata stored in Hive Metastore.
+5. Supports hive tables with data stored on JuiceFS, used as follows (juicefs-hadoop-x.x.x.jar needs to be placed under fe/lib/ and apache_hdfs_broker/lib/).
+6. Supports hive tables with data stored on CHDFS. The environment needs to be configured as follows:
   1. Place chdfs_hadoop_plugin_network-x.x.jar under fe/lib/ and apache_hdfs_broker/lib/
   2. Copy core-site.xml and hdfs-site.xml from the Hadoop cluster where hive resides to the fe/conf/ and apache_hdfs_broker/conf directories
 
 <version since="dev">
 
-6. Supports hive and iceberg tables with data stored on GooseFS(GFS). The environment needs to be configured as follows:
+7. Supports hive and iceberg tables with data stored on GooseFS(GFS). The environment needs to be configured as follows:
   1. Place goosefs-x.x.x-client.jar under fe/lib/ and apache_hdfs_broker/lib/
   2. Add the properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating the catalog
 
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 58b629654a..ac6850a094 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1576,6 +1576,9 @@ build_hadoop_libs() {
     mkdir -p "${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
     cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/* "${TP_INSTALL_DIR}/lib/hadoop_hdfs/"
     cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/include/hdfs.h "${TP_INSTALL_DIR}/include/hadoop_hdfs/"
+    rm -rf "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/"*.a
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type f ! -name '*.a' -exec cp {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
+    find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type l -exec cp -P {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \;
 }
 
 if [[ "${#packages[@]}" -eq 0 ]]; then

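A hedged post-build verification, assuming build.sh's default output/ layout: confirm the shared Hadoop native libs were packaged and that no static archives remain.

ls output/be/lib/hadoop_hdfs/native/                  # expect libhadoop.so*, libhdfs.so*
find output/be/lib/hadoop_hdfs/native/ -name '*.a'    # expect no output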
