This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 680199e855 [Fix](multi-catalog) Fix hadoop short circuit reading cannot be enabled in some environments. (#21548) 680199e855 is described below commit 680199e8557de1a806321ce81981f838b2fb590f Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Thu Jul 6 13:44:55 2023 +0800 [Fix](multi-catalog) Fix hadoop short circuit reading cannot be enabled in some environments. (#21548) Merge #21516 to branch-1.2-lts. Fix an issue where hadoop short circuit reading cannot be enabled in some environments. - Revert #21430 because it will cause a performance degradation issue. - Add `$HADOOP_CONF_DIR` to `$CLASSPATH`. - Remove the empty `hdfs-site.xml`, because in some environments it prevents hadoop short circuit reading from being enabled. - Copy the hadoop common native libs (which are copied from https://github.com/apache/doris-thirdparty/pull/98) and add them to `LD_LIBRARY_PATH`, because in some environments `LD_LIBRARY_PATH` does not contain the hadoop common native libs, which prevents hadoop short circuit reading from being enabled. 
--- be/CMakeLists.txt | 1 - bin/start_be.sh | 11 +++++++---- bin/start_fe.sh | 5 ++++- build.sh | 2 -- conf/hdfs-site.xml | 23 ----------------------- docs/en/docs/lakehouse/multi-catalog/hive.md | 13 +++++++------ docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 17 +++++++++-------- thirdparty/build-thirdparty.sh | 3 +++ 8 files changed, 30 insertions(+), 45 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 12621403fb..9307e018f0 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -951,7 +951,6 @@ install(FILES ${BASE_DIR}/../conf/be.conf ${BASE_DIR}/../conf/odbcinst.ini ${BASE_DIR}/../conf/asan_suppr.conf - ${BASE_DIR}/../conf/hdfs-site.xml DESTINATION ${OUTPUT_DIR}/conf) get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) diff --git a/bin/start_be.sh b/bin/start_be.sh index e4bd71a940..a26241862b 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -102,15 +102,18 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then done fi +if [[ -n "${HADOOP_CONF_DIR}" ]]; then + export DORIS_CLASSPATH="${HADOOP_CONF_DIR}:${DORIS_CLASSPATH}" +fi + # the CLASSPATH and LIBHDFS_OPTS is used for hadoop libhdfs # and conf/ dir so that hadoop libhdfs can read .xml config file in conf/ -if command -v hadoop >/dev/null 2>&1; then - HADOOP_SYSTEM_CLASSPATH="$(hadoop classpath --glob)" -fi -export CLASSPATH="${HADOOP_SYSTEM_CLASSPATH}:${DORIS_HOME}/conf/:${DORIS_CLASSPATH}" +export CLASSPATH="${DORIS_HOME}/conf/:${DORIS_CLASSPATH}:${CLASSPATH}" # DORIS_CLASSPATH is for self-managed jni export DORIS_CLASSPATH="-Djava.class.path=${DORIS_CLASSPATH}" +export LD_LIBRARY_PATH="${DORIS_HOME}/lib/hadoop_hdfs/native:${LD_LIBRARY_PATH}" + jdk_version() { local java_cmd="${1}" local result diff --git a/bin/start_fe.sh b/bin/start_fe.sh index 5028baedaf..03b0e8e8d5 100755 --- a/bin/start_fe.sh +++ b/bin/start_fe.sh @@ -179,7 +179,10 @@ done # make sure the doris-fe.jar is at first order, so that some classed # with same qualified name 
can be loaded priority from doris-fe.jar CLASSPATH="${DORIS_FE_JAR}:${CLASSPATH}" -export CLASSPATH="${CLASSPATH}:${DORIS_HOME}/lib:${DORIS_HOME}/conf" +if [[ -n "${HADOOP_CONF_DIR}" ]]; then + CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}" +fi +export CLASSPATH="${DORIS_HOME}/conf:${CLASSPATH}:${DORIS_HOME}/lib" pidfile="${PID_DIR}/fe.pid" diff --git a/build.sh b/build.sh index 531ba4ea13..a7f46b018d 100755 --- a/build.sh +++ b/build.sh @@ -507,7 +507,6 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then cp -r -p "${DORIS_HOME}/bin"/*_fe.sh "${DORIS_OUTPUT}/fe/bin"/ cp -r -p "${DORIS_HOME}/conf/fe.conf" "${DORIS_OUTPUT}/fe/conf"/ cp -r -p "${DORIS_HOME}/conf/ldap.conf" "${DORIS_OUTPUT}/fe/conf"/ - cp -r -p "${DORIS_HOME}/conf"/*.xml "${DORIS_OUTPUT}/fe/conf"/ cp -r -p "${DORIS_HOME}/conf/mysql_ssl_default_certificate" "${DORIS_OUTPUT}/fe/"/ rm -rf "${DORIS_OUTPUT}/fe/lib"/* cp -r -p "${DORIS_HOME}/fe/fe-core/target/lib"/* "${DORIS_OUTPUT}/fe/lib"/ @@ -541,7 +540,6 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/" - rm -rf "${DORIS_OUTPUT}/be/lib/hadoop_hdfs/native/" fi if [[ "${BUILD_JAVA_UDF}" -eq 0 ]]; then diff --git a/conf/hdfs-site.xml b/conf/hdfs-site.xml deleted file mode 100644 index 32235bf8bc..0000000000 --- a/conf/hdfs-site.xml +++ /dev/null @@ -1,23 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<!-- Put site-specific property overrides in this file. --> - -<configuration> -</configuration> diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md index f82df22ab7..1a269d6774 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hive.md +++ b/docs/en/docs/lakehouse/multi-catalog/hive.md @@ -34,17 +34,18 @@ Besides Hive, many other systems, such as Iceberg and Hudi, use Hive Metastore t When connnecting to Hive, Doris: -1. Supports Hive version 1/2/3; -2. Supports both Managed Table and External Table; -3. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore; -4. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`). -5. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow below steps to prepare doris environment: +1. Need to put core-site.xml, hdfs-site.xml and hive-site.xml in the conf directory of FE and BE. First read the hadoop configuration file in the conf directory, and then read the related to the environment variable `HADOOP_CONF_DIR` configuration file. +2. Supports Hive version 1/2/3; +3. Supports both Managed Table and External Table; +4. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore; +5. 
Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`). +6. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow below steps to prepare doris environment: 1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/ 2. copy core-site.xml and hdfs-site.xml from hive cluster to fe/conf/ and apache_hdfs_broker/conf <version since="dev"> -6. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow below steps to prepare doris environment: +7. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow below steps to prepare doris environment: 1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/ 2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md index 98d32b8631..6cc16bdbe1 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md @@ -30,19 +30,20 @@ under the License. 除了 Hive 外,很多其他系统也会使用 Hive Metastore 存储元数据。所以通过 Hive Catalog,我们不仅能访问 Hive,也能访问使用 Hive Metastore 作为元数据存储的系统。如 Iceberg、Hudi 等。 -## 使用限制 - -1. hive 支持 1/2/3 版本。 -2. 支持 Managed Table 和 External Table。 -3. 可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。 -4. 支持数据存储在 Juicefs 上的 hive 表,用法如下(需要把juicefs-hadoop-x.x.x.jar放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下)。 -5. 支持数据存储在 CHDFS 上的 hive 表。需配置环境: +## 使用须知 + +1. 将 core-site.xml,hdfs-site.xml 和 hive-site.xml 放到 FE 和 BE 的 conf 目录下。优先读取 conf 目录下的 hadoop 配置文件,再读取环境变量 `HADOOP_CONF_DIR` 的相关配置文件。 +2. hive 支持 1/2/3 版本。 +3. 
支持 Managed Table 和 External Table。 +4. 可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。 +5. 支持数据存储在 Juicefs 上的 hive 表,用法如下(需要把juicefs-hadoop-x.x.x.jar放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下)。 +6. 支持数据存储在 CHDFS 上的 hive 表。需配置环境: 1. 把chdfs_hadoop_plugin_network-x.x.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下 2. 将 hive 所在 Hadoop 集群的 core-site.xml 和 hdfs-site.xml 复制到 fe/conf/ 和 apache_hdfs_broker/conf 目录下 <version since="dev"> -6. 支持数据存在在 GooseFS(GFS) 上的 hive、iceberg表。需配置环境: +7. 支持数据存在在 GooseFS(GFS) 上的 hive、iceberg表。需配置环境: 1. 把 goosefs-x.x.x-client.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下 2. 创建 catalog 时增加属性:'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 58b629654a..ac6850a094 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1576,6 +1576,9 @@ build_hadoop_libs() { mkdir -p "${TP_INSTALL_DIR}/lib/hadoop_hdfs/" cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/* "${TP_INSTALL_DIR}/lib/hadoop_hdfs/" cp -r ./hadoop-dist/target/hadoop-libhdfs-3.3.4/include/hdfs.h "${TP_INSTALL_DIR}/include/hadoop_hdfs/" + rm -rf "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/*.a" + find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type f ! -name '*.a' -exec cp {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \; + find ./hadoop-dist/target/hadoop-3.3.4/lib/native/ -type l -exec cp -P {} "${TP_INSTALL_DIR}/lib/hadoop_hdfs/native/" \; } if [[ "${#packages[@]}" -eq 0 ]]; then --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org