This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push:
     new 6f4e356  KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)
6f4e356 is described below

commit 6f4e3562d9061bee4ef3288f35e9e53133a9e9cd
Author: Xiaoxiang Yu <x...@apache.org>
AuthorDate: Mon Jan 4 22:50:13 2021 +0800

    KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)

    * KYLIN-4858 Support Kylin4 deployment on CDH 6.X

    * KYLIN-4858 Support Kylin4 deployment on CDH 6.X
---
 build/bin/find-hive-dependency.sh     |   5 +-
 build/bin/kylin.sh                    |   3 +
 build/bin/replace-jars-under-spark.sh | 140 ++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh
index 22ee8f4..31530e5 100755
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -197,7 +197,10 @@ else
     fi
     hive_lib_dir="$HIVE_LIB"
 fi
-hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' ! -name '*jackson-datatype-joda*' ! -name '*derby*' -printf '%p:' | sed 's/:$//'`
+
+hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' \
+    ! -name '*jackson-datatype-joda*' ! -name '*derby*' ! -name "*jetty*" ! -name "*jsp*" ! -name "*servlet*" ! -name "*hbase*" ! -name "*websocket*" \
+    -printf '%p:' | sed 's/:$//'`
 
 validateDirectory ${hive_conf_path}
 checkFileExist hive_lib ${hive_lib}
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index c6048e2..c62fb47 100755
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -62,6 +62,9 @@ function retrieveDependency() {
 #        source ${dir}/find-flink-dependency.sh
     fi
 
+    # Replace jars for different hadoop dist
+    bash ${dir}/replace-jars-under-spark.sh
+
     # get hdp_version
     if [ -z "${hdp_version}" ]; then
         hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export HDP_VERSION=\(.*\)/\2/"p`
diff --git a/build/bin/replace-jars-under-spark.sh b/build/bin/replace-jars-under-spark.sh
new file mode 100644
index 0000000..0c5a8cc
--- /dev/null
+++ b/build/bin/replace-jars-under-spark.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BYPASS=${KYLIN_HOME}/spark/jars/replace-jars-bypass
+cdh_mapreduce_path="/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce"
+hadoop_lib_path="/usr/lib/hadoop"
+
+if [ -f ${BYPASS} ]; then
+  exit 0
+fi
+
+if [ ! -d "$KYLIN_HOME/spark" ]; then
+  echo "Skip spark which not owned by kylin. SPARK_HOME is $SPARK_HOME and KYLIN_HOME is $KYLIN_HOME ."
+  exit 0
+fi
+
+echo "Start replacing hadoop jars under ${SPARK_HOME}/jars."
+
+function check_cdh_hadoop() {
+  # hadoop-common-3.0.0-cdh6.2.0.jar
+  hadoop_common_file=$(find ${cdh_mapreduce_path}/../hadoop/ -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
+  cdh_version=${hadoop_common_file##*-}
+  if [[ "${cdh_version}" == cdh6.* ]]; then
+    export is_cdh6=1
+  else
+    export is_cdh6=0
+  fi
+  if [[ "${cdh_version}" == cdh5.* ]]; then
+    export is_cdh5=1
+  else
+    export is_cdh5=0
+  fi
+}
+
+function check_aws_emr() {
+  if [ ! -d $hadoop_lib_path ]; then
+    return 0
+  fi
+
+  # hadoop-common-3.2.1-amzn-0.jar
+  hadoop_common_file=$(find $hadoop_lib_path -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
+  emr_version_1=${hadoop_common_file##*common-}
+  echo $emr_version_1
+  arrVersion=(${emr_version_1//-/ })
+
+  if [[ "${arrVersion[0]}" == 3.* && "${arrVersion[1]}" == *amzn* ]]; then
+    export is_emr6=1
+  else
+    export is_emr6=0
+  fi
+
+  if [[ "${arrVersion[0]}" == 2.* && "${arrVersion[1]}" == *amzn* ]]; then
+    export is_emr5=1
+  else
+    export is_emr5=0
+  fi
+}
+
+check_cdh_hadoop
+check_aws_emr
+
+common_jars=
+hdfs_jars=
+mr_jars=
+yarn_jars=
+other_jars=
+
+if [ $is_cdh6 == 1 ]; then
+  common_jars=$(find $cdh_mapreduce_path/../hadoop -maxdepth 2 \
+    -name "hadoop-annotations-*.jar" -not -name "*test*" \
+    -o -name "hadoop-auth-*.jar" -not -name "*test*" \
+    -o -name "hadoop-common-*.jar" -not -name "*test*")
+
+  hdfs_jars=$(find $cdh_mapreduce_path/../hadoop-hdfs -maxdepth 1 -name "hadoop-hdfs-*" -not -name "*test*" -not -name "*nfs*")
+
+  mr_jars=$(find $cdh_mapreduce_path -maxdepth 1 \
+    -name "hadoop-mapreduce-client-app-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-jobclient-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-shuffle-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-core-*.jar" -not -name "*test*")
+
+  yarn_jars=$(find $cdh_mapreduce_path/../hadoop-yarn -maxdepth 1 \
+    -name "hadoop-yarn-api-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-client-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-server-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-server-web-proxy-*.jar" -not -name "*test*")
+
+  other_jars=$(find $cdh_mapreduce_path/../../jars -maxdepth 1 -name "htrace-core4*" || find $cdh_mapreduce_path/../hadoop -maxdepth 2 -name "htrace-core4*")
+
+  if [[ $is_cdh6 == 1 ]]; then
+    cdh6_jars=$(find ${cdh_mapreduce_path}/../../jars -maxdepth 1 \
+      -name "woodstox-core-*.jar" -o -name "commons-configuration2-*.jar" -o -name "re2j-*.jar")
+  fi
+fi
+
+jar_list="${common_jars} ${hdfs_jars} ${mr_jars} ${yarn_jars} ${other_jars} ${cdh6_jars}"
+
+echo "Find platform specific jars:${jar_list}, will replace with these jars under ${SPARK_HOME}/jars."
+
+if [ $is_cdh6 == 1 ]; then
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-hdfs-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-yarn-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-mapreduce-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hive-exec-*.jar" -exec rm -f {} \;
+#  cp ${KYLIN_HOME}/bin/hadoop3_jars/cdh6/*.jar ${SPARK_HOME}/jars
fi
+
+for jar_file in ${jar_list}; do
+  $(cp ${jar_file} ${KYLIN_HOME}/spark/jars)
+done
+
+# Remove all spaces
+jar_list=${jar_list// /}
+
+if [ -z "${jar_list}" ]; then
+  echo "Please confirm that the corresponding hadoop jars have been replaced. The automatic replacement program cannot be executed correctly."
+else
+  echo "Replace jars under SPARK_HOME/jars finished."
+  touch ${BYPASS}
+fi
+
+echo "Done hadoop jars replacement under ${SPARK_HOME}/jars."
\ No newline at end of file
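For anyone trying the change out by hand, a minimal sketch of a manual run on a CDH 6.x node follows. It assumes the packaged script ends up at ${KYLIN_HOME}/bin/replace-jars-under-spark.sh (the ${dir} from which kylin.sh invokes it) and that KYLIN_HOME and SPARK_HOME are already exported; neither assumption is spelled out in the commit itself. Because the script exits as soon as the replace-jars-bypass marker exists, the marker must be removed before re-running.

    # Hypothetical manual invocation; paths are assumptions, adjust to the actual install.
    export KYLIN_HOME=/path/to/apache-kylin-bin          # assumed install location
    rm -f ${KYLIN_HOME}/spark/jars/replace-jars-bypass   # clear the bypass marker so the script runs again
    bash ${KYLIN_HOME}/bin/replace-jars-under-spark.sh   # detects CDH/EMR and copies the matching hadoop jars
    ls ${KYLIN_HOME}/spark/jars/hadoop-*cdh6*.jar        # rough sanity check that CDH 6 jars are now in place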