This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push:
     new 6f4e356  KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)
6f4e356 is described below

commit 6f4e3562d9061bee4ef3288f35e9e53133a9e9cd
Author: Xiaoxiang Yu <x...@apache.org>
AuthorDate: Mon Jan 4 22:50:13 2021 +0800

    KYLIN-4858 Support Kylin4 deployment on CDH 6.X (#1535)

    * KYLIN-4858 Support Kylin4 deployment on CDH 6.X

    * KYLIN-4858 Support Kylin4 deployment on CDH 6.X
---
 build/bin/find-hive-dependency.sh     |   5 +-
 build/bin/kylin.sh                    |   3 +
 build/bin/replace-jars-under-spark.sh | 140 ++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh
index 22ee8f4..31530e5 100755
--- a/build/bin/find-hive-dependency.sh
+++ b/build/bin/find-hive-dependency.sh
@@ -197,7 +197,10 @@ else
     fi
     hive_lib_dir="$HIVE_LIB"
 fi
-hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' ! -name '*jackson-datatype-joda*' ! -name '*derby*' -printf '%p:' | sed 's/:$//'`
+
+hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' \
+    ! -name '*jackson-datatype-joda*' ! -name '*derby*' ! -name "*jetty*" ! -name "*jsp*" ! -name "*servlet*" ! -name "*hbase*" ! -name "*websocket*" \
+    -printf '%p:' | sed 's/:$//'`
 
 validateDirectory ${hive_conf_path}
 checkFileExist hive_lib ${hive_lib}
diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh
index c6048e2..c62fb47 100755
--- a/build/bin/kylin.sh
+++ b/build/bin/kylin.sh
@@ -62,6 +62,9 @@ function retrieveDependency() {
 #        source ${dir}/find-flink-dependency.sh
     fi
 
+    # Replace jars for different hadoop dist
+    bash ${dir}/replace-jars-under-spark.sh
+
     # get hdp_version
     if [ -z "${hdp_version}" ]; then
         hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export HDP_VERSION=\(.*\)/\2/"p`
diff --git a/build/bin/replace-jars-under-spark.sh b/build/bin/replace-jars-under-spark.sh
new file mode 100644
index 0000000..0c5a8cc
--- /dev/null
+++ b/build/bin/replace-jars-under-spark.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BYPASS=${KYLIN_HOME}/spark/jars/replace-jars-bypass
+cdh_mapreduce_path="/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce"
+hadoop_lib_path="/usr/lib/hadoop"
+
+if [ -f ${BYPASS} ]; then
+  exit 0
+fi
+
+if [ ! -d "$KYLIN_HOME/spark" ]; then
+  echo "Skip spark which not owned by kylin. SPARK_HOME is $SPARK_HOME and KYLIN_HOME is $KYLIN_HOME ."
+  exit 0
+fi
+
+echo "Start replacing hadoop jars under ${SPARK_HOME}/jars."
+
+function check_cdh_hadoop() {
+  # hadoop-common-3.0.0-cdh6.2.0.jar
+  hadoop_common_file=$(find ${cdh_mapreduce_path}/../hadoop/ -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
+  cdh_version=${hadoop_common_file##*-}
+  if [[ "${cdh_version}" == cdh6.* ]]; then
+    export is_cdh6=1
+  else
+    export is_cdh6=0
+  fi
+  if [[ "${cdh_version}" == cdh5.* ]]; then
+    export is_cdh5=1
+  else
+    export is_cdh5=0
+  fi
+}
+
+function check_aws_emr() {
+  if [ ! -d $hadoop_lib_path ]; then
+    return 0
+  fi
+
+  # hadoop-common-3.2.1-amzn-0.jar
+  hadoop_common_file=$(find $hadoop_lib_path -maxdepth 1 -name "hadoop-common-*.jar" -not -name "*test*" | tail -1)
+  emr_version_1=${hadoop_common_file##*common-}
+  echo $emr_version_1
+  arrVersion=(${emr_version_1//-/ })
+
+  if [[ "${arrVersion[0]}" == 3.* && "${arrVersion[1]}" == *amzn* ]]; then
+    export is_emr6=1
+  else
+    export is_emr6=0
+  fi
+
+  if [[ "${arrVersion[0]}" == 2.* && "${arrVersion[1]}" == *amzn* ]]; then
+    export is_emr5=1
+  else
+    export is_emr5=0
+  fi
+}
+
+check_cdh_hadoop
+check_aws_emr
+
+common_jars=
+hdfs_jars=
+mr_jars=
+yarn_jars=
+other_jars=
+
+if [ $is_cdh6 == 1 ]; then
+  common_jars=$(find $cdh_mapreduce_path/../hadoop -maxdepth 2 \
+    -name "hadoop-annotations-*.jar" -not -name "*test*" \
+    -o -name "hadoop-auth-*.jar" -not -name "*test*" \
+    -o -name "hadoop-common-*.jar" -not -name "*test*")
+
+  hdfs_jars=$(find $cdh_mapreduce_path/../hadoop-hdfs -maxdepth 1 -name "hadoop-hdfs-*" -not -name "*test*" -not -name "*nfs*")
+
+  mr_jars=$(find $cdh_mapreduce_path -maxdepth 1 \
+    -name "hadoop-mapreduce-client-app-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-jobclient-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-shuffle-*.jar" -not -name "*test*" \
+    -o -name "hadoop-mapreduce-client-core-*.jar" -not -name "*test*")
+
+  yarn_jars=$(find $cdh_mapreduce_path/../hadoop-yarn -maxdepth 1 \
+    -name "hadoop-yarn-api-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-client-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-server-common-*.jar" -not -name "*test*" \
+    -o -name "hadoop-yarn-server-web-proxy-*.jar" -not -name "*test*")
+
+  other_jars=$(find $cdh_mapreduce_path/../../jars -maxdepth 1 -name "htrace-core4*" || find $cdh_mapreduce_path/../hadoop -maxdepth 2 -name "htrace-core4*")
+
+  if [[ $is_cdh6 == 1 ]]; then
+    cdh6_jars=$(find ${cdh_mapreduce_path}/../../jars -maxdepth 1 \
+      -name "woodstox-core-*.jar" -o -name "commons-configuration2-*.jar" -o -name "re2j-*.jar")
+  fi
+fi
+
+jar_list="${common_jars} ${hdfs_jars} ${mr_jars} ${yarn_jars} ${other_jars} ${cdh6_jars}"
+
+echo "Find platform specific jars:${jar_list}, will replace with these jars under ${SPARK_HOME}/jars."
+
+if [ $is_cdh6 == 1 ]; then
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-hdfs-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-yarn-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hadoop-mapreduce-*.jar" -exec rm -f {} \;
+  find ${KYLIN_HOME}/spark/jars -name "hive-exec-*.jar" -exec rm -f {} \;
+#  cp ${KYLIN_HOME}/bin/hadoop3_jars/cdh6/*.jar ${SPARK_HOME}/jars
fi
+
+for jar_file in ${jar_list}; do
+  $(cp ${jar_file} ${KYLIN_HOME}/spark/jars)
+done
+
+# Remove all spaces
+jar_list=${jar_list// /}
+
+if [ -z "${jar_list}" ]; then
+  echo "Please confirm that the corresponding hadoop jars have been replaced. The automatic replacement program cannot be executed correctly."
+else
+  echo "Replace jars under SPARK_HOME/jars finished."
+  touch ${BYPASS}
+fi
+
+echo "Done hadoop jars replacement under ${SPARK_HOME}/jars."
\ No newline at end of file
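For anyone trying the change out by hand, a minimal sketch of a manual run on a CDH 6.x node follows. It assumes the packaged script ends up at ${KYLIN_HOME}/bin/replace-jars-under-spark.sh (the ${dir} from which kylin.sh invokes it) and that KYLIN_HOME and SPARK_HOME are already exported; neither assumption is spelled out in the commit itself. Because the script exits as soon as the replace-jars-bypass marker exists, the marker must be removed before re-running.

    # Hypothetical manual invocation; paths are assumptions, adjust to the actual install.
    export KYLIN_HOME=/path/to/apache-kylin-bin          # assumed install location
    rm -f ${KYLIN_HOME}/spark/jars/replace-jars-bypass   # clear the bypass marker so the script runs again
    bash ${KYLIN_HOME}/bin/replace-jars-under-spark.sh   # detects CDH/EMR and copies the matching hadoop jars
    ls ${KYLIN_HOME}/spark/jars/hadoop-*cdh6*.jar        # rough sanity check that CDH 6 jars are now in place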