KYLIN-2331 refine HADOOP_CONF_DIR search logic
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/2c438602 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/2c438602 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/2c438602 Branch: refs/heads/master Commit: 2c43860240b1c80cef1f14d0baaba989a49aa79d Parents: 32cce58 Author: Hongbin Ma <mahong...@apache.org> Authored: Fri Feb 24 15:05:37 2017 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Fri Feb 24 15:07:08 2017 +0800 ---------------------------------------------------------------------- build/bin/find-hadoop-conf-dir.sh | 77 ++++++++++++++++++++ build/bin/find-hbase-dependency.sh | 2 + build/bin/find-hive-dependency.sh | 3 +- build/bin/find-kafka-dependency.sh | 12 +-- build/bin/find-spark-dependency.sh | 3 +- build/bin/kylin.sh | 14 ++-- build/conf/kylin.properties | 2 +- .../apache/kylin/common/KylinConfigBase.java | 7 +- .../kylin/engine/spark/SparkExecutable.java | 26 ++++--- 9 files changed, 112 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hadoop-conf-dir.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-hadoop-conf-dir.sh b/build/bin/find-hadoop-conf-dir.sh new file mode 100644 index 0000000..5334b8a --- /dev/null +++ b/build/bin/find-hadoop-conf-dir.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh + +echo Retrieving hadoop conf dir... + +override_hadoop_conf_dir=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.env.hadoop-conf-dir` + +if [ -n "$override_hadoop_conf_dir" ]; then + echo "$override_hadoop_conf_dir is override as the kylin_hadoop_conf_dir" + export kylin_hadoop_conf_dir=${override_hadoop_conf_dir} + return +fi + +hbase_classpath=`hbase classpath` + +arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) +kylin_hadoop_conf_dir= + +for data in ${arr[@]} +do + result=`echo $data | grep -v -E ".*jar"` + if [ $result ] + then + valid_conf_dir=true + + if [ ! -f $result/yarn-site.xml ] + then + verbose "$result is not valid hadoop dir conf because yarn-site.xml is missing" + valid_conf_dir=false + continue + fi + + if [ ! -f $result/mapred-site.xml ] + then + verbose "$result is not valid hadoop dir conf because mapred-site.xml is missing" + valid_conf_dir=false + continue + fi + + if [ ! -f $result/hdfs-site.xml ] + then + verbose "$result is not valid hadoop dir conf because hdfs-site.xml is missing" + valid_conf_dir=false + continue + fi + + if [ ! -f $result/core-site.xml ] + then + verbose "$result is not valid hadoop dir conf because core-site.xml is missing" + valid_conf_dir=false + continue + fi + + verbose "$result is chosen as the kylin_hadoop_conf_dir" + export kylin_hadoop_conf_dir=$result + return + fi +done + http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hbase-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-hbase-dependency.sh b/build/bin/find-hbase-dependency.sh index 7dbb53b..14dde3b 100644 --- a/build/bin/find-hbase-dependency.sh +++ b/build/bin/find-hbase-dependency.sh @@ -19,6 +19,8 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh +echo Retrieving hbase dependency... + hbase_classpath=`hbase classpath` # special handling for Amazon EMR, to prevent re-init of hbase-setenv http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-hive-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh index 453a35a..aa39da0 100644 --- a/build/bin/find-hive-dependency.sh +++ b/build/bin/find-hive-dependency.sh @@ -19,10 +19,11 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh +echo Retrieving hive dependency... + client_mode=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.client` hive_env= -echo Retrieving hive dependency... if [ "${client_mode}" == "beeline" ] then beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-kafka-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-kafka-dependency.sh b/build/bin/find-kafka-dependency.sh index d3219e7..999face 100644 --- a/build/bin/find-kafka-dependency.sh +++ b/build/bin/find-kafka-dependency.sh @@ -22,15 +22,17 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh kafka_home= echo Retrieving kafka dependency... -if [ -n "$KAFKA_HOME" ] + +if [ -z "$KAFKA_HOME" ] then - verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies." - kafka_home=$KAFKA_HOME + verbose "Couldn't find kafka home. If you want to enable streaming processing, Please set KAFKA_HOME to the path which contains kafka dependencies." + return fi -if [ -z "$KAFKA_HOME" ] +if [ -n "$KAFKA_HOME" ] then - quit "Couldn't find kafka home. Please set KAFKA_HOME to the path which contains kafka dependencies." + verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies." + kafka_home=$KAFKA_HOME fi # works for kafka 9+ http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/find-spark-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh index 6f74d8a..4ea5c3e 100644 --- a/build/bin/find-spark-dependency.sh +++ b/build/bin/find-spark-dependency.sh @@ -19,9 +19,10 @@ source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh +echo Retrieving Spark dependency... + spark_home= -verbose Retrieving Spark dependency... if [ -n "$SPARK_HOME" ] then verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies." http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/bin/kylin.sh ---------------------------------------------------------------------- diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh index 15e1aed..a87fa78 100644 --- a/build/bin/kylin.sh +++ b/build/bin/kylin.sh @@ -32,6 +32,9 @@ function retrieveDependency() { #retrive $hive_dependency and $hbase_dependency source ${dir}/find-hive-dependency.sh source ${dir}/find-hbase-dependency.sh + source ${dir}/find-hadoop-conf-dir.sh + source ${dir}/find-kafka-dependency.sh + source ${dir}/find-spark-dependency.sh #retrive $KYLIN_EXTRA_START_OPTS if [ -f "${dir}/setenv.sh" ]; then @@ -39,15 +42,7 @@ function retrieveDependency() { fi export HBASE_CLASSPATH_PREFIX=${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH_PREFIX} - export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency} - if [ -n "$KAFKA_HOME" ] - then - source ${dir}/find-kafka-dependency.sh - export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency} - fi - - source ${dir}/find-spark-dependency.sh - export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency} + export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency}:${kafka_dependency}:${spark_dependency} verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}" } @@ -114,6 +109,7 @@ then -Dkylin.hbase.dependency=${hbase_dependency} \ -Dkylin.kafka.dependency=${kafka_dependency} \ -Dkylin.spark.dependency=${spark_dependency} \ + -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \ -Dspring.profiles.active=${spring_profile} \ org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid & http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/build/conf/kylin.properties ---------------------------------------------------------------------- diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties index 12c4c9c..d7ba0b3 100644 --- a/build/conf/kylin.properties +++ b/build/conf/kylin.properties @@ -204,7 +204,7 @@ kylin.security.saml.context-path=/kylin ### Spark Engine Configs ### # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" before run spark-submit -#kylin.engine.spark.env.hadoop-conf-dir=/etc/hive/conf +#kylin.env.hadoop-conf-dir=/etc/hive/conf # Estimate the RDD partition numbers kylin.engine.spark.rdd-partition-cut-mb=10 http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 1c26c63..5317a39 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -773,12 +773,9 @@ abstract public class KylinConfigBase implements Serializable { // ENGINE.SPARK // ============================================================================ - public String getHadoopConfDir() { - return getOptional("kylin.engine.spark.env.hadoop-conf-dir", ""); - } - public void setHadoopConfDir(String hadoopConfDir) { - setProperty("kylin.engine.spark.env.hadoop-conf-dir", hadoopConfDir); + public String getHadoopConfDir() { + return getOptional("kylin.env.hadoop-conf-dir", ""); } public String getSparkAdditionalJars() { http://git-wip-us.apache.org/repos/asf/kylin/blob/2c438602/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java ---------------------------------------------------------------------- diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java index c671a91..cf7438c 100644 --- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java +++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkExecutable.java @@ -79,19 +79,22 @@ public class SparkExecutable extends AbstractExecutable { } String jars = this.getParam(JARS); - String hadoopConf = "/etc/hadoop/conf"; - if (StringUtils.isNotEmpty(config.getHadoopConfDir())) { - hadoopConf = config.getHadoopConfDir(); - } else { - String hiveConf = ClassLoader.getSystemClassLoader().getResource("hive-site.xml").getFile().toString(); - File hiveConfFile = new File(hiveConf); - if (hiveConfFile.exists() == true) { - logger.info("Locate hive-site.xml in " + hiveConfFile); - hadoopConf = hiveConfFile.getParent(); - } + //hadoop conf dir + String hadoopConf = null; + hadoopConf = System.getProperty("kylin.hadoop.conf.dir"); + + if (StringUtils.isEmpty(hadoopConf)) { + throw new RuntimeException("kylin_hadoop_conf_dir is empty, check if there's error in the output of 'kylin.sh start'"); + } + + File hiveConfFile = new File(hadoopConf, "hive-site.xml"); + if (!hiveConfFile.exists()) { + throw new RuntimeException("Cannot find hive-site.xml in kylin_hadoop_conf_dir: " + hadoopConf + // + ". In order to enable spark cubing, you must set kylin.env.hadoop-conf-dir to a dir which contains at least core-site.xml, hdfs-site.xml, hive-site.xml, mapred-site.xml, yarn-site.xml"); } logger.info("Using " + hadoopConf + " as HADOOP_CONF_DIR"); + //hbase-site.xml String hbaseConf = ClassLoader.getSystemClassLoader().getResource("hbase-site.xml").getFile().toString(); logger.info("Get hbase-site.xml location from classpath: " + hbaseConf); File hbaseConfFile = new File(hbaseConf); @@ -114,8 +117,7 @@ public class SparkExecutable extends AbstractExecutable { stringBuilder.append("--files %s --jars %s %s %s"); try { - String cmd = String.format(stringBuilder.toString(), - hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs()); + String cmd = String.format(stringBuilder.toString(), hadoopConf, config.getSparkHome(), hbaseConfFile.getAbsolutePath(), jars, jobJar, formatArgs()); logger.info("cmd:" + cmd); final StringBuilder output = new StringBuilder(); CliCommandExecutor exec = new CliCommandExecutor();