KYLIN-2344 Package Spark into Kylin binary package
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e864cd3b Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e864cd3b Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e864cd3b Branch: refs/heads/master-cdh5.7 Commit: e864cd3b1c06700b1a1054d1f520eaabedc25d82 Parents: b7d87bb Author: shaofengshi <shaofeng...@apache.org> Authored: Tue Jan 3 10:28:38 2017 +0800 Committer: shaofengshi <shaofeng...@apache.org> Committed: Mon Jan 9 16:58:11 2017 +0800 ---------------------------------------------------------------------- build/bin/check-env.sh | 6 ++ build/bin/find-spark-dependency.sh | 45 +++++++++++++++ build/bin/kylin.sh | 6 ++ build/conf/kylin-spark-conf.properties | 2 +- build/conf/kylin.properties | 3 - build/script/compress.sh | 5 +- build/script/download-spark.sh | 52 +++++++++++++++++ build/script/functions.sh | 60 ++++++++++++++++++++ build/script/package.sh | 1 + .../org/apache/kylin/common/KylinConfig.java | 14 +++++ .../apache/kylin/common/KylinConfigBase.java | 27 ++++----- .../spark/SparkBatchCubingJobBuilder2.java | 4 +- 12 files changed, 200 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/check-env.sh ---------------------------------------------------------------------- diff --git a/build/bin/check-env.sh b/build/bin/check-env.sh index a4003c9..e446d66 100644 --- a/build/bin/check-env.sh +++ b/build/bin/check-env.sh @@ -47,3 +47,9 @@ if [ $? != 0 ] then quit "Failed to create $WORKING_DIR. Please make sure the user has right to access $WORKING_DIR" fi + +hadoop fs -mkdir -p $WORKING_DIR/spark-history +if [ $? != 0 ] +then + quit "Failed to create $WORKING_DIR/spark-history. 
Please make sure the user has right to access $WORKING_DIR" +fi \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/find-spark-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh new file mode 100644 index 0000000..6f74d8a --- /dev/null +++ b/build/bin/find-spark-dependency.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh + +spark_home= + +verbose Retrieving Spark dependency... +if [ -n "$SPARK_HOME" ] +then + verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies." + spark_home=$SPARK_HOME +fi + +if [ -z "$SPARK_HOME" ] +then + verbose "SPARK_HOME wasn't set, use $KYLIN_HOME/spark" + spark_home=$KYLIN_HOME/spark +fi + +spark_dependency=`find -L $spark_home -name 'spark-assembly-[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! 
-name '*sources*' ''-printf '%p:' | sed 's/:$//'` +if [ -z "$spark_dependency" ] +then + quit "spark assembly lib not found" +else + verbose "spark dependency: $spark_dependency" + export spark_dependency +fi + http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/bin/kylin.sh ---------------------------------------------------------------------- diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh index 0cdbbc6..7813b79 100644 --- a/build/bin/kylin.sh +++ b/build/bin/kylin.sh @@ -45,6 +45,11 @@ function retrieveDependency() { source ${dir}/find-kafka-dependency.sh export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${kafka_dependency} fi + + source ${dir}/find-spark-dependency.sh + export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${spark_dependency} + + verbose "HBASE_CLASSPATH: ${HBASE_CLASSPATH}" } # start command @@ -112,6 +117,7 @@ then -Dkylin.hive.dependency=${hive_dependency} \ -Dkylin.hbase.dependency=${hbase_dependency} \ -Dkylin.kafka.dependency=${kafka_dependency} \ + -Dkylin.spark.dependency=${spark_dependency} \ -Dspring.profiles.active=${spring_profile} \ org.apache.hadoop.util.RunJar ${tomcat_root}/bin/bootstrap.jar org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! 
> ${KYLIN_HOME}/pid & http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/conf/kylin-spark-conf.properties ---------------------------------------------------------------------- diff --git a/build/conf/kylin-spark-conf.properties b/build/conf/kylin-spark-conf.properties index 81567bb..5e6dafe 100644 --- a/build/conf/kylin-spark-conf.properties +++ b/build/conf/kylin-spark-conf.properties @@ -20,7 +20,7 @@ spark.executor.cores=4 spark.executor.instances=8 spark.history.kerberos.keytab=none spark.history.kerberos.principal=none -#spark.yarn.jar=hdfs://sandbox.hortonworks.com:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar +#spark.yarn.jar=hdfs://namenode:8020/apps/spark/spark-assembly-1.6.3-hadoop2.6.0.jar spark.driver.extraJavaOptions=-Dhdp.version=current spark.yarn.am.extraJavaOptions=-Dhdp.version=current spark.executor.extraJavaOptions=-Dhdp.version=current http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/conf/kylin.properties ---------------------------------------------------------------------- diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties index 98b66cb..bd0bbd4 100644 --- a/build/conf/kylin.properties +++ b/build/conf/kylin.properties @@ -133,9 +133,6 @@ kylin.engine.mr.mapper-input-rows=1000000 # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" before run spark-submit kylin.engine.spark.env.hadoop-conf-dir=/etc/hadoop/conf -# Spark install home, default be $KYLIN_HOME/spark/ -#kylin.engine.spark.spark-home= - # Spark job submission properties file, default be $KYLIN_HOME/conf/kylin-spark-conf.properties #kylin.engine.spark.properties-file= http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/compress.sh ---------------------------------------------------------------------- diff --git a/build/script/compress.sh b/build/script/compress.sh index 4e3592e..39e429c 100755 --- a/build/script/compress.sh +++ b/build/script/compress.sh @@ -34,11 +34,12 @@ 
package_name=apache-kylin-${version}-bin cd build/ rm -rf ${package_name} mkdir ${package_name} -cp -r lib tool bin conf tomcat ../examples/sample_cube commit_SHA1 ${package_name} -rm -rf lib tomcat commit_SHA1 +cp -r lib tool bin conf tomcat spark ../examples/sample_cube commit_SHA1 ${package_name} +rm -rf lib tomcat spark commit_SHA1 find ${package_name} -type d -exec chmod 755 {} \; find ${package_name} -type f -exec chmod 644 {} \; find ${package_name} -type f -name "*.sh" -exec chmod 755 {} \; +find ${package_name}/spark/bin/ -type f -exec chmod +x {} \; mkdir -p ../dist tar -cvzf ../dist/${package_name}.tar.gz ${package_name} rm -rf ${package_name} http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/download-spark.sh ---------------------------------------------------------------------- diff --git a/build/script/download-spark.sh b/build/script/download-spark.sh new file mode 100755 index 0000000..dcbcbe7 --- /dev/null +++ b/build/script/download-spark.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +dir=$(dirname ${0}) +cd ${dir}/../.. 
+ +source build/script/functions.sh + +rm -rf build/spark + +spark_version="1.6.3" +spark_pkg_md5="ce8a2e7529aac0f0175194061769dbd4" + +if [ ! -f "build/spark-${spark_version}-bin-hadoop2.6.tgz" ] +then + echo "no binary file found" + wget --directory-prefix=build/ http://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop2.6.tgz || echo "Download spark failed" +else + if [ `calMd5 build/spark-${spark_version}-bin-hadoop2.6.tgz | awk '{print $1}'` != "${spark_pkg_md5}" ] + then + echo "md5 check failed" + rm build/spark-${spark_version}-bin-hadoop2.6.tgz + wget --directory-prefix=build/ http://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop2.6.tgz || echo "Download spark failed" + + fi +fi + +tar -zxvf build/spark-${spark_version}-bin-hadoop2.6.tgz -C build/ || { exit 1; } +mv build/spark-${spark_version}-bin-hadoop2.6 build/spark + +# Remove unused components in Spark +rm -rf build/spark/lib/spark-examples-* +rm -rf build/spark/examples +rm -rf build/spark/data +rm -rf build/spark/python +rm -rf build/spark/R http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/functions.sh ---------------------------------------------------------------------- diff --git a/build/script/functions.sh b/build/script/functions.sh new file mode 100755 index 0000000..2eed617 --- /dev/null +++ b/build/script/functions.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function checkCommandExits() { + echo "Checking ${1}..." + if [ -z "$(command -v ${1})" ] + then + echo "Please install ${1} first so that Kylin packaging can proceed" + exit 1 + else + echo "${1} check passed" + fi +} + +function exportProjectVersions() { + if [ -z "${kylin_versoin}" ]; then + export kylin_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version -f kylin | grep -Ev '(^\[|Download\w+:)'` + echo "Apache Kylin Version: ${kylin_version}" + fi + if [ -z "${release_version}" ]; then + export release_version=$kap_version + fi +} + +function detectOSType() { + OS_TYPE="linux" + if [[ `uname -a` =~ "Darwin" ]]; then + OS_TYPE="mac" + elif [[ `uname -a` =~ "Cygwin" ]]; then + OS_TYPE="windows" + fi + echo $OS_TYPE +} + +function calMd5() { + OS_TYPE=`detectOSType` + if [[ "$OS_TYPE" == "mac" ]]; then + md5 -q $1 + elif [[ "$OS_TYPE" == "windows" ]]; then + md5sum $1 + else + md5sum $1 + fi +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/build/script/package.sh ---------------------------------------------------------------------- diff --git a/build/script/package.sh b/build/script/package.sh index 1f9fbbd..c850ec3 100755 --- a/build/script/package.sh +++ b/build/script/package.sh @@ -76,6 +76,7 @@ git rev-parse HEAD >> build/commit_SHA1 sh build/script/build.sh || { exit 1; } sh build/script/download-tomcat.sh || { exit 1; } +sh build/script/download-spark.sh || { exit 1; } sh build/script/prepare.sh || { exit 1; } sh build/script/compress.sh || { exit 1; } 
http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java index f169142..4eac92a 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java @@ -187,6 +187,20 @@ public class KylinConfig extends KylinConfigBase { return kylinConfig; } + public static String getKylinConfPath() { + String kylinConfHome = System.getProperty(KYLIN_CONF); + if (!StringUtils.isEmpty(kylinConfHome)) { + logger.info("Use KYLIN_CONF=" + kylinConfHome); + return kylinConfHome; + } + + String kylinHome = getKylinHome(); + if (StringUtils.isEmpty(kylinHome)) + throw new KylinConfigCannotInitException("Didn't find KYLIN_CONF or KYLIN_HOME, please set one of them"); + + return kylinHome + File.separator + "conf"; + } + static File getKylinPropertiesFile() { String kylinConfHome = System.getProperty(KYLIN_CONF); if (!StringUtils.isEmpty(kylinConfHome)) { http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 77b1e1c..7d6ac2b 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -61,6 +61,16 @@ abstract public class KylinConfigBase implements Serializable { return kylinHome; } + public static String getSparkHome() { + String sparkHome = System.getenv("SPARK_HOME"); + if (StringUtils.isNotEmpty(sparkHome)) { + 
logger.info("SPARK_HOME was set to " + sparkHome); + return sparkHome; + } + + return getKylinHome() + File.separator + "spark"; + } + // backward compatibility check happens when properties is loaded or updated static BackwardCompatibilityConfig BCC = new BackwardCompatibilityConfig(); @@ -729,23 +739,6 @@ abstract public class KylinConfigBase implements Serializable { // ENGINE.SPARK // ============================================================================ - public String getSparkHome() { - String sparkHome = getOptional("kylin.engine.spark.spark-home", "spark"); - File f = new File(sparkHome); - if (f.exists()) { - return f.getAbsolutePath(); - } else { - String home = getKylinHome(); - f = new File(home, sparkHome); - if (f.exists()) { - return f.getAbsolutePath(); - } - } - - throw new IllegalArgumentException("Spark home '" + sparkHome + "' does not exist, check 'kylin.engine.spark.spark-home' in kylin.properties"); - - } - public String getSparkHadoopConfDir() { return getRequired("kylin.engine.spark.env.hadoop-conf-dir"); } http://git-wip-us.apache.org/repos/asf/kylin/blob/e864cd3b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java ---------------------------------------------------------------------- diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java index 9532d31..c5d47e7 100644 --- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java +++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkBatchCubingJobBuilder2.java @@ -19,6 +19,7 @@ package org.apache.kylin.engine.spark; import org.apache.hadoop.util.ClassUtil; +import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.StringUtil; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.engine.EngineFactory; @@ -52,7 +53,7 @@ public 
class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 { sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName()); sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid()); sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), flatTableDesc.getTableName()); - sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), "/Users/shishaofeng/workspace/kylin-15/examples/test_case_data/sandbox/"); //FIXME + sparkExecutable.setParam(SparkCubingByLayer.OPTION_CONF_PATH.getOpt(), KylinConfig.getKylinConfPath()); sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath); StringBuilder jars = new StringBuilder(); @@ -65,7 +66,6 @@ public class SparkBatchCubingJobBuilder2 extends BatchCubingJobBuilder2 { StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars()); sparkExecutable.setJars(jars.toString()); - // sparkExecutable.setJars("/Users/shishaofeng/.m2/repository/org/cloudera/htrace/htrace-core/2.01/htrace-core-2.01.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-protocol/0.98.8-hadoop2/hbase-protocol-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2/repository/org/apache/hbase/hbase-common/0.98.8-hadoop2/hbase-common-0.98.8-hadoop2.jar,/Users/shishaofeng/.m2//repository/org/apache/hbase/hbase-client/0.98.8-hadoop2/hbase-client-0.98.8-hadoop2.jar"); sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_IN_MEM_CUBE + " with Spark"); return sparkExecutable;