This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

The following commit(s) were added to refs/heads/master by this push:
     new b4a4672  KYLIN-4863 Cache files are used globally to speed up the startup after the first time
b4a4672 is described below

commit b4a4672473db7a665cd57b18962cca7dbdcd0ce6
Author: benjobs <benj...@qq.com>
AuthorDate: Thu Mar 4 18:30:36 2021 +0800

    KYLIN-4863 Cache files are used globally to speed up the startup after the first time
---
 build/bin/clean-kylin-dirty-data.sh  |   4 +-
 build/bin/diag.sh                    |   8 +-
 build/bin/download-flink.sh          |  24 ++--
 build/bin/download-spark.sh          |  22 +--
 build/bin/find-flink-dependency.sh   |  59 ++++----
 build/bin/find-hadoop-conf-dir.sh    |  34 +++--
 build/bin/find-hbase-dependency.sh   |  56 ++++----
 build/bin/find-hive-dependency.sh    | 260 ++++++++++++++++++-----------------
 build/bin/find-kafka-dependency.sh   |  44 +++---
 build/bin/find-spark-dependency.sh   |  84 ++++++-----
 build/bin/header.sh                  |  38 ++---
 build/bin/health-check.sh            |  38 ++---
 build/bin/kylin-port-replace-util.sh |  26 ++--
 build/bin/kylin.sh                   |  61 ++------
 14 files changed, 385 insertions(+), 373 deletions(-)

diff --git a/build/bin/clean-kylin-dirty-data.sh b/build/bin/clean-kylin-dirty-data.sh
index 054b1e6..ab7aa85 100644
--- a/build/bin/clean-kylin-dirty-data.sh
+++ b/build/bin/clean-kylin-dirty-data.sh
@@ -17,7 +17,7 @@
 # limitations under the License.
# -source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/set-kylin-home.sh $@ +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/set-kylin-home.sh $@ today=`date +"%Y-%m-%d"` before7day=`date -d "-7 day" +"%Y-%m-%d"` @@ -30,5 +30,5 @@ ${KYLIN_HOME}/bin/kylin.sh org.apache.kylin.tool.StorageCleanupJob --delete true #clean before 7 day log before7dayfile=${KYLIN_HOME}/logs/clean-kylin-dirty-data.log.$before7day if [ -f "$before7dayfile" ]; then - rm $before7dayfile + rm $before7dayfile fi diff --git a/build/bin/diag.sh b/build/bin/diag.sh index 491f403..f220e50 100755 --- a/build/bin/diag.sh +++ b/build/bin/diag.sh @@ -79,12 +79,12 @@ then export HBASE_CLASSPATH=${HBASE_CLASSPATH}:${hive_dependency} hbase ${KYLIN_EXTRA_START_OPTS} \ - -Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties \ - -Dcatalina.home=${tomcat_root} \ - "$@" + -Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties \ + -Dcatalina.home=${tomcat_root} \ + "$@" exit 0 else echo "usage: diag.sh Project|JobId [target_path]" exit 1 -fi \ No newline at end of file +fi diff --git a/build/bin/download-flink.sh b/build/bin/download-flink.sh index c118a8f..6ef1a26 100755 --- a/build/bin/download-flink.sh +++ b/build/bin/download-flink.sh @@ -17,13 +17,13 @@ # limitations under the License. # -source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh if [ -d "${KYLIN_HOME}/flink" ]; then - echo "Flink binary exists" - exit 0; + echo "Flink binary exists" + exit 0; else - echo "Downloading flink package..." + echo "Downloading flink package..." 
fi flink_package_dir=/tmp/flink_package @@ -56,14 +56,14 @@ flink_shaded_hadoop_jar="flink-shaded-hadoop-3-uber-${flink_shaded_hadoop_versio flink_shaded_hadoop_path="https://repository.cloudera.com/artifactory/libs-release-local/org/apache/flink/flink-shaded-hadoop-3-uber/${flink_shaded_hadoop_version}/${flink_shaded_hadoop_jar}" if [ ! -f $flink_shaded_hadoop_jar ]; then - echo "Start to download $flink_shaded_hadoop_jar" - wget $flink_shaded_hadoop_path || echo "Download flink shaded hadoop jar failed" -else - if [ `md5cmd $flink_shaded_hadoop_jar | awk '{print $1}'` != $flink_shaded_hadoop_md5 ]; then - echo "md5 check failed" - rm $flink_shaded_hadoop_jar + echo "Start to download $flink_shaded_hadoop_jar" wget $flink_shaded_hadoop_path || echo "Download flink shaded hadoop jar failed" - fi +else + if [ `md5cmd $flink_shaded_hadoop_jar | awk '{print $1}'` != $flink_shaded_hadoop_md5 ]; then + echo "md5 check failed" + rm $flink_shaded_hadoop_jar + wget $flink_shaded_hadoop_path || echo "Download flink shaded hadoop jar failed" + fi fi unalias md5cmd @@ -77,4 +77,4 @@ mv flink ${KYLIN_HOME} rm -rf ${flink_package_dir} -echo "Download flink binary done" \ No newline at end of file +echo "Download flink binary done" diff --git a/build/bin/download-spark.sh b/build/bin/download-spark.sh index 7963d15..7d32d41 100755 --- a/build/bin/download-spark.sh +++ b/build/bin/download-spark.sh @@ -80,19 +80,19 @@ echo "Download spark binary done" rm -rf ${spark_package_dir} if [ ! -f "${spark_conf_path}/hive-site.xml" ]; then - echo "Copy hive-site.xml to ${spark_conf_path}" - ln -s ${hive_site_path} ${spark_conf_path}/hive-site.xml + echo "Copy hive-site.xml to ${spark_conf_path}" + ln -s ${hive_site_path} ${spark_conf_path}/hive-site.xml fi if [ -d "${cdh_path}" ]; then - if [ ! 
-f "${spark_jars_path}/hive-hcatalog-core-1.1.0-cdh${cdh_version}.jar" ]; then - echo "Download hive hcatalog dependency for cdh-${cdh_version}" - wget --directory-prefix=${spark_jars_path} https://repository.cloudera.com/content/repositories/releases/org/apache/hive/hcatalog/hive-hcatalog-core/1.1.0-cdh${cdh_version}/hive-hcatalog-core-1.1.0-cdh${cdh_version}.jar || echo "Download hive hcatalog dependency for cdh-${cdh_version}failed." - fi + if [ ! -f "${spark_jars_path}/hive-hcatalog-core-1.1.0-cdh${cdh_version}.jar" ]; then + echo "Download hive hcatalog dependency for cdh-${cdh_version}" + wget --directory-prefix=${spark_jars_path} https://repository.cloudera.com/content/repositories/releases/org/apache/hive/hcatalog/hive-hcatalog-core/1.1.0-cdh${cdh_version}/hive-hcatalog-core-1.1.0-cdh${cdh_version}.jar || echo "Download hive hcatalog dependency for cdh-${cdh_version}failed." + fi elif [ -d "${hdp_path}" ]; then - if [ ! -f "${spark_jars_path}/hive-hcatalog-core-1.2.1000.${hdp_version}.jar" ]; then - echo "Download hive hcatalog dependency for cdh-${hdp_version}" - wget --directory-prefix=${spark_jars_path} https://repo.hortonworks.com/content/repositories/releases/org/apache/hive/hcatalog/hive-hcatalog-core/1.2.1000.${hdp_version}/hive-hcatalog-core-1.2.1000.${hdp_version}.jar || echo "Download hive hcatalog dependency for hdp-${hdp_version} failed." - fi -fi \ No newline at end of file + if [ ! -f "${spark_jars_path}/hive-hcatalog-core-1.2.1000.${hdp_version}.jar" ]; then + echo "Download hive hcatalog dependency for cdh-${hdp_version}" + wget --directory-prefix=${spark_jars_path} https://repo.hortonworks.com/content/repositories/releases/org/apache/hive/hcatalog/hive-hcatalog-core/1.2.1000.${hdp_version}/hive-hcatalog-core-1.2.1000.${hdp_version}.jar || echo "Download hive hcatalog dependency for hdp-${hdp_version} failed." 
+ fi +fi diff --git a/build/bin/find-flink-dependency.sh b/build/bin/find-flink-dependency.sh index ea09a38..d9646ed 100755 --- a/build/bin/find-flink-dependency.sh +++ b/build/bin/find-flink-dependency.sh @@ -17,36 +17,45 @@ # limitations under the License. # -source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/header.sh +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -echo Retrieving Flink dependency... +if [ -f "${dir}/cached-flink-dependency.sh" ] ; then + source ${dir}/cached-flink-dependency.sh + echo Using flink cached dependency... +fi -flink_home= -if [ -n "$FLINK_HOME" ] -then - verbose "FLINK_HOME is set to: $FLINK_HOME, use it to locate Flink dependencies." - flink_home=$FLINK_HOME -fi +if [ -z "${flink_dependency}" ] ; then -if [ -z "$FLINK_HOME" ] -then - verbose "FLINK_HOME wasn't set, use $KYLIN_HOME/flink" - flink_home=$KYLIN_HOME/flink -fi + echo Retrieving Flink dependency... -if [ ! -d "$flink_home/lib" ] - then - echo `setColor 33 "Optional dependency flink not found, if you need this; set FLINK_HOME, or run bin/download-flink.sh"` - echo "echo 'skip flink_dependency'" > ${dir}/cached-flink-dependency.sh - else - flink_dependency=`find -L $flink_home/lib -name '*.jar' ! -name '*shaded-hadoop*' ! -name 'kafka*' ! -name '*log4j*' ! -name '*slf4j*' ! -name '*calcite*' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` - if [ -z "$flink_dependency" ] + flink_home= + + if [ -n "$FLINK_HOME" ] then - quit "flink jars not found" + verbose "FLINK_HOME is set to: $FLINK_HOME, use it to locate Flink dependencies." + flink_home=$FLINK_HOME + fi + + if [ -z "$FLINK_HOME" ] + then + verbose "FLINK_HOME wasn't set, use $KYLIN_HOME/flink" + flink_home=$KYLIN_HOME/flink + fi + + if [ ! 
-d "$flink_home/lib" ] + then + echo `setColor 33 "Optional dependency flink not found, if you need this; set FLINK_HOME, or run bin/download-flink.sh"` + echo "echo 'skip flink_dependency'" > ${dir}/cached-flink-dependency.sh else - verbose "flink dependency: $flink_dependency" - export flink_dependency + flink_dependency=`find -L $flink_home/lib -name '*.jar' ! -name '*shaded-hadoop*' ! -name 'kafka*' ! -name '*log4j*' ! -name '*slf4j*' ! -name '*calcite*' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` + if [ -z "$flink_dependency" ] + then + quit "flink jars not found" + else + verbose "flink dependency: $flink_dependency" + export flink_dependency + fi + echo "export flink_dependency=$flink_dependency" > ${dir}/cached-flink-dependency.sh fi - echo "export flink_dependency=$flink_dependency" > ${dir}/cached-flink-dependency.sh -fi \ No newline at end of file +fi diff --git a/build/bin/find-hadoop-conf-dir.sh b/build/bin/find-hadoop-conf-dir.sh index e03f85d..1fee4af 100755 --- a/build/bin/find-hadoop-conf-dir.sh +++ b/build/bin/find-hadoop-conf-dir.sh @@ -19,62 +19,70 @@ source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -echo Retrieving hadoop conf dir... - function find_hadoop_conf_dir() { override_hadoop_conf_dir=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.env.hadoop-conf-dir` - + if [ -n "$override_hadoop_conf_dir" ]; then verbose "kylin_hadoop_conf_dir is override as $override_hadoop_conf_dir" export kylin_hadoop_conf_dir=${override_hadoop_conf_dir} return fi - + hbase_classpath=`hbase classpath` - + arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) kylin_hadoop_conf_dir= - + for data in ${arr[@]} do result=`echo $data | grep -v -E ".*jar"` if [ $result ] then valid_conf_dir=true - + if [ ! -f $result/yarn-site.xml ] then verbose "$result is not valid hadoop dir conf because yarn-site.xml is missing" valid_conf_dir=false continue fi - + if [ ! 
-f $result/mapred-site.xml ] then verbose "$result is not valid hadoop dir conf because mapred-site.xml is missing" valid_conf_dir=false continue fi - + if [ ! -f $result/hdfs-site.xml ] then verbose "$result is not valid hadoop dir conf because hdfs-site.xml is missing" valid_conf_dir=false continue fi - + if [ ! -f $result/core-site.xml ] then verbose "$result is not valid hadoop dir conf because core-site.xml is missing" valid_conf_dir=false continue fi - + verbose "kylin_hadoop_conf_dir is $result" export kylin_hadoop_conf_dir=$result return fi done } -find_hadoop_conf_dir -echo "export kylin_hadoop_conf_dir=$kylin_hadoop_conf_dir" > ${dir}/cached-hadoop-conf-dir.sh \ No newline at end of file + + +if [ -f "${dir}/cached-hadoop-conf-dir.sh" ] ; then + source ${dir}/cached-hadoop-conf-dir.sh + echo Using hadoop conf cached dependency... +fi + +if [ -z "${kylin_hadoop_conf_dir}" ] ; then + echo Retrieving hadoop conf dir... + find_hadoop_conf_dir + echo "export kylin_hadoop_conf_dir=$kylin_hadoop_conf_dir" > ${dir}/cached-hadoop-conf-dir.sh +fi diff --git a/build/bin/find-hbase-dependency.sh b/build/bin/find-hbase-dependency.sh index b4631ba..c86d4bc 100755 --- a/build/bin/find-hbase-dependency.sh +++ b/build/bin/find-hbase-dependency.sh @@ -19,34 +19,42 @@ source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -echo Retrieving hbase dependency... +if [ -f "${dir}/cached-hbase-dependency.sh" ] ; then + source ${dir}/cached-hbase-dependency.sh + echo Using hbase cached dependency... +fi -hbase_classpath=`hbase classpath` +if [ -z "${hbase_dependency}" ] ; then -# special handling for Amazon EMR, to prevent re-init of hbase-setenv -is_aws=`uname -r | grep amzn` -if [ -n "$is_aws" ] && [ -d "/usr/lib/oozie/lib" ]; then - export HBASE_ENV_INIT="true" -fi + echo Retrieving hbase dependency... 
+ + hbase_classpath=`hbase classpath` -arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) -hbase_common_path= -for data in ${arr[@]} -do - result=`echo $data | grep -e 'hbase-common[a-z0-9A-Z\.-]*jar' | grep -v tests` - if [ $result ] + # special handling for Amazon EMR, to prevent re-init of hbase-setenv + is_aws=`uname -r | grep amzn` + if [ -n "$is_aws" ] && [ -d "/usr/lib/oozie/lib" ]; then + export HBASE_ENV_INIT="true" + fi + + arr=(`echo $hbase_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) + hbase_common_path= + for data in ${arr[@]} + do + result=`echo $data | grep -e 'hbase-common[a-z0-9A-Z\.-]*jar' | grep -v tests` + if [ $result ] + then + hbase_common_path=$data + fi + done + + if [ -z "$hbase_common_path" ] then - hbase_common_path=$data + quit "hbase-common lib not found" fi -done -if [ -z "$hbase_common_path" ] -then - quit "hbase-common lib not found" + hbase_dependency=${hbase_common_path} + verbose "hbase dependency: $hbase_dependency" + export hbase_dependency + echo "export HBASE_ENV_INIT=$HBASE_ENV_INIT + export hbase_dependency=$hbase_dependency" > ${dir}/cached-hbase-dependency.sh fi - -hbase_dependency=${hbase_common_path} -verbose "hbase dependency: $hbase_dependency" -export hbase_dependency -echo "export HBASE_ENV_INIT=$HBASE_ENV_INIT -export hbase_dependency=$hbase_dependency" > ${dir}/cached-hbase-dependency.sh diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh index 1012ffa..30edcc9 100755 --- a/build/bin/find-hive-dependency.sh +++ b/build/bin/find-hive-dependency.sh @@ -19,105 +19,9 @@ source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -## ${dir} assigned to $KYLIN_HOME/bin in header.sh -source ${dir}/load-hive-conf.sh - -echo Retrieving hive dependency... 
- -client_mode=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.client` -hive_env= - -if [ "${client_mode}" == "beeline" ] -then - beeline_shell=`$KYLIN_HOME/bin/get-properties.sh kylin.source.hive.beeline-shell` - beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` - hive_env=`${beeline_shell} ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e "set;" 2>&1 | grep --text 'env:CLASSPATH' ` -else - source ${dir}/check-hive-usability.sh - hive_env=`hive ${hive_conf_properties} -e set 2>&1 | grep 'env:CLASSPATH'` -fi - -if [ -z $hive_env ] -then - hive_permission=`hive ${hive_conf_properties} -e set 2>&1 | grep 'No valid credentials provided'` - if [ -n "$hive_permission" ] - then - quit "No valid credentials provided for Hive CLI, please check permission of hive. (e.g. check if Kerberos is expired or not)" - else - quit "Something wrong with Hive CLI or Beeline, please execute Hive CLI or Beeline CLI in terminal to find the root cause." - fi -fi - -hive_classpath=`echo $hive_env | grep 'env:CLASSPATH' | awk -F '=' '{print $2}'` -arr=(`echo $hive_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) -hive_conf_path= -hive_exec_path= - -if [ -n "$HIVE_CONF" ] -then - verbose "HIVE_CONF is set to: $HIVE_CONF, use it to locate hive configurations." - hive_conf_path=$HIVE_CONF -fi - -for data in ${arr[@]} -do - result=`echo $data | grep -e 'hive-exec[a-z0-9A-Z\.-]*.jar' | grep -v 'auxlib'` - # In some cases there are more than one lib dirs, only the first one will be applied. - if [ $result ] && [ -z "$hive_exec_path" ] - then - hive_exec_path=$data - fi - - # in some versions of hive config is not in hive's classpath, find it separately - if [ -z "$hive_conf_path" ] - then - result=`echo $data | grep -e 'hive[^/]*/conf'` - if [ $result ] - then - hive_conf_path=$data - fi - fi -done - -if [ -z "$hive_conf_path" ] -then - quit "Couldn't find hive configuration directory. 
Please set HIVE_CONF to the path which contains hive-site.xml." -fi - -if [ -z "$hive_exec_path" ] -then - quit "Couldn't find hive executable jar. Please check if hive executable jar exists in HIVE_LIB folder." -fi - -# in some versions of hive hcatalog is not in hive's classpath, find it separately -if [ -z "$HCAT_HOME" ] -then - verbose "HCAT_HOME not found, try to find hcatalog path from hadoop home" - hadoop_home=`echo $hive_exec_path | awk -F '/hive.*/lib/hive-exec[a-z0-9A-Z.-]*.jar' '{print $1}'` - hive_home=`echo $hive_exec_path | awk -F '/lib/hive-exec[a-z0-9A-Z.-]*.jar' '{print $1}'` - is_aws=`uname -r | grep amzn` - if [ -d "${hadoop_home}/hive-hcatalog" ]; then - hcatalog_home=${hadoop_home}/hive-hcatalog - elif [ -d "${hadoop_home}/hive/hcatalog" ]; then - hcatalog_home=${hadoop_home}/hive/hcatalog - elif [ -d "${hive_home}/hcatalog" ]; then - hcatalog_home=${hive_home}/hcatalog - elif [ -n is_aws ] && [ -d "/usr/lib/hive-hcatalog" ]; then - # special handling for Amazon EMR - hcatalog_home=/usr/lib/hive-hcatalog - else - quit "Couldn't locate hcatalog installation, please make sure it is installed and set HCAT_HOME to the path." - fi -else - verbose "HCAT_HOME is set to: $HCAT_HOME, use it to find hcatalog path:" - hcatalog_home=${HCAT_HOME} -fi - -hcatalog=`find -L ${hcatalog_home} -name "hive-hcatalog-core[0-9\.-]*.jar" 2>&1 | grep -m 1 -v 'Permission denied'` - -if [ -z "$hcatalog" ] -then - quit "hcatalog lib not found" +if [ -f "${dir}/cached-hive-dependency.sh" ] ; then + source ${dir}/cached-hive-dependency.sh + echo Using hive cached dependency... fi function checkFileExist() @@ -179,36 +83,142 @@ function validateDirectory() [[ "${find}" == "true" ]] || quit "ERROR, no hive-site.xml found under dir: ${conf_path}!" 
} -if [ -z "$HIVE_LIB" ] -then - verbose "HIVE_LIB is not set, try to retrieve hive lib from hive_exec_path" - if [[ $hive_exec_path =~ ^\/.*hive.*\/lib\/hive-exec[a-z0-9A-Z\.-]*.jar ]] +if [ -z "${hive_dependency}" ] ; then + + echo Retrieving hive dependency... + + ## ${dir} assigned to $KYLIN_HOME/bin in header.sh + source ${dir}/load-hive-conf.sh + + client_mode=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.client` + hive_env= + + if [ "${client_mode}" == "beeline" ] + then + beeline_shell=`$KYLIN_HOME/bin/get-properties.sh kylin.source.hive.beeline-shell` + beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` + hive_env=`${beeline_shell} ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e "set;" 2>&1 | grep --text 'env:CLASSPATH' ` + else + source ${dir}/check-hive-usability.sh + hive_env=`hive ${hive_conf_properties} -e set 2>&1 | grep 'env:CLASSPATH'` + fi + + if [ -z $hive_env ] then - hive_lib_dir="$(dirname $hive_exec_path)" + hive_permission=`hive ${hive_conf_properties} -e set 2>&1 | grep 'No valid credentials provided'` + if [ -n "$hive_permission" ] + then + quit "No valid credentials provided for Hive CLI, please check permission of hive. (e.g. check if Kerberos is expired or not)" + else + quit "Something wrong with Hive CLI or Beeline, please execute Hive CLI or Beeline CLI in terminal to find the root cause." + fi + fi + + hive_classpath=`echo $hive_env | grep 'env:CLASSPATH' | awk -F '=' '{print $2}'` + arr=(`echo $hive_classpath | cut -d ":" -f 1- | sed 's/:/ /g'`) + hive_conf_path= + hive_exec_path= + + if [ -n "$HIVE_CONF" ] + then + verbose "HIVE_CONF is set to: $HIVE_CONF, use it to locate hive configurations." + hive_conf_path=$HIVE_CONF + fi + + for data in ${arr[@]} + do + result=`echo $data | grep -e 'hive-exec[a-z0-9A-Z\.-]*.jar' | grep -v 'auxlib'` + # In some cases there are more than one lib dirs, only the first one will be applied. 
+ if [ $result ] && [ -z "$hive_exec_path" ] + then + hive_exec_path=$data + fi + + # in some versions of hive config is not in hive's classpath, find it separately + if [ -z "$hive_conf_path" ] + then + result=`echo $data | grep -e 'hive[^/]*/conf'` + if [ $result ] + then + hive_conf_path=$data + fi + fi + done + + if [ -z "$hive_conf_path" ] + then + quit "Couldn't find hive configuration directory. Please set HIVE_CONF to the path which contains hive-site.xml." + fi + + if [ -z "$hive_exec_path" ] + then + quit "Couldn't find hive executable jar. Please check if hive executable jar exists in HIVE_LIB folder." + fi + + # in some versions of hive hcatalog is not in hive's classpath, find it separately + if [ -z "$HCAT_HOME" ] + then + verbose "HCAT_HOME not found, try to find hcatalog path from hadoop home" + hadoop_home=`echo $hive_exec_path | awk -F '/hive.*/lib/hive-exec[a-z0-9A-Z.-]*.jar' '{print $1}'` + hive_home=`echo $hive_exec_path | awk -F '/lib/hive-exec[a-z0-9A-Z.-]*.jar' '{print $1}'` + is_aws=`uname -r | grep amzn` + if [ -d "${hadoop_home}/hive-hcatalog" ]; then + hcatalog_home=${hadoop_home}/hive-hcatalog + elif [ -d "${hadoop_home}/hive/hcatalog" ]; then + hcatalog_home=${hadoop_home}/hive/hcatalog + elif [ -d "${hive_home}/hcatalog" ]; then + hcatalog_home=${hive_home}/hcatalog + elif [ -n is_aws ] && [ -d "/usr/lib/hive-hcatalog" ]; then + # special handling for Amazon EMR + hcatalog_home=/usr/lib/hive-hcatalog + else + quit "Couldn't locate hcatalog installation, please make sure it is installed and set HCAT_HOME to the path." + fi else - quit "HIVE_LIB not found, please check hive installation or export HIVE_LIB='YOUR_LOCAL_HIVE_LIB'." 
+ verbose "HCAT_HOME is set to: $HCAT_HOME, use it to find hcatalog path:" + hcatalog_home=${HCAT_HOME} + fi + + hcatalog=`find -L ${hcatalog_home} -name "hive-hcatalog-core[0-9\.-]*.jar" 2>&1 | grep -m 1 -v 'Permission denied'` + + if [ -z "$hcatalog" ] + then + quit "hcatalog lib not found" fi -else - if [[ $HIVE_LIB =~ ^\/.*hive.*\/lib[\/]* ]] + + + if [ -z "$HIVE_LIB" ] then - verbose "HIVE_LIB is set to ${HIVE_LIB}" + verbose "HIVE_LIB is not set, try to retrieve hive lib from hive_exec_path" + if [[ $hive_exec_path =~ ^\/.*hive.*\/lib\/hive-exec[a-z0-9A-Z\.-]*.jar ]] + then + hive_lib_dir="$(dirname $hive_exec_path)" + else + quit "HIVE_LIB not found, please check hive installation or export HIVE_LIB='YOUR_LOCAL_HIVE_LIB'." + fi else - echo "WARNING: HIVE_LIB is set to ${HIVE_LIB}, it's advised to set it to the lib dir under hive's installation directory" + if [[ $HIVE_LIB =~ ^\/.*hive.*\/lib[\/]* ]] + then + verbose "HIVE_LIB is set to ${HIVE_LIB}" + else + echo "WARNING: HIVE_LIB is set to ${HIVE_LIB}, it's advised to set it to the lib dir under hive's installation directory" + fi + hive_lib_dir="$HIVE_LIB" fi - hive_lib_dir="$HIVE_LIB" + hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' ! -name '*jackson-datatype-joda*' ! 
-name '*derby*' -printf '%p:' | sed 's/:$//'` + + validateDirectory ${hive_conf_path} + checkFileExist hive_lib ${hive_lib} + checkFileExist hcatalog ${hcatalog} + + hive_dependency=${hive_conf_path}:${hive_lib}:${hcatalog} + verbose "hive dependency is $hive_dependency" + export hive_dependency + export hive_conf_path + export hive_warehouse_dir=`hive -e 'set hive.metastore.warehouse.dir;' | awk '{split($0,a,"="); print a[2]}'` + echo "export hive_warehouse_dir=$hive_warehouse_dir" + echo "export hive_warehouse_dir=$hive_warehouse_dir + export hive_dependency=$hive_dependency + export hive_conf_path=$hive_conf_path" > ${dir}/cached-hive-dependency.sh fi -hive_lib=`find -L ${hive_lib_dir} -name '*.jar' ! -name '*druid*' ! -name '*slf4j*' ! -name '*avatica*' ! -name '*calcite*' ! -name '*jackson-datatype-joda*' ! -name '*derby*' -printf '%p:' | sed 's/:$//'` - -validateDirectory ${hive_conf_path} -checkFileExist hive_lib ${hive_lib} -checkFileExist hcatalog ${hcatalog} - -hive_dependency=${hive_conf_path}:${hive_lib}:${hcatalog} -verbose "hive dependency is $hive_dependency" -export hive_dependency -export hive_conf_path -export hive_warehouse_dir=`hive -e 'set hive.metastore.warehouse.dir;' | awk '{split($0,a,"="); print a[2]}'` -echo "export hive_warehouse_dir=$hive_warehouse_dir" -echo "export hive_warehouse_dir=$hive_warehouse_dir -export hive_dependency=$hive_dependency -export hive_conf_path=$hive_conf_path" > ${dir}/cached-hive-dependency.sh + diff --git a/build/bin/find-kafka-dependency.sh b/build/bin/find-kafka-dependency.sh index c5c5ce4..8b0373d 100755 --- a/build/bin/find-kafka-dependency.sh +++ b/build/bin/find-kafka-dependency.sh @@ -20,33 +20,41 @@ source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -kafka_home= +if [ -f "${dir}/cached-kafka-dependency.sh" ] ; then + source ${dir}/cached-kafka-dependency.sh + echo Using kafka cached dependency... 
+fi + +if [ -z "${kafka_dependency}" ] ; then -echo Retrieving kafka dependency... + echo Retrieving kafka dependency... -if [ -z "$KAFKA_HOME" ] -then - echo "Couldn't find kafka home. If you want to enable streaming processing, Please set KAFKA_HOME to the path which contains kafka dependencies." -else - verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies." - kafka_home=$KAFKA_HOME + kafka_home= - # works for kafka 9+ - kafka_dependency=`find -L $kafka_home -name 'kafka-clients-[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` - if [ -z "$kafka_dependency" ] + if [ -z "$KAFKA_HOME" ] then - # works for kafka 8 - kafka_dependency=`find -L $kafka_home -name 'kafka_[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` + echo "Couldn't find kafka home. If you want to enable streaming processing, Please set KAFKA_HOME to the path which contains kafka dependencies." + else + verbose "KAFKA_HOME is set to: $KAFKA_HOME, use it to locate kafka dependencies." + kafka_home=$KAFKA_HOME + + # works for kafka 9+ + kafka_dependency=`find -L $kafka_home -name 'kafka-clients-[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` if [ -z "$kafka_dependency" ] then - quit "kafka client lib not found" + # works for kafka 8 + kafka_dependency=`find -L $kafka_home -name 'kafka_[a-z0-9A-Z\.-]*.jar' ! -name '*doc*' ! -name '*test*' ! 
-name '*sources*' ''-printf '%p:' | sed 's/:$//'` + if [ -z "$kafka_dependency" ] + then + quit "kafka client lib not found" + else + verbose "kafka dependency is $kafka_dependency" + export kafka_dependency + fi else verbose "kafka dependency is $kafka_dependency" export kafka_dependency fi - else - verbose "kafka dependency is $kafka_dependency" - export kafka_dependency fi + echo "export kafka_dependency=$kafka_dependency" > ${dir}/cached-kafka-dependency.sh fi -echo "export kafka_dependency=$kafka_dependency" > ${dir}/cached-kafka-dependency.sh diff --git a/build/bin/find-spark-dependency.sh b/build/bin/find-spark-dependency.sh index 0ea7ab2..8c2b8c2 100755 --- a/build/bin/find-spark-dependency.sh +++ b/build/bin/find-spark-dependency.sh @@ -19,54 +19,62 @@ source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh -echo Retrieving Spark dependency... +if [ -f "${dir}/cached-spark-dependency.sh" ] ; then + source ${dir}/cached-spark-dependency.sh + echo Using spark cached dependency... +fi -spark_home= +if [ -z "${spark_dependency}" ] ; then -if [ -n "$SPARK_HOME" ] -then - verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies." - spark_home=$SPARK_HOME -fi + echo Retrieving Spark dependency... -if [ -z "$SPARK_HOME" ] -then - verbose "SPARK_HOME wasn't set, use $KYLIN_HOME/spark" - spark_home=$KYLIN_HOME/spark -fi + spark_home= -SPARK_EVENTLOG_DIR=`bash $KYLIN_HOME/bin/get-properties.sh kylin.engine.spark-conf.spark.eventLog.dir` -if [ -n "$SPARK_EVENTLOG_DIR" ] -then - hadoop ${hadoop_conf_param} fs -mkdir -p $SPARK_EVENTLOG_DIR - if [ $? != 0 ] + if [ -n "$SPARK_HOME" ] then - quit "Failed to create $SPARK_EVENTLOG_DIR. Please make sure the user has right to access $SPARK_EVENTLOG_DIR" + verbose "SPARK_HOME is set to: $SPARK_HOME, use it to locate Spark dependencies." 
+ spark_home=$SPARK_HOME fi -fi -SPARK_HISTORYLOG_DIR=`bash $KYLIN_HOME/bin/get-properties.sh kylin.engine.spark-conf.spark.history.fs.logDirectory` -if [ -n "$SPARK_HISTORYLOG_DIR" ] -then - hadoop ${hadoop_conf_param} fs -mkdir -p $SPARK_HISTORYLOG_DIR - if [ $? != 0 ] + if [ -z "$SPARK_HOME" ] then - quit "Failed to create $SPARK_HISTORYLOG_DIR. Please make sure the user has right to access $SPARK_HISTORYLOG_DIR" + verbose "SPARK_HOME wasn't set, use $KYLIN_HOME/spark" + spark_home=$KYLIN_HOME/spark fi -fi -if [ ! -d "$spark_home/jars" ] - then - echo `setColor 33 "Optional dependency spark not found, if you need this; set SPARK_HOME, or run bin/download-spark.sh"` - echo "echo 'skip spark_dependency'" > ${dir}/cached-spark-dependency.sh - else - spark_dependency=`find -L $spark_home/jars -name '*.jar' ! -name '*slf4j*' ! -name '*calcite*' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` - if [ -z "$spark_dependency" ] + SPARK_EVENTLOG_DIR=`bash $KYLIN_HOME/bin/get-properties.sh kylin.engine.spark-conf.spark.eventLog.dir` + if [ -n "$SPARK_EVENTLOG_DIR" ] then - quit "spark jars not found" - else - verbose "spark dependency: $spark_dependency" - export spark_dependency + hadoop ${hadoop_conf_param} fs -mkdir -p $SPARK_EVENTLOG_DIR + if [ $? != 0 ] + then + quit "Failed to create $SPARK_EVENTLOG_DIR. Please make sure the user has right to access $SPARK_EVENTLOG_DIR" + fi + fi + + SPARK_HISTORYLOG_DIR=`bash $KYLIN_HOME/bin/get-properties.sh kylin.engine.spark-conf.spark.history.fs.logDirectory` + if [ -n "$SPARK_HISTORYLOG_DIR" ] + then + hadoop ${hadoop_conf_param} fs -mkdir -p $SPARK_HISTORYLOG_DIR + if [ $? != 0 ] + then + quit "Failed to create $SPARK_HISTORYLOG_DIR. Please make sure the user has right to access $SPARK_HISTORYLOG_DIR" + fi + fi + + if [ ! 
-d "$spark_home/jars" ] + then + echo `setColor 33 "Optional dependency spark not found, if you need this; set SPARK_HOME, or run bin/download-spark.sh"` + echo "echo 'skip spark_dependency'" > ${dir}/cached-spark-dependency.sh + else + spark_dependency=`find -L $spark_home/jars -name '*.jar' ! -name '*slf4j*' ! -name '*calcite*' ! -name '*doc*' ! -name '*test*' ! -name '*sources*' ''-printf '%p:' | sed 's/:$//'` + if [ -z "$spark_dependency" ] + then + quit "spark jars not found" + else + verbose "spark dependency: $spark_dependency" + export spark_dependency + fi + echo "export spark_dependency=$spark_dependency" > ${dir}/cached-spark-dependency.sh fi - echo "export spark_dependency=$spark_dependency" > ${dir}/cached-spark-dependency.sh fi diff --git a/build/bin/header.sh b/build/bin/header.sh index c725853..e85fcdd 100755 --- a/build/bin/header.sh +++ b/build/bin/header.sh @@ -36,25 +36,25 @@ done if [[ "$dir" == "" ]] then - dir=$(cd -P -- "$(dirname -- "$0")" && pwd -P) - - # set KYLIN_HOME with consideration for multiple instances that are on the same node - KYLIN_HOME=${KYLIN_HOME:-"${dir}/../"} - export KYLIN_HOME=`cd "$KYLIN_HOME"; pwd` - dir="$KYLIN_HOME/bin" - - function quit { - echo "$@" - exit 1 - } - - function verbose { - if [[ -n "$verbose" ]]; then - echo "$@" - fi - } - - function setColor() { + dir=$(cd -P -- "$(dirname -- "$0")" && pwd -P) + + # set KYLIN_HOME with consideration for multiple instances that are on the same node + KYLIN_HOME=${KYLIN_HOME:-"${dir}/../"} + export KYLIN_HOME=`cd "$KYLIN_HOME"; pwd` + dir="$KYLIN_HOME/bin" + + function quit { + echo "$@" + exit 1 + } + + function verbose { + if [[ -n "$verbose" ]]; then + echo "$@" + fi + } + + function setColor() { echo -e "\033[$1m$2\033[0m" } fi diff --git a/build/bin/health-check.sh b/build/bin/health-check.sh index 3e18631..c63a8f8 100755 --- a/build/bin/health-check.sh +++ b/build/bin/health-check.sh @@ -20,11 +20,11 @@ ALERT="y...@email.com" OUTPUT=$( - curl --max-time 20 -# \ 
- --data '{"sql":"select count(*) from test_kylin_fact","offset":0,"limit":50000,"acceptPartial":true,"project":"default"}' \ - -H "Authorization:Basic QURNSU46S1lMSU4=" \ - -H "Content-Type:application/json;charset=UTF-8" \ - http://localhost:7070/kylin/api/query \ + curl --max-time 20 -# \ + --data '{"sql":"select count(*) from test_kylin_fact","offset":0,"limit":50000,"acceptPartial":true,"project":"default"}' \ + -H "Authorization:Basic QURNSU46S1lMSU4=" \ + -H "Content-Type:application/json;charset=UTF-8" \ + http://localhost:7070/kylin/api/query \ ) # ---------------------------------------------------------------------------- @@ -32,19 +32,19 @@ OUTPUT=$( date if [[ $OUTPUT == *"results"* ]]; then - echo "Good." + echo "Good." else - echo "Bad." - TS_FILE=/tmp/kylin_healthmon_ts - LAST_TS=`stat -c%Y $TS_FILE 2>/dev/null` - CURR_TS=`date +%s` - echo last: $LAST_TS - echo curr: $CURR_TS - if (( ${LAST_TS:-"0"} < $CURR_TS - 3600 )); then - echo "Sending mail..." - echo "Kylin Prod health check failed as of $(date)." | mail -s "KYLIN PROD DOWN" $ALERT - if [ "$?" == "0" ]; then - touch $TS_FILE - fi - fi + echo "Bad." + TS_FILE=/tmp/kylin_healthmon_ts + LAST_TS=`stat -c%Y $TS_FILE 2>/dev/null` + CURR_TS=`date +%s` + echo last: $LAST_TS + echo curr: $CURR_TS + if (( ${LAST_TS:-"0"} < $CURR_TS - 3600 )); then + echo "Sending mail..." + echo "Kylin Prod health check failed as of $(date)." | mail -s "KYLIN PROD DOWN" $ALERT + if [ "$?" 
== "0" ]; then + touch $TS_FILE + fi + fi fi diff --git a/build/bin/kylin-port-replace-util.sh b/build/bin/kylin-port-replace-util.sh index 03f1a34..7dbf89e 100755 --- a/build/bin/kylin-port-replace-util.sh +++ b/build/bin/kylin-port-replace-util.sh @@ -20,24 +20,24 @@ #exit if find error # ============================================================================ -source $(cd -P -- "$(dirname -- "$0")" && pwd -P)/set-kylin-home.sh $@ +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/set-kylin-home.sh $@ set -o pipefail # trace ERR through pipes set -o errtrace # trace ERR through 'time command' and other functions function error() { - SCRIPT="$0" # script name - LASTLINE="$1" # line of error occurrence - LASTERR="$2" # error code - echo "ERROR exit from ${SCRIPT} : line ${LASTLINE} with exit code ${LASTERR}" - exit 1 + SCRIPT="$0" # script name + LASTLINE="$1" # line of error occurrence + LASTERR="$2" # error code + echo "ERROR exit from ${SCRIPT} : line ${LASTLINE} with exit code ${LASTERR}" + exit 1 } trap 'error ${LINENO} ${?}' ERR #check input parameters if [ $# -eq 0 ]; then - echo "Usage : kylin-port-replace-util.sh set PORT_OFFSET --> Modify all conflict ports base on a offset" - echo "Usage : kylin-port-replace-util.sh reset --> Recover to original setting" - exit 0 + echo "Usage : kylin-port-replace-util.sh set PORT_OFFSET --> Modify all conflict ports base on a offset" + echo "Usage : kylin-port-replace-util.sh reset --> Recover to original setting" + exit 0 fi #check kylin home @@ -59,7 +59,7 @@ BUILD_CUBE_FILE="${KYLIN_HOME}/bin/build-incremental-cube.sh" TOMCAT_PORT_LIST=(9005 7070 9443 7443 9009) KYLIN_DEFAULT_PORT=7070 -if [ "$1" == "set" ] +if [ "$1" == "set" ] then OFFSET=$2 echo "Port offset is : ${OFFSET}" @@ -104,9 +104,9 @@ then for port in ${TOMCAT_PORT_LIST[@]} do - new_port=`expr ${port} + ${OFFSET} ` - #echo "Replace old port : ${port} to new port : ${new_port}" - sed -i "s/$port/${new_port}/g" 
${TOMCAT_CONFIG_FILE} + new_port=`expr ${port} + ${OFFSET} ` + #echo "Replace old port : ${port} to new port : ${new_port}" + sed -i "s/$port/${new_port}/g" ${TOMCAT_CONFIG_FILE} done diff --git a/build/bin/kylin.sh b/build/bin/kylin.sh index 7abe145..4079525 100755 --- a/build/bin/kylin.sh +++ b/build/bin/kylin.sh @@ -33,54 +33,13 @@ mkdir -p ${KYLIN_HOME}/ext source ${dir}/set-java-home.sh function retrieveDependency() { - #retrive $hive_dependency and $hbase_dependency - if [[ -z $reload_dependency && `ls -1 ${dir}/cached-* 2>/dev/null | wc -l` -eq 6 ]] - then - echo "Using cached dependency..." - source ${dir}/cached-hive-dependency.sh - if [ -z "${hive_warehouse_dir}" ] || [ -z "${hive_dependency}" ] || [ -z "${hive_conf_path}" ]; then - echo "WARNING: Using ${dir}/cached-hive-dependency.sh failed,will be use ${dir}/find-hive-dependency.sh" - source ${dir}/find-hive-dependency.sh - fi - - source ${dir}/cached-hbase-dependency.sh - if [ -z "${hbase_dependency}" ]; then - echo "WARNING: Using ${dir}/cached-hbase-dependency.sh failed,will be use ${dir}/find-hbase-dependency.sh" - source ${dir}/find-hbase-dependency.sh - fi - - source ${dir}/cached-hadoop-conf-dir.sh - if [ -z "${kylin_hadoop_conf_dir}" ]; then - echo "WARNING: Using ${dir}/cached-hadoop-conf-dir.sh failed,will be use ${dir}/find-hadoop-conf-dir.sh" - source ${dir}/find-hadoop-conf-dir.sh - fi - - source ${dir}/cached-kafka-dependency.sh - if [ -z "${kafka_dependency}" ]; then - echo "WARNING: Using ${dir}/cached-kafka-dependency.sh failed,will be use ${dir}/find-kafka-dependency.sh" - source ${dir}/find-kafka-dependency.sh - fi - - source ${dir}/cached-spark-dependency.sh - if [ -z "${spark_dependency}" ]; then - echo "WARNING: Using ${dir}/cached-spark-dependency.sh failed,will be use ${dir}/find-spark-dependency.sh" - source ${dir}/find-spark-dependency.sh - fi - - source ${dir}/cached-flink-dependency.sh - if [ -z "${flink_dependency}" ]; then - echo "WARNING: Using 
${dir}/cached-flink-dependency.sh failed,will be use ${dir}/find-flink-dependency.sh" - source ${dir}/find-flink-dependency.sh - fi - else - source ${dir}/find-hive-dependency.sh - source ${dir}/find-hbase-dependency.sh - source ${dir}/find-hadoop-conf-dir.sh - source ${dir}/find-kafka-dependency.sh - source ${dir}/find-spark-dependency.sh - source ${dir}/find-flink-dependency.sh - fi + source ${dir}/find-hive-dependency.sh + source ${dir}/find-hbase-dependency.sh + source ${dir}/find-hadoop-conf-dir.sh + source ${dir}/find-kafka-dependency.sh + source ${dir}/find-spark-dependency.sh + source ${dir}/find-flink-dependency.sh #retrive $KYLIN_EXTRA_START_OPTS if [ -f "${dir}/setenv.sh" ]; then @@ -104,7 +63,7 @@ function retrieveStartCommand() { PID=`cat $KYLIN_HOME/pid` if ps -p $PID > /dev/null then - quit "Kylin is running, stop it first" + quit "Kylin is running, stop it first" fi fi @@ -220,7 +179,7 @@ function retrieveStopCommand() { sleep 1 #give kill -9 sometime to "kill" if ps -p $PID > /dev/null then - quit "Warning, even kill -9 failed, giving up! Sorry..." + quit "Warning, even kill -9 failed, giving up! Sorry..." fi fi @@ -279,12 +238,14 @@ then echo "Restarting kylin..." echo "--> Stopping kylin first if it's running..." retrieveStopCommand + if [[ $? != 0 ]] then echo "Kylin is not running, now start it" fi echo "--> Start kylin..." retrieveStartCommand + ${start_command} >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid & rm -f $lockfile @@ -402,7 +363,7 @@ then sleep 1 #give kill -9 sometime to "kill" if ps -p $PID > /dev/null then - quit "Warning, even kill -9 failed, giving up! Sorry..." + quit "Warning, even kill -9 failed, giving up! Sorry..." fi fi