This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin-on-parquet-v2 in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push: new e1f7157 KYLIN-4913 Update docker image for Kylin 4.0 Beta e1f7157 is described below commit e1f7157dd5dd52b8f6e3e76d7dc873fe9e973f67 Author: Zhichao Zhang <441586...@qq.com> AuthorDate: Wed Feb 24 19:42:09 2021 +0800 KYLIN-4913 Update docker image for Kylin 4.0 Beta --- docker/build_standalone_image.sh | 4 +- docker/dockerfile/standalone/Dockerfile | 15 +- .../standalone}/build_standalone_image.sh | 4 +- docker/dockerfile/standalone/conf/bin/kylin.sh | 504 +++++++++++++++++++++ .../standalone/conf/hadoop/capacity-scheduler.xml | 134 ++++++ .../dockerfile/standalone/conf/hive/hive-site.xml | 6 +- .../standalone/conf/kylin/kylin.properties | 377 +++++++++++++++ .../standalone/conf/spark/spark-defaults.conf | 55 +++ .../dockerfile/standalone/conf/spark/spark-env.sh | 77 ++++ docker/dockerfile/standalone/conf/zk/zoo.cfg | 45 ++ docker/setup_standalone.sh | 2 +- 11 files changed, 1211 insertions(+), 12 deletions(-) diff --git a/docker/build_standalone_image.sh b/docker/build_standalone_image.sh index 9c0b925..749ebbc 100755 --- a/docker/build_standalone_image.sh +++ b/docker/build_standalone_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . -docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta . diff --git a/docker/dockerfile/standalone/Dockerfile b/docker/dockerfile/standalone/Dockerfile index a168e6c..1d1ee3b 100644 --- a/docker/dockerfile/standalone/Dockerfile +++ b/docker/dockerfile/standalone/Dockerfile @@ -16,17 +16,20 @@ # # Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one-for-kylin4 +FROM hadoop2.7-all-in-one-for-kylin4-beta -ENV KYLIN_VERSION 4.0.0-alpha -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2 +ENV KYLIN_VERSION 4.0.0-beta +ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin # Download Kylin -RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ - && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz +RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ + && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ + && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz RUN rm -f $KYLIN_HOME/conf/kylin.properties COPY conf/kylin/* $KYLIN_HOME/conf/ +RUN rm -f $KYLIN_HOME/bin/kylin.sh +COPY conf/bin/kylin.sh $KYLIN_HOME/bin/ +RUN chmod +x $KYLIN_HOME/bin/kylin.sh RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/ RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh diff --git a/docker/build_standalone_image.sh b/docker/dockerfile/standalone/build_standalone_image.sh similarity index 97% copy from docker/build_standalone_image.sh copy to docker/dockerfile/standalone/build_standalone_image.sh index 9c0b925..749ebbc 100755 --- a/docker/build_standalone_image.sh +++ b/docker/dockerfile/standalone/build_standalone_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . 
-docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta . diff --git a/docker/dockerfile/standalone/conf/bin/kylin.sh b/docker/dockerfile/standalone/conf/bin/kylin.sh new file mode 100755 index 0000000..a691cec --- /dev/null +++ b/docker/dockerfile/standalone/conf/bin/kylin.sh @@ -0,0 +1,504 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# set verbose=true to print more logs during start up + + + + +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh $@ +if [ "$verbose" = true ]; then + shift +fi + +mkdir -p ${KYLIN_HOME}/logs +mkdir -p ${KYLIN_HOME}/ext + +source ${dir}/set-java-home.sh + +function retrieveDependency() { + #retrieve $hive_dependency and $hbase_dependency + if [[ -z $reload_dependency && `ls -1 ${dir}/cached-* 2>/dev/null | wc -l` -eq 6 ]] + then + echo "Using cached dependency..." + source ${dir}/cached-hive-dependency.sh + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/cached-hbase-dependency.sh + fi + source ${dir}/cached-hadoop-conf-dir.sh + # source ${dir}/cached-kafka-dependency.sh + source ${dir}/cached-spark-dependency.sh + # source ${dir}/cached-flink-dependency.sh + else + source ${dir}/find-hive-dependency.sh + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/find-hbase-dependency.sh + fi + source ${dir}/find-hadoop-conf-dir.sh + # source ${dir}/find-kafka-dependency.sh + source ${dir}/find-spark-dependency.sh + # source ${dir}/find-flink-dependency.sh + fi + + # Replace jars for different hadoop dist + bash ${dir}/replace-jars-under-spark.sh + + # get hdp_version + if [ -z "${hdp_version}" ]; then + hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export HDP_VERSION=\(.*\)/\2/"p` + verbose "hdp_version is ${hdp_version}" + fi + + # Replace jars for HDI + KYLIN_SPARK_JARS_HOME="${KYLIN_HOME}/spark/jars" + if [[ -d "/usr/hdp/current/hdinsight-zookeeper" && $hdp_version == "2"* ]] + then + echo "The current Hadoop environment is HDI3, will replace some jar packages under ${KYLIN_HOME}/spark/jars" + if [[ -f ${KYLIN_HOME}/tomcat/webapps/kylin.war ]] + then + if [[ ! -d ${KYLIN_HOME}/tomcat/webapps/kylin ]] + then + mkdir ${KYLIN_HOME}/tomcat/webapps/kylin + fi + mv ${KYLIN_HOME}/tomcat/webapps/kylin.war ${KYLIN_HOME}/tomcat/webapps/kylin + cd ${KYLIN_HOME}/tomcat/webapps/kylin + jar -xf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war + if [[ -f ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar ]] + then + echo "Remove ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar to avoid version conflicts" + rm -rf ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar + rm -rf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war + cd ${KYLIN_HOME}/ + fi + fi + + if [[ -d "${KYLIN_SPARK_JARS_HOME}" ]] + then + if [[ -f ${KYLIN_HOME}/hdi3_spark_jars_flag ]] + then + echo "Required jars have been added to ${KYLIN_HOME}/spark/jars, skip this step." + else + rm -rf ${KYLIN_HOME}/spark/jars/hadoop-* + cp /usr/hdp/current/spark2-client/jars/hadoop-* $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/spark2-client/jars/azure-* $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/hadoop-client/lib/microsoft-log4j-etwappender-1.0.jar $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/hadoop-client/lib/hadoop-lzo-0.6.0.${hdp_version}.jar $KYLIN_SPARK_JARS_HOME + + rm -rf $KYLIN_HOME/spark/jars/guava-14.0.1.jar + cp /usr/hdp/current/spark2-client/jars/guava-24.1.1-jre.jar $KYLIN_SPARK_JARS_HOME + + echo "Upload spark jars to HDFS" + hdfs dfs -test -d /spark2_jars + if [ $? -eq 1 ] + then + hdfs dfs -mkdir /spark2_jars + fi + hdfs dfs -put $KYLIN_SPARK_JARS_HOME/* /spark2_jars + + touch ${KYLIN_HOME}/hdi3_spark_jars_flag + fi + else + echo "${KYLIN_HOME}/spark/jars does not exist. You can run ${KYLIN_HOME}/download-spark.sh to download Spark." + fi + fi + + tomcat_root=${dir}/../tomcat + export tomcat_root + + # get KYLIN_REST_ADDRESS + if [ -z "$KYLIN_REST_ADDRESS" ] + then + KYLIN_REST_ADDRESS=`hostname -f`":"`grep "<Connector port=" ${tomcat_root}/conf/server.xml |grep protocol=\"HTTP/1.1\" | cut -d '=' -f 2 | cut -d \" -f 2` + export KYLIN_REST_ADDRESS + verbose "KYLIN_REST_ADDRESS is ${KYLIN_REST_ADDRESS}" + fi + # the number of Spring active profiles can be greater than 1.
Additional profiles + # can be added by setting kylin.security.additional-profiles + additional_security_profiles=`bash ${dir}/get-properties.sh kylin.security.additional-profiles` + if [[ "x${additional_security_profiles}" != "x" ]]; then + spring_profile="${spring_profile},${additional_security_profiles}" + fi + + # compose hadoop_dependencies + hadoop_dependencies=${hadoop_dependencies}:`hadoop classpath` +# if [ -n "${hbase_dependency}" ]; then +# hadoop_dependencies=${hadoop_dependencies}:${hbase_dependency} +# fi + if [ -n "${hive_dependency}" ]; then + #hadoop_dependencies=${hadoop_dependencies}:${hive_dependency} + hadoop_dependencies=${hive_dependency}:${hadoop_dependencies} + fi + if [ -n "${kafka_dependency}" ]; then + hadoop_dependencies=${hadoop_dependencies}:${kafka_dependency} + fi + if [ -n "${spark_dependency}" ]; then + #hadoop_dependencies=${hadoop_dependencies}:${spark_dependency} + hadoop_dependencies=${spark_dependency}:${hadoop_dependencies} + fi + + # compose KYLIN_TOMCAT_CLASSPATH + tomcat_classpath=${tomcat_root}/bin/bootstrap.jar:${tomcat_root}/bin/tomcat-juli.jar:${tomcat_root}/lib/* + export KYLIN_TOMCAT_CLASSPATH=${tomcat_classpath}:${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies}:${flink_dependency} + + # compose KYLIN_TOOL_CLASSPATH + export KYLIN_TOOL_CLASSPATH=${KYLIN_HOME}/conf:${KYLIN_HOME}/tool/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies} + + # compose kylin_common_opts + kylin_common_opts="-Dkylin.hive.dependency=${hive_dependency} \ + -Dkylin.kafka.dependency=${kafka_dependency} \ + -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \ + -Dkylin.server.host-address=${KYLIN_REST_ADDRESS} \ + -Dspring.profiles.active=${spring_profile} \ + -Dhdp.version=${hdp_version}" + + # compose KYLIN_TOMCAT_OPTS + KYLIN_TOMCAT_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-server-log4j.properties \ + -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager \ + -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true \ + -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \ + -Djava.endorsed.dirs=${tomcat_root}/endorsed \ + -Dcatalina.base=${tomcat_root} \ + -Dcatalina.home=${tomcat_root} \ + -Djava.io.tmpdir=${tomcat_root}/temp ${kylin_common_opts}" + export KYLIN_TOMCAT_OPTS + + # compose KYLIN_TOOL_OPTS + KYLIN_TOOL_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties ${kylin_common_opts}" + export KYLIN_TOOL_OPTS +} + +function checkBasicKylinProps() { + spring_profile=`${dir}/get-properties.sh kylin.security.profile` + if [ -z "$spring_profile" ] + then + quit 'Please set kylin.security.profile in kylin.properties, options are: testing, ldap, saml.' + else + verbose "kylin.security.profile is $spring_profile" + fi +} + +function prepareFairScheduler() { + cat > ${KYLIN_HOME}/conf/fairscheduler.xml <<EOL +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<allocations> + <pool name="query_pushdown"> + <schedulingMode>FAIR</schedulingMode> + <weight>1</weight> + <minShare>1</minShare> + </pool> + <pool name="heavy_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>5</weight> + <minShare>1</minShare> + </pool> + <pool name="lightweight_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>10</weight> + <minShare>1</minShare> + </pool> + <pool name="vip_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>15</weight> + <minShare>1</minShare> + </pool> +</allocations> +EOL +} + +function checkRestPort() { + kylin_rest_address_arr=(${KYLIN_REST_ADDRESS//:/ }) + inuse=`netstat -tlpn | grep "\b${kylin_rest_address_arr[1]}\b"` + [[ -z ${inuse} ]] || quit "Port ${kylin_rest_address_arr[1]} is not available. Another kylin server is running?" +} + + +function classpathDebug() { + if [ "${KYLIN_CLASSPATH_DEBUG}" != "" ]; then + echo "Finding ${KYLIN_CLASSPATH_DEBUG} on classpath" $@ + $JAVA -classpath $@ org.apache.kylin.common.util.ClasspathScanner ${KYLIN_CLASSPATH_DEBUG} + fi +} + +function runTool() { + + retrieveDependency + + # get KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv-tool.sh" ]; then + source ${KYLIN_HOME}/conf/setenv-tool.sh + fi + + verbose "java opts for tool is ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOOL_OPTS}" + verbose "java classpath for tool is ${KYLIN_TOOL_CLASSPATH}" + classpathDebug ${KYLIN_TOOL_CLASSPATH} + + exec $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOOL_OPTS} -classpath ${KYLIN_TOOL_CLASSPATH} "$@" +} + +if [ "$2" == "--reload-dependency" ] +then + reload_dependency=1 +fi + +# start command +if [ "$1" == "start" ] +then + if [ -f "${KYLIN_HOME}/pid" ] + then + PID=`cat $KYLIN_HOME/pid` + if ps -p $PID > /dev/null + then + quit "Kylin is running, stop it first" + fi + fi + + checkBasicKylinProps + + source ${dir}/check-env.sh + + retrieveDependency + + checkRestPort + + prepareFairScheduler + + ${KYLIN_HOME}/bin/check-migration-acl.sh || { exit 1; } + + # get KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv.sh" ]; then + source ${KYLIN_HOME}/conf/setenv.sh + fi + + security_ldap_truststore=`bash ${dir}/get-properties.sh kylin.security.ldap.connection-truststore` + if [ -f "${security_ldap_truststore}" ]; then + KYLIN_EXTRA_START_OPTS="$KYLIN_EXTRA_START_OPTS -Djavax.net.ssl.trustStore=$security_ldap_truststore" + fi + + verbose "java opts is ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS}" + verbose "java classpath is ${KYLIN_TOMCAT_CLASSPATH}" + classpathDebug ${KYLIN_TOMCAT_CLASSPATH} + $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS} -classpath ${KYLIN_TOMCAT_CLASSPATH} org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid & + + echo "" + echo "A new Kylin instance is started by $USER. 
To stop it, run 'kylin.sh stop'" + echo "Check the log at ${KYLIN_HOME}/logs/kylin.log" + echo "Web UI is at http://${KYLIN_REST_ADDRESS}/kylin" + exit 0 + +# run command +elif [ "$1" == "run" ] +then + retrieveStartCommand + ${start_command} + +# stop command +elif [ "$1" == "stop" ] +then + if [ -f "${KYLIN_HOME}/pid" ] + then + PID=`cat $KYLIN_HOME/pid` + WAIT_TIME=2 + LOOP_COUNTER=10 + if ps -p $PID > /dev/null + then + echo "Stopping Kylin: $PID" + kill $PID + + for ((i=0; i<$LOOP_COUNTER; i++)) + do + # wait for the process to stop + sleep $WAIT_TIME + if ps -p $PID > /dev/null ; then + echo "Stopping in progress. Will check after $WAIT_TIME secs again..." + continue; + else + break; + fi + done + + # if process is still around, use kill -9 + if ps -p $PID > /dev/null + then + echo "Initial kill failed, getting serious now..." + kill -9 $PID + sleep 1 #give kill -9 some time to "kill" + if ps -p $PID > /dev/null + then + quit "Warning, even kill -9 failed, giving up! Sorry..." + fi + fi + + # process is killed, remove pid file + rm -rf ${KYLIN_HOME}/pid + echo "Kylin with pid ${PID} has been stopped." + exit 0 + else + quit "Kylin with pid ${PID} is not running" + fi + else + quit "Kylin is not running" + fi + +# streaming command +elif [ "$1" == "streaming" ] +then + if [ $# -lt 2 ] + then + echo "Invalid input args $@" + exit -1 + fi + if [ "$2" == "start" ] + then + if [ -f "${KYLIN_HOME}/streaming_receiver_pid" ] + then + PID=`cat $KYLIN_HOME/streaming_receiver_pid` + if ps -p $PID > /dev/null + then + echo "Kylin streaming receiver is running, stop it first" + exit 1 + fi + fi + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/find-hbase-dependency.sh + fi + #retrieve $KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv.sh" ] + then source ${KYLIN_HOME}/conf/setenv.sh + fi + + mkdir -p ${KYLIN_HOME}/ext + HBASE_CLASSPATH=`hbase classpath` + #echo "hbase class path:"$HBASE_CLASSPATH + STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH} + + # KYLIN_EXTRA_START_OPTS is for customized settings, check out bin/setenv.sh + $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \ + -Dlog4j.configuration=stream-receiver-log4j.properties\ + -DKYLIN_HOME=${KYLIN_HOME}\ + -Dkylin.hbase.dependency=${hbase_dependency} \ + org.apache.kylin.stream.server.StreamingReceiver $@ > ${KYLIN_HOME}/logs/streaming_receiver.out 2>&1 & echo $! > ${KYLIN_HOME}/streaming_receiver_pid & + exit 0 + elif [ "$2" == "stop" ] + then + if [ ! -f "${KYLIN_HOME}/streaming_receiver_pid" ] + then + echo "Streaming receiver is not running, please check" + exit 1 + fi + PID=`cat ${KYLIN_HOME}/streaming_receiver_pid` + if [ "$PID" = "" ] + then + echo "Streaming receiver is not running, please check" + exit 1 + else + echo "Stopping streaming receiver: $PID" + WAIT_TIME=2 + LOOP_COUNTER=20 + if ps -p $PID > /dev/null + then + kill $PID + + for ((i=0; i<$LOOP_COUNTER; i++)) + do + # wait for the process to stop + sleep $WAIT_TIME + if ps -p $PID > /dev/null ; then + echo "Stopping in progress. Will check after $WAIT_TIME secs again..." + continue; + else + break; + fi + done + + # if process is still around, use kill -9 + if ps -p $PID > /dev/null + then + echo "Initial kill failed, getting serious now..." + kill -9 $PID + sleep 1 #give kill -9 some time to "kill" + if ps -p $PID > /dev/null + then + quit "Warning, even kill -9 failed, giving up! Sorry..." + fi + fi + + # process is killed, remove pid file + rm -rf ${KYLIN_HOME}/streaming_receiver_pid + echo "Kylin streaming receiver with pid ${PID} has been stopped." + exit 0 + else + quit "Kylin streaming receiver with pid ${PID} is not running" + fi + fi + elif [[ "$2" = org.apache.kylin.* ]] + then + source ${KYLIN_HOME}/conf/setenv.sh + HBASE_CLASSPATH=`hbase classpath` + #echo "hbase class path:"$HBASE_CLASSPATH + STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH} + + shift + # KYLIN_EXTRA_START_OPTS is for customized settings, check out bin/setenv.sh + $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \ + -Dlog4j.configuration=stream-receiver-log4j.properties\ + -DKYLIN_HOME=${KYLIN_HOME}\ + -Dkylin.hbase.dependency=${hbase_dependency} \ + "$@" + exit 0 + fi + +elif [ "$1" = "version" ] +then + runTool org.apache.kylin.common.KylinVersion + +elif [ "$1" = "diag" ] +then + echo "'kylin.sh diag' no longer supported, use diag.sh instead" + exit 0 + +# tool command +elif [[ "$1" = org.apache.kylin.* ]] +then + runTool "$@" +else + quit "Usage: 'kylin.sh [-v] start' or 'kylin.sh [-v] stop'" +fi diff --git a/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000..8f016e2 --- /dev/null +++ b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml @@ -0,0 +1,134 @@ +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<configuration> + + <property> + <name>yarn.scheduler.capacity.maximum-applications</name> + <value>4</value> + <description> + Maximum number of applications that can be pending and running. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.maximum-am-resource-percent</name> + <value>0.5</value> + <description> + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.resource-calculator</name> + <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> + <description> + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.queues</name> + <value>default</value> + <description> + The queues at this level (root is the root queue).
+ </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.capacity</name> + <value>100</value> + <description>Default queue target capacity.</description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.user-limit-factor</name> + <value>1</value> + <description> + Default queue user limit, a percentage from 0.0 to 1.0. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.maximum-capacity</name> + <value>100</value> + <description> + The maximum capacity of the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.state</name> + <value>RUNNING</value> + <description> + The state of the default queue. State can be one of RUNNING or STOPPED. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name> + <value>*</value> + <description> + The ACL of who can submit jobs to the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name> + <value>*</value> + <description> + The ACL of who can administer jobs on the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.node-locality-delay</name> + <value>40</value> + <description> + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to the number of nodes in the cluster. By default it is set to + approximately the number of nodes in one rack, which is 40. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings</name> + <value></value> + <description> + A list of mappings that will be used to assign jobs to queues. + The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* + Typically this list will be used to map users to queues, + for example, u:%user:%user maps all users to queues with the same name + as the user. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings-override.enable</name> + <value>false</value> + <description> + If a queue mapping is present, will it override the value specified + by the user? This can be used by administrators to place jobs in queues + that are different than the one specified by the user. + The default is false. + </description> + </property> + +</configuration> diff --git a/docker/dockerfile/standalone/conf/hive/hive-site.xml b/docker/dockerfile/standalone/conf/hive/hive-site.xml index fc51985..589e40f 100644 --- a/docker/dockerfile/standalone/conf/hive/hive-site.xml +++ b/docker/dockerfile/standalone/conf/hive/hive-site.xml @@ -37,4 +37,8 @@ <value>123456</value> <description>password to use against metastore database</description> </property> -</configuration> \ No newline at end of file + <property> + <name>hive.metastore.schema.verification</name> + <value>false</value> + </property> +</configuration> diff --git a/docker/dockerfile/standalone/conf/kylin/kylin.properties b/docker/dockerfile/standalone/conf/kylin/kylin.properties new file mode 100644 index 0000000..280b846 --- /dev/null +++ b/docker/dockerfile/standalone/conf/kylin/kylin.properties @@ -0,0 +1,377 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + + + +# The commented values below take effect as default settings +# Uncomment and override them if necessary + + + +# +#### METADATA | ENV ### +# +## The metadata store has two implementations (RDBMS/HBase); RDBMS is recommended in Kylin 4.X +## Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase +#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10 +# +## metadata cache sync retry times +#kylin.metadata.sync-retries=3 +# +## Working folder in HDFS, preferably a fully qualified absolute path; make sure the user has the right permission on this directory +#kylin.env.hdfs-working-dir=/kylin +# +## DEV|QA|PROD. DEV will turn on some dev features; QA and PROD have no difference in terms of functions. +#kylin.env=QA +# +## kylin zk base path +#kylin.env.zookeeper-base-path=/kylin +# +## Run a TestingServer for curator locally +#kylin.env.zookeeper-is-local=false +# +## Connect to a remote zookeeper with this url; kylin.env.zookeeper-is-local should be set to false +#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com +# +#### SERVER | WEB | RESTCLIENT ### +# +## Kylin server mode, valid value [all, query, job] +#kylin.server.mode=all +# +### Kylin server port +#server.port=7070 +# +## List of web servers in use; this enables one web server instance to sync up with other servers.
+#kylin.server.cluster-servers=localhost:7070 +# +## Display timezone on UI,format like[GMT+N or GMT-N] +#kylin.web.timezone= +# +## Timeout value for the queries submitted through the Web UI, in milliseconds +#kylin.web.query-timeout=300000 +# +#kylin.web.cross-domain-enabled=true +# +##allow user to export query result +#kylin.web.export-allow-admin=true +#kylin.web.export-allow-other=true +# +## Hide measures in measure list of cube designer, separate by comma +#kylin.web.hide-measures=RAW +# +##max connections of one route +#kylin.restclient.connection.default-max-per-route=20 +# +##max connections of one rest-client +#kylin.restclient.connection.max-total=200 +# +#### PUBLIC CONFIG ### +#kylin.engine.default=6 +#kylin.storage.default=4 +#kylin.web.hive-limit=20 +#kylin.web.help.length=4 +#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html +#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs/tutorial/odbc.html +#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html +#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html +#kylin.web.link-streaming-guide=http://kylin.apache.org/ +#kylin.htrace.show-gui-trace-toggle=false +#kylin.web.link-hadoop= +#kylin.web.link-diagnostic= +#kylin.web.contact-mail= +#kylin.server.external-acl-provider= +# +## Default time filter for job list, 0->current day, 1->last one day, 2->last one week, 3->last one year, 4->all +#kylin.web.default-time-filter=1 +# +#### SOURCE ### +## Define how to access to hive metadata +## When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead +#kylin.source.hive.metadata-type=hcatalog +# +## Hive client, valid value [cli, beeline] +#kylin.source.hive.client=cli +# +## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH +#kylin.source.hive.beeline-shell=beeline +# +## Hive database name for putting the intermediate flat tables +#kylin.source.hive.database-for-flat-table=default +# +#### STORAGE ### +# +## The storage for final cube file in hbase +#kylin.storage.url=hbase +# +## clean real storage after delete operation +## if you want to delete the real storage like htable of deleting segment, you can set it to true +#kylin.storage.clean-after-delete-operation=false +# +#### JOB ### +# +## Max job retry on error, default 0: no retry +#kylin.job.retry=0 +# +## Max count of concurrent jobs running +#kylin.job.max-concurrent-jobs=10 +# +## The percentage of the sampling, default 100% +#kylin.job.sampling-percentage=100 +# +## If true, will send email notification on job complete +##kylin.job.notification-enabled=true +##kylin.job.notification-mail-enable-starttls=true +##kylin.job.notification-mail-host=smtp.office365.com +##kylin.job.notification-mail-port=587 +##kylin.job.notification-mail-username=ky...@example.com +##kylin.job.notification-mail-password=mypassword +##kylin.job.notification-mail-sender=ky...@example.com +#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler +#kylin.job.scheduler.default=0 +# +#### CUBE | DICTIONARY ### +# +#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler +#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor +# +## 'auto', 'inmem', 'layer' or 'random' for testing +#kylin.cube.algorithm=layer +# +## A smaller threshold prefers layer, a larger threshold prefers in-mem 
+#kylin.cube.algorithm.layer-or-inmem-threshold=7 +# +## auto use inmem algorithm for: +## 1. cube planner optimize jobs +## 2. no source records +#kylin.cube.algorithm.inmem-auto-optimize=true +# +#kylin.cube.aggrgroup.max-combination=32768 +# +#kylin.cube.cubeplanner.enabled=false +#kylin.cube.cubeplanner.enabled-for-existing-cube=false +#kylin.cube.cubeplanner.expansion-threshold=15.0 +#kylin.cube.cubeplanner.recommend-cache-max-size=200 +#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000 +#kylin.cube.cubeplanner.algorithm-threshold-greedy=8 +#kylin.cube.cubeplanner.algorithm-threshold-genetic=23 +# +#### QUERY ### +# +## Controls the maximum number of bytes a query is allowed to scan in storage. +## The default value 0 means no limit. +## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor. +#kylin.query.max-scan-bytes=0 +# +#kylin.query.cache-enabled=true +#kylin.query.cache-threshold-scan-count=10240 +#kylin.query.cache-threshold-duration=2000 +#kylin.query.cache-threshold-scan-bytes=1048576 +#kylin.query.large-query-threshold=1000000 +# +## Controls extras properties for Calcite jdbc driver +## all extras properties should be under the prefix "kylin.query.calcite.extras-props." +## case sensitive, default: true; to enable case-insensitive matching set it to false +## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE +#kylin.query.calcite.extras-props.caseSensitive=true +## how to handle unquoted identifiers, default: TO_UPPER, available options: UNCHANGED, TO_UPPER, TO_LOWER +## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING +#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER +## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE, BACK_TICK, BRACKET +## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING +#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE +## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal +## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum +#kylin.query.calcite.extras-props.conformance=LENIENT +# +## TABLE ACL +#kylin.query.security.table-acl-enabled=true +# +## Usually should not modify this +#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor +# +#kylin.query.escape-default-keyword=false +# +## Usually should not modify this +#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack +# +#### SECURITY ### +# +## Spring security profile, options: testing, ldap, saml +## with the "testing" profile, users can log in with the pre-defined name/pwd ADMIN/KYLIN +#kylin.security.profile=testing +# +## Admin roles in LDAP, for ldap and saml +#kylin.security.acl.admin-role=admin +# +## LDAP authentication configuration +#kylin.security.ldap.connection-server=ldap://ldap_server:389 +#kylin.security.ldap.connection-username= +#kylin.security.ldap.connection-password= +## When you use a customized CA certificate library for user authentication over LDAPS, you need to configure this item. +## The value of this item will be added to the JVM parameter javax.net.ssl.trustStore.
+#kylin.security.ldap.connection-truststore= +# +## LDAP user account directory; +#kylin.security.ldap.user-search-base= +#kylin.security.ldap.user-search-pattern= +#kylin.security.ldap.user-group-search-base= +#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1})) +# +## LDAP service account directory +#kylin.security.ldap.service-search-base= +#kylin.security.ldap.service-search-pattern= +#kylin.security.ldap.service-group-search-base= +# +### SAML configurations for SSO +## SAML IDP metadata file location +#kylin.security.saml.metadata-file=classpath:sso_metadata.xml +#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin +#kylin.security.saml.keystore-file=classpath:samlKeystore.jks +#kylin.security.saml.context-scheme=https +#kylin.security.saml.context-server-name=hostname +#kylin.security.saml.context-server-port=443 +#kylin.security.saml.context-path=/kylin +# +#### SPARK BUILD ENGINE CONFIGS ### +# +## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit +## This must contain site xmls of core, yarn, hive, and hbase in one folder +##kylin.env.hadoop-conf-dir=/etc/hadoop/conf +# +## Spark conf (default is in spark/conf/spark-defaults.conf) +#kylin.engine.spark-conf.spark.master=yarn +##kylin.engine.spark-conf.spark.submit.deployMode=client +#kylin.engine.spark-conf.spark.yarn.queue=default +##kylin.engine.spark-conf.spark.executor.cores=1 +##kylin.engine.spark-conf.spark.executor.memory=4G +##kylin.engine.spark-conf.spark.executor.instances=1 +##kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M +#kylin.engine.spark-conf.spark.driver.cores=1 +#kylin.engine.spark-conf.spark.driver.memory=1G +#kylin.engine.spark-conf.spark.shuffle.service.enabled=true +#kylin.engine.spark-conf.spark.eventLog.enabled=true +#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false +#kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 -Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=job -Dkylin.spark.project=${job.project} -Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} -Duser.timezone=${user.timezone} +##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +# +## manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime +##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec +# +## uncomment for HDP +##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +# +#### SPARK QUERY ENGINE CONFIGS (a.k.a. 
Sparder Context) ### +## Enlarge cores and memory to improve query performance in production env, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X +# +#kylin.query.spark-conf.spark.master=yarn +##kylin.query.spark-conf.spark.submit.deployMode=client +#kylin.query.spark-conf.spark.driver.cores=1 +#kylin.query.spark-conf.spark.driver.memory=4G +#kylin.query.spark-conf.spark.driver.memoryOverhead=1G +#kylin.query.spark-conf.spark.executor.cores=1 +#kylin.query.spark-conf.spark.executor.instances=1 +#kylin.query.spark-conf.spark.executor.memory=4G +#kylin.query.spark-conf.spark.executor.memoryOverhead=1G +#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +##kylin.query.spark-conf.spark.sql.shuffle.partitions=40 +##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +# +#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.project=${job.project} +## uncomment for HDP +##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +# +#### QUERY PUSH DOWN ### +# +##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl +##kylin.query.pushdown.update-enabled=false + +kylin.env=QA +kylin.server.mode=all +kylin.server.host-address=127.0.0.1:7070 +server.port=7070 +# Display timezone on UI,format like[GMT+N or GMT-N] +kylin.web.timezone=GMT+8 + +kylin.source.hive.client=cli +kylin.source.hive.database-for-flat-table=kylin4 + +kylin.engine.spark-conf.spark.eventLog.enabled=true +kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false + +kylin.engine.spark-conf.spark.yarn.submit.file.replication=1 +kylin.engine.spark-conf.spark.master=yarn +kylin.engine.spark-conf.spark.driver.memory=512M +kylin.engine.spark-conf.spark.driver.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.memory=1G +kylin.engine.spark-conf.spark.executor.instances=1 +kylin.engine.spark-conf.spark.executor.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.cores=1 +kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* + +kylin.storage.columnar.shard-rowcount=2500000 +kylin.storage.columnar.shard-countdistinct-rowcount=1000000 +kylin.storage.columnar.repartition-threshold-size-mb=128 +kylin.storage.columnar.shard-size-mb=128 + +kylin.query.auto-sparder-context=true +kylin.query.sparder-context.app-name=sparder_on_docker +kylin.query.spark-conf.spark.master=yarn +kylin.query.spark-conf.spark.driver.memory=512M +kylin.query.spark-conf.spark.driver.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.memory=1G +kylin.query.spark-conf.spark.executor.instances=1 +kylin.query.spark-conf.spark.executor.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.cores=1 +kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +kylin.query.spark-conf.spark.sql.shuffle.partitions=1 
+kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +kylin.query.spark-conf.spark.eventLog.enabled=true +kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history + +# for local cache +kylin.query.cache-enabled=false + +# for pushdown query +kylin.query.pushdown.update-enabled=false +kylin.query.pushdown.enabled=true +kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl + +# for Cube Planner +kylin.cube.cubeplanner.enabled=true +kylin.server.query-metrics2-enabled=false +kylin.metrics.reporter-query-enabled=false +kylin.metrics.reporter-job-enabled=false +kylin.metrics.monitor-enabled=false +kylin.web.dashboard-enabled=false + +# metadata for mysql +kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10 +kylin.env.hdfs-working-dir=/kylin4_metadata +kylin.env.zookeeper-base-path=/kylin4 +kylin.env.zookeeper-connect-string=127.0.0.1 + +kylin.storage.clean-after-delete-operation=true diff --git a/docker/dockerfile/standalone/conf/spark/spark-defaults.conf b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf new file mode 100644 index 0000000..dac2e3c --- /dev/null +++ b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. 
+ +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" + +spark.sql.catalogImplementation hive +spark.driver.maxResultSize 1g +spark.sql.hive.thriftServer.singleSession false + +spark.serializer org.apache.spark.serializer.JavaSerializer + +spark.memory.useLegacyMode false +spark.memory.fraction 0.3 +spark.memory.storageFraction 0.3 + +spark.rdd.compress true +spark.io.compression.codec snappy + +spark.locality.wait 100ms +spark.speculation false + +spark.task.maxFailures 4 + +spark.scheduler.minRegisteredResourcesRatio 1.0 +spark.scheduler.maxRegisteredResourcesWaitingTime 60s + +spark.yarn.jars hdfs://localhost:9000/spark2_jars/* + + + + + diff --git a/docker/dockerfile/standalone/conf/spark/spark-env.sh b/docker/dockerfile/standalone/conf/spark/spark-env.sh new file mode 100755 index 0000000..3c5837d --- /dev/null +++ b/docker/dockerfile/standalone/conf/spark/spark-env.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client/cluster mode +# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 
1000M, 2G) (Default: 1G) + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) +# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. +# Options for native BLAS, like Intel MKL, OpenBLAS, and so on. +# You might get better performance to enable these options if using native BLAS (see SPARK-21305). +# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL +# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS + +export JAVA_HOME=/home/admin/jdk1.8.0_141 +export CLASSPATH=.:$JAVA_HOME/lib +export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native + +export SPARK_PID_DIR=${SPARK_HOME}/ + diff --git a/docker/dockerfile/standalone/conf/zk/zoo.cfg b/docker/dockerfile/standalone/conf/zk/zoo.cfg new file mode 100644 index 0000000..1a576de --- /dev/null +++ b/docker/dockerfile/standalone/conf/zk/zoo.cfg @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 +# the directory where the snapshot is stored. +# do not use /tmp for storage, /tmp here is just +# for example's sake. +dataDir=/data/zookeeper +# the port at which the clients will connect +clientPort=2181 +# the maximum number of client connections. +# increase this if you need to handle more clients +#maxClientCnxns=60 +# +# Be sure to read the maintenance section of the +# administrator guide before turning on autopurge. +# +# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance +# +# The number of snapshots to retain in dataDir +#autopurge.snapRetainCount=3 +# Purge task interval in hours +# Set to "0" to disable auto purge feature +#autopurge.purgeInterval=1 diff --git a/docker/setup_standalone.sh b/docker/setup_standalone.sh index 3ed32ce..a6289af 100755 --- a/docker/setup_standalone.sh +++ b/docker/setup_standalone.sh @@ -23,4 +23,4 @@ docker run -d \ -p 8032:8032 \ -p 8042:8042 \ -p 2181:2181 \ -apachekylin/apache-kylin-standalone:4.0.0-alpha +apachekylin/apache-kylin-standalone:4.0.0-beta
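For readers who want to try this change, below is a minimal sketch of building and starting the updated standalone image, pieced together from the commands visible in this diff. Note the setup_standalone.sh hunk above is truncated, so the port mappings shown below only repeat the visible ones plus 7070, the Kylin web port set by server.port in kylin.properties; treat the exact set of -p flags as an assumption and check the full setup_standalone.sh for the authoritative list.

# Run from the directory that contains Dockerfile and Dockerfile_hadoop.
# Build the Hadoop base image first, then the Kylin image on top of it,
# the same two steps that build_standalone_image.sh performs:
docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta .
docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta .

# Start a container (the 7070 mapping is an assumption, see note above):
docker run -d \
  -p 7070:7070 \
  -p 8032:8032 \
  -p 8042:8042 \
  -p 2181:2181 \
  apachekylin/apache-kylin-standalone:4.0.0-beta

Once kylin.sh has started inside the container, the web UI should be reachable at http://localhost:7070/kylin; with the default "testing" security profile, the pre-defined ADMIN/KYLIN account from kylin.properties applies.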