This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin-on-parquet-v2 in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push: new e1f7157 KYLIN-4913 Update docker image for Kylin 4.0 Beta e1f7157 is described below commit e1f7157dd5dd52b8f6e3e76d7dc873fe9e973f67 Author: Zhichao Zhang <441586...@qq.com> AuthorDate: Wed Feb 24 19:42:09 2021 +0800 KYLIN-4913 Update docker image for Kylin 4.0 Beta --- docker/build_standalone_image.sh | 4 +- docker/dockerfile/standalone/Dockerfile | 15 +- .../standalone}/build_standalone_image.sh | 4 +- docker/dockerfile/standalone/conf/bin/kylin.sh | 504 +++++++++++++++++++++ .../standalone/conf/hadoop/capacity-scheduler.xml | 134 ++++++ .../dockerfile/standalone/conf/hive/hive-site.xml | 6 +- .../standalone/conf/kylin/kylin.properties | 377 +++++++++++++++ .../standalone/conf/spark/spark-defaults.conf | 55 +++ .../dockerfile/standalone/conf/spark/spark-env.sh | 77 ++++ docker/dockerfile/standalone/conf/zk/zoo.cfg | 45 ++ docker/setup_standalone.sh | 2 +- 11 files changed, 1211 insertions(+), 12 deletions(-) diff --git a/docker/build_standalone_image.sh b/docker/build_standalone_image.sh index 9c0b925..749ebbc 100755 --- a/docker/build_standalone_image.sh +++ b/docker/build_standalone_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . -docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta . diff --git a/docker/dockerfile/standalone/Dockerfile b/docker/dockerfile/standalone/Dockerfile index a168e6c..1d1ee3b 100644 --- a/docker/dockerfile/standalone/Dockerfile +++ b/docker/dockerfile/standalone/Dockerfile @@ -16,17 +16,20 @@ # # Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one-for-kylin4 +FROM hadoop2.7-all-in-one-for-kylin4-beta -ENV KYLIN_VERSION 4.0.0-alpha -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2 +ENV KYLIN_VERSION 4.0.0-beta +ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin # Download Kylin -RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ - && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz +RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ + && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ + && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz RUN rm -f $KYLIN_HOME/conf/kylin.properties COPY conf/kylin/* $KYLIN_HOME/conf/ +RUN rm -f $KYLIN_HOME/bin/kylin.sh +COPY conf/bin/kylin.sh $KYLIN_HOME/bin/ +RUN chmod +x $KYLIN_HOME/bin/kylin.sh RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/ RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh diff --git a/docker/build_standalone_image.sh b/docker/dockerfile/standalone/build_standalone_image.sh similarity index 97% copy from docker/build_standalone_image.sh copy to docker/dockerfile/standalone/build_standalone_image.sh index 9c0b925..749ebbc 100755 --- a/docker/build_standalone_image.sh +++ b/docker/dockerfile/standalone/build_standalone_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . 
-docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta . diff --git a/docker/dockerfile/standalone/conf/bin/kylin.sh b/docker/dockerfile/standalone/conf/bin/kylin.sh new file mode 100755 index 0000000..a691cec --- /dev/null +++ b/docker/dockerfile/standalone/conf/bin/kylin.sh @@ -0,0 +1,504 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# set verbose=true to print more logs during start up + + + + +source ${KYLIN_HOME:-"$(cd -P -- "$(dirname -- "$0")" && pwd -P)/../"}/bin/header.sh $@ +if [ "$verbose" = true ]; then + shift +fi + +mkdir -p ${KYLIN_HOME}/logs +mkdir -p ${KYLIN_HOME}/ext + +source ${dir}/set-java-home.sh + +function retrieveDependency() { + #retrieve $hive_dependency and $hbase_dependency + if [[ -z $reload_dependency && `ls -1 ${dir}/cached-* 2>/dev/null | wc -l` -eq 6 ]] + then + echo "Using cached dependency..." + source ${dir}/cached-hive-dependency.sh + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/cached-hbase-dependency.sh + fi + source ${dir}/cached-hadoop-conf-dir.sh + # source ${dir}/cached-kafka-dependency.sh + source ${dir}/cached-spark-dependency.sh + # source ${dir}/cached-flink-dependency.sh + else + source ${dir}/find-hive-dependency.sh + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/find-hbase-dependency.sh + fi + source ${dir}/find-hadoop-conf-dir.sh + # source ${dir}/find-kafka-dependency.sh + source ${dir}/find-spark-dependency.sh + # source ${dir}/find-flink-dependency.sh + fi + + # Replace jars for different hadoop dist + bash ${dir}/replace-jars-under-spark.sh + + # get hdp_version + if [ -z "${hdp_version}" ]; then + hdp_version=`/bin/bash -x hadoop 2>&1 | sed -n "s/\(.*\)export HDP_VERSION=\(.*\)/\2/"p` + verbose "hdp_version is ${hdp_version}" + fi + + # Replace jars for HDI + KYLIN_SPARK_JARS_HOME="${KYLIN_HOME}/spark/jars" + if [[ -d "/usr/hdp/current/hdinsight-zookeeper" && $hdp_version == "2"* ]] + then + echo "The current Hadoop environment is HDI3, will replace some jar packages under ${KYLIN_HOME}/spark/jars" + if [[ -f ${KYLIN_HOME}/tomcat/webapps/kylin.war ]] + then + if [[ ! -d ${KYLIN_HOME}/tomcat/webapps/kylin ]] + then + mkdir ${KYLIN_HOME}/tomcat/webapps/kylin + fi + mv ${KYLIN_HOME}/tomcat/webapps/kylin.war ${KYLIN_HOME}/tomcat/webapps/kylin + cd ${KYLIN_HOME}/tomcat/webapps/kylin + jar -xf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war + if [[ -f ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar ]] + then + echo "Remove ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar to avoid version conflicts" + rm -rf ${KYLIN_HOME}/tomcat/webapps/kylin/WEB-INF/lib/guava-14.0.jar + rm -rf ${KYLIN_HOME}/tomcat/webapps/kylin/kylin.war + cd ${KYLIN_HOME}/ + fi + fi + + if [[ -d "${KYLIN_SPARK_JARS_HOME}" ]] + then + if [[ -f ${KYLIN_HOME}/hdi3_spark_jars_flag ]] + then + echo "Required jars have been added to ${KYLIN_HOME}/spark/jars, skip this step." + else + rm -rf ${KYLIN_HOME}/spark/jars/hadoop-* + cp /usr/hdp/current/spark2-client/jars/hadoop-* $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/spark2-client/jars/azure-* $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/hadoop-client/lib/microsoft-log4j-etwappender-1.0.jar $KYLIN_SPARK_JARS_HOME + cp /usr/hdp/current/hadoop-client/lib/hadoop-lzo-0.6.0.${hdp_version}.jar $KYLIN_SPARK_JARS_HOME + + rm -rf $KYLIN_HOME/spark/jars/guava-14.0.1.jar + cp /usr/hdp/current/spark2-client/jars/guava-24.1.1-jre.jar $KYLIN_SPARK_JARS_HOME + + echo "Upload spark jars to HDFS" + hdfs dfs -test -d /spark2_jars + if [ $? -eq 1 ] + then + hdfs dfs -mkdir /spark2_jars + fi + hdfs dfs -put $KYLIN_SPARK_JARS_HOME/* /spark2_jars + + touch ${KYLIN_HOME}/hdi3_spark_jars_flag + fi + else + echo "${KYLIN_HOME}/spark/jars does not exist. You can run ${KYLIN_HOME}/download-spark.sh to download Spark." + fi + fi + + tomcat_root=${dir}/../tomcat + export tomcat_root + + # get KYLIN_REST_ADDRESS + if [ -z "$KYLIN_REST_ADDRESS" ] + then + KYLIN_REST_ADDRESS=`hostname -f`":"`grep "<Connector port=" ${tomcat_root}/conf/server.xml |grep protocol=\"HTTP/1.1\" | cut -d '=' -f 2 | cut -d \" -f 2` + export KYLIN_REST_ADDRESS + verbose "KYLIN_REST_ADDRESS is ${KYLIN_REST_ADDRESS}" + fi + # the number of Spring active profiles can be greater than 1.
Additional profiles + # can be added by setting kylin.security.additional-profiles + additional_security_profiles=`bash ${dir}/get-properties.sh kylin.security.additional-profiles` + if [[ "x${additional_security_profiles}" != "x" ]]; then + spring_profile="${spring_profile},${additional_security_profiles}" + fi + + # compose hadoop_dependencies + hadoop_dependencies=${hadoop_dependencies}:`hadoop classpath` +# if [ -n "${hbase_dependency}" ]; then +# hadoop_dependencies=${hadoop_dependencies}:${hbase_dependency} +# fi + if [ -n "${hive_dependency}" ]; then + #hadoop_dependencies=${hadoop_dependencies}:${hive_dependency} + hadoop_dependencies=${hive_dependency}:${hadoop_dependencies} + fi + if [ -n "${kafka_dependency}" ]; then + hadoop_dependencies=${hadoop_dependencies}:${kafka_dependency} + fi + if [ -n "${spark_dependency}" ]; then + #hadoop_dependencies=${hadoop_dependencies}:${spark_dependency} + hadoop_dependencies=${spark_dependency}:${hadoop_dependencies} + fi + + # compose KYLIN_TOMCAT_CLASSPATH + tomcat_classpath=${tomcat_root}/bin/bootstrap.jar:${tomcat_root}/bin/tomcat-juli.jar:${tomcat_root}/lib/* + export KYLIN_TOMCAT_CLASSPATH=${tomcat_classpath}:${KYLIN_HOME}/conf:${KYLIN_HOME}/lib/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies}:${flink_dependency} + + # compose KYLIN_TOOL_CLASSPATH + export KYLIN_TOOL_CLASSPATH=${KYLIN_HOME}/conf:${KYLIN_HOME}/tool/*:${KYLIN_HOME}/ext/*:${hadoop_dependencies} + + # compose kylin_common_opts + kylin_common_opts="-Dkylin.hive.dependency=${hive_dependency} \ + -Dkylin.kafka.dependency=${kafka_dependency} \ + -Dkylin.hadoop.conf.dir=${kylin_hadoop_conf_dir} \ + -Dkylin.server.host-address=${KYLIN_REST_ADDRESS} \ + -Dspring.profiles.active=${spring_profile} \ + -Dhdp.version=${hdp_version}" + + # compose KYLIN_TOMCAT_OPTS + KYLIN_TOMCAT_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-server-log4j.properties \ + -Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager \ + -Dorg.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH=true \ + -Dorg.apache.catalina.connector.CoyoteAdapter.ALLOW_BACKSLASH=true \ + -Djava.endorsed.dirs=${tomcat_root}/endorsed \ + -Dcatalina.base=${tomcat_root} \ + -Dcatalina.home=${tomcat_root} \ + -Djava.io.tmpdir=${tomcat_root}/temp ${kylin_common_opts}" + export KYLIN_TOMCAT_OPTS + + # compose KYLIN_TOOL_OPTS + KYLIN_TOOL_OPTS="-Dlog4j.configuration=file:${KYLIN_HOME}/conf/kylin-tools-log4j.properties ${kylin_common_opts}" + export KYLIN_TOOL_OPTS +} + +function checkBasicKylinProps() { + spring_profile=`${dir}/get-properties.sh kylin.security.profile` + if [ -z "$spring_profile" ] + then + quit 'Please set kylin.security.profile in kylin.properties, options are: testing, ldap, saml.' + else + verbose "kylin.security.profile is $spring_profile" + fi +} + +function prepareFairScheduler() { + cat > ${KYLIN_HOME}/conf/fairscheduler.xml <<EOL +<?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<allocations> + <pool name="query_pushdown"> + <schedulingMode>FAIR</schedulingMode> + <weight>1</weight> + <minShare>1</minShare> + </pool> + <pool name="heavy_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>5</weight> + <minShare>1</minShare> + </pool> + <pool name="lightweight_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>10</weight> + <minShare>1</minShare> + </pool> + <pool name="vip_tasks"> + <schedulingMode>FAIR</schedulingMode> + <weight>15</weight> + <minShare>1</minShare> + </pool> +</allocations> +EOL +} + +function checkRestPort() { + kylin_rest_address_arr=(${KYLIN_REST_ADDRESS//:/ }) + inuse=`netstat -tlpn | grep "\b${kylin_rest_address_arr[1]}\b"` + [[ -z ${inuse} ]] || quit "Port ${kylin_rest_address_arr[1]} is not available. Another kylin server is running?" +} + + +function classpathDebug() { + if [ "${KYLIN_CLASSPATH_DEBUG}" != "" ]; then + echo "Finding ${KYLIN_CLASSPATH_DEBUG} on classpath" $@ + $JAVA -classpath $@ org.apache.kylin.common.util.ClasspathScanner ${KYLIN_CLASSPATH_DEBUG} + fi +} + +function runTool() { + + retrieveDependency + + # get KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv-tool.sh" ]; then + source ${KYLIN_HOME}/conf/setenv-tool.sh + fi + + verbose "java opts for tool is ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOOL_OPTS}" + verbose "java classpath for tool is ${KYLIN_TOOL_CLASSPATH}" + classpathDebug ${KYLIN_TOOL_CLASSPATH} + + exec $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOOL_OPTS} -classpath ${KYLIN_TOOL_CLASSPATH} "$@" +} + +if [ "$2" == "--reload-dependency" ] +then + reload_dependency=1 +fi + +# start command +if [ "$1" == "start" ] +then + if [ -f "${KYLIN_HOME}/pid" ] + then + PID=`cat $KYLIN_HOME/pid` + if ps -p $PID > /dev/null + then + quit "Kylin is running, stop it first" + fi + fi + + checkBasicKylinProps + + source ${dir}/check-env.sh + + retrieveDependency + + checkRestPort + + prepareFairScheduler + + ${KYLIN_HOME}/bin/check-migration-acl.sh || { exit 1; } + + # get KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv.sh" ]; then + source ${KYLIN_HOME}/conf/setenv.sh + fi + + security_ldap_truststore=`bash ${dir}/get-properties.sh kylin.security.ldap.connection-truststore` + if [ -f "${security_ldap_truststore}" ]; then + KYLIN_EXTRA_START_OPTS="$KYLIN_EXTRA_START_OPTS -Djavax.net.ssl.trustStore=$security_ldap_truststore" + fi + + verbose "java opts is ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS}" + verbose "java classpath is ${KYLIN_TOMCAT_CLASSPATH}" + classpathDebug ${KYLIN_TOMCAT_CLASSPATH} + $JAVA ${KYLIN_EXTRA_START_OPTS} ${KYLIN_TOMCAT_OPTS} -classpath ${KYLIN_TOMCAT_CLASSPATH} org.apache.catalina.startup.Bootstrap start >> ${KYLIN_HOME}/logs/kylin.out 2>&1 & echo $! > ${KYLIN_HOME}/pid & + + echo "" + echo "A new Kylin instance is started by $USER. 
To stop it, run 'kylin.sh stop'" + echo "Check the log at ${KYLIN_HOME}/logs/kylin.log" + echo "Web UI is at http://${KYLIN_REST_ADDRESS}/kylin" + exit 0 + +# run command +elif [ "$1" == "run" ] +then + retrieveStartCommand + ${start_command} + +# stop command +elif [ "$1" == "stop" ] +then + if [ -f "${KYLIN_HOME}/pid" ] + then + PID=`cat $KYLIN_HOME/pid` + WAIT_TIME=2 + LOOP_COUNTER=10 + if ps -p $PID > /dev/null + then + echo "Stopping Kylin: $PID" + kill $PID + + for ((i=0; i<$LOOP_COUNTER; i++)) + do + # wait for the process to stop + sleep $WAIT_TIME + if ps -p $PID > /dev/null ; then + echo "Stopping in progress. Will check after $WAIT_TIME secs again..." + continue; + else + break; + fi + done + + # if process is still around, use kill -9 + if ps -p $PID > /dev/null + then + echo "Initial kill failed, getting serious now..." + kill -9 $PID + sleep 1 #give kill -9 some time to "kill" + if ps -p $PID > /dev/null + then + quit "Warning, even kill -9 failed, giving up! Sorry..." + fi + fi + + # process is killed, remove pid file + rm -rf ${KYLIN_HOME}/pid + echo "Kylin with pid ${PID} has been stopped." + exit 0 + else + quit "Kylin with pid ${PID} is not running" + fi + else + quit "Kylin is not running" + fi + +# streaming command +elif [ "$1" == "streaming" ] +then + if [ $# -lt 2 ] + then + echo "Invalid input args $@" + exit -1 + fi + if [ "$2" == "start" ] + then + if [ -f "${KYLIN_HOME}/streaming_receiver_pid" ] + then + PID=`cat $KYLIN_HOME/streaming_receiver_pid` + if ps -p $PID > /dev/null + then + echo "Kylin streaming receiver is running, stop it first" + exit 1 + fi + fi + #retrieve $hbase_dependency + metadataUrl=`${dir}/get-properties.sh kylin.metadata.url` + if [[ "${metadataUrl##*@}" == "hbase" ]] + then + source ${dir}/find-hbase-dependency.sh + fi + #retrieve $KYLIN_EXTRA_START_OPTS + if [ -f "${KYLIN_HOME}/conf/setenv.sh" ] + then source ${KYLIN_HOME}/conf/setenv.sh + fi + + mkdir -p ${KYLIN_HOME}/ext + HBASE_CLASSPATH=`hbase classpath` + #echo "hbase class path:"$HBASE_CLASSPATH + STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH} + + # KYLIN_EXTRA_START_OPTS is for customized settings, check out bin/setenv.sh + $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \ + -Dlog4j.configuration=stream-receiver-log4j.properties\ + -DKYLIN_HOME=${KYLIN_HOME}\ + -Dkylin.hbase.dependency=${hbase_dependency} \ + org.apache.kylin.stream.server.StreamingReceiver $@ > ${KYLIN_HOME}/logs/streaming_receiver.out 2>&1 & echo $! > ${KYLIN_HOME}/streaming_receiver_pid & + exit 0 + elif [ "$2" == "stop" ] + then + if [ ! -f "${KYLIN_HOME}/streaming_receiver_pid" ] + then + echo "Streaming receiver is not running, please check" + exit 1 + fi + PID=`cat ${KYLIN_HOME}/streaming_receiver_pid` + if [ "$PID" = "" ] + then + echo "Streaming receiver is not running, please check" + exit 1 + else + echo "Stopping streaming receiver: $PID" + WAIT_TIME=2 + LOOP_COUNTER=20 + if ps -p $PID > /dev/null + then + kill $PID + + for ((i=0; i<$LOOP_COUNTER; i++)) + do + # wait for the process to stop + sleep $WAIT_TIME + if ps -p $PID > /dev/null ; then + echo "Stopping in progress. Will check after $WAIT_TIME secs again..." + continue; + else + break; + fi + done + + # if process is still around, use kill -9 + if ps -p $PID > /dev/null + then + echo "Initial kill failed, getting serious now..." + kill -9 $PID + sleep 1 #give kill -9 some time to "kill" + if ps -p $PID > /dev/null + then + quit "Warning, even kill -9 failed, giving up! Sorry..." + fi + fi + + # process is killed, remove pid file + rm -rf ${KYLIN_HOME}/streaming_receiver_pid + echo "Kylin streaming receiver with pid ${PID} has been stopped." + exit 0 + else + quit "Kylin streaming receiver with pid ${PID} is not running" + fi + fi + elif [[ "$2" = org.apache.kylin.* ]] + then + source ${KYLIN_HOME}/conf/setenv.sh + HBASE_CLASSPATH=`hbase classpath` + #echo "hbase class path:"$HBASE_CLASSPATH + STREAM_CLASSPATH=${KYLIN_HOME}/lib/streaming/*:${KYLIN_HOME}/ext/*:${HBASE_CLASSPATH} + + shift + # KYLIN_EXTRA_START_OPTS is for customized settings, check out bin/setenv.sh + $JAVA -cp $STREAM_CLASSPATH ${KYLIN_EXTRA_START_OPTS} \ + -Dlog4j.configuration=stream-receiver-log4j.properties\ + -DKYLIN_HOME=${KYLIN_HOME}\ + -Dkylin.hbase.dependency=${hbase_dependency} \ + "$@" + exit 0 + fi + +elif [ "$1" = "version" ] +then + runTool org.apache.kylin.common.KylinVersion + +elif [ "$1" = "diag" ] +then + echo "'kylin.sh diag' no longer supported, use diag.sh instead" + exit 0 + +# tool command +elif [[ "$1" = org.apache.kylin.* ]] +then + runTool "$@" +else + quit "Usage: 'kylin.sh [-v] start' or 'kylin.sh [-v] stop'" +fi diff --git a/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000..8f016e2 --- /dev/null +++ b/docker/dockerfile/standalone/conf/hadoop/capacity-scheduler.xml @@ -0,0 +1,134 @@ +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<configuration> + + <property> + <name>yarn.scheduler.capacity.maximum-applications</name> + <value>4</value> + <description> + Maximum number of applications that can be pending and running. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.maximum-am-resource-percent</name> + <value>0.5</value> + <description> + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.resource-calculator</name> + <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> + <description> + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.queues</name> + <value>default</value> + <description> + The queues at this level (root is the root queue).
+ </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.capacity</name> + <value>100</value> + <description>Default queue target capacity.</description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.user-limit-factor</name> + <value>1</value> + <description> + Default queue user limit, a percentage from 0.0 to 1.0. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.maximum-capacity</name> + <value>100</value> + <description> + The maximum capacity of the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.state</name> + <value>RUNNING</value> + <description> + The state of the default queue. State can be one of RUNNING or STOPPED. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name> + <value>*</value> + <description> + The ACL of who can submit jobs to the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name> + <value>*</value> + <description> + The ACL of who can administer jobs on the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.node-locality-delay</name> + <value>40</value> + <description> + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to the number of nodes in the cluster. By default it is set to + approximately the number of nodes in one rack, which is 40. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings</name> + <value></value> + <description> + A list of mappings that will be used to assign jobs to queues. + The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* + Typically this list will be used to map users to queues, + for example, u:%user:%user maps all users to queues with the same name + as the user. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings-override.enable</name> + <value>false</value> + <description> + If a queue mapping is present, will it override the value specified + by the user? This can be used by administrators to place jobs in queues + that are different than the one specified by the user. + The default is false. + </description> + </property> + +</configuration> diff --git a/docker/dockerfile/standalone/conf/hive/hive-site.xml b/docker/dockerfile/standalone/conf/hive/hive-site.xml index fc51985..589e40f 100644 --- a/docker/dockerfile/standalone/conf/hive/hive-site.xml +++ b/docker/dockerfile/standalone/conf/hive/hive-site.xml @@ -37,4 +37,8 @@ <value>123456</value> <description>password to use against metastore database</description> </property> -</configuration> \ No newline at end of file + <property> + <name>hive.metastore.schema.verification</name> + <value>false</value> + </property> +</configuration> diff --git a/docker/dockerfile/standalone/conf/kylin/kylin.properties b/docker/dockerfile/standalone/conf/kylin/kylin.properties new file mode 100644 index 0000000..280b846 --- /dev/null +++ b/docker/dockerfile/standalone/conf/kylin/kylin.properties @@ -0,0 +1,377 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + + + +# The commented values below take effect as default settings +# Uncomment and override them if necessary + + + +# +#### METADATA | ENV ### +# +## The metadata store has two implementations (RDBMS/HBase); RDBMS is recommended in Kylin 4.X +## Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase +#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10 +# +## metadata cache sync retry times +#kylin.metadata.sync-retries=3 +# +## Working folder in HDFS, preferably a fully qualified absolute path; make sure the user has the right permission on this directory +#kylin.env.hdfs-working-dir=/kylin +# +## DEV|QA|PROD. DEV will turn on some dev features; QA and PROD have no difference in terms of functions. +#kylin.env=QA +# +## kylin zk base path +#kylin.env.zookeeper-base-path=/kylin +# +## Run a TestingServer for curator locally +#kylin.env.zookeeper-is-local=false +# +## Connect to a remote zookeeper with this url; kylin.env.zookeeper-is-local should be set to false +#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com +# +#### SERVER | WEB | RESTCLIENT ### +# +## Kylin server mode, valid value [all, query, job] +#kylin.server.mode=all +# +### Kylin server port +#server.port=7070 +# +## List of web servers in use; this enables one web server instance to sync up with other servers.
+#kylin.server.cluster-servers=localhost:7070 +# +## Display timezone on UI,format like[GMT+N or GMT-N] +#kylin.web.timezone= +# +## Timeout value for the queries submitted through the Web UI, in milliseconds +#kylin.web.query-timeout=300000 +# +#kylin.web.cross-domain-enabled=true +# +##allow user to export query result +#kylin.web.export-allow-admin=true +#kylin.web.export-allow-other=true +# +## Hide measures in measure list of cube designer, separate by comma +#kylin.web.hide-measures=RAW +# +##max connections of one route +#kylin.restclient.connection.default-max-per-route=20 +# +##max connections of one rest-client +#kylin.restclient.connection.max-total=200 +# +#### PUBLIC CONFIG ### +#kylin.engine.default=6 +#kylin.storage.default=4 +#kylin.web.hive-limit=20 +#kylin.web.help.length=4 +#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html +#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs/tutorial/odbc.html +#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html +#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html +#kylin.web.link-streaming-guide=http://kylin.apache.org/ +#kylin.htrace.show-gui-trace-toggle=false +#kylin.web.link-hadoop= +#kylin.web.link-diagnostic= +#kylin.web.contact-mail= +#kylin.server.external-acl-provider= +# +## Default time filter for job list, 0->current day, 1->last one day, 2->last one week, 3->last one year, 4->all +#kylin.web.default-time-filter=1 +# +#### SOURCE ### +## Define how to access to hive metadata +## When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead +#kylin.source.hive.metadata-type=hcatalog +# +## Hive client, valid value [cli, beeline] +#kylin.source.hive.client=cli +# +## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH +#kylin.source.hive.beeline-shell=beeline +# +## Hive database name for putting the intermediate flat tables +#kylin.source.hive.database-for-flat-table=default +# +#### STORAGE ### +# +## The storage for final cube file in hbase +#kylin.storage.url=hbase +# +## clean real storage after delete operation +## if you want to delete the real storage like htable of deleting segment, you can set it to true +#kylin.storage.clean-after-delete-operation=false +# +#### JOB ### +# +## Max job retry on error, default 0: no retry +#kylin.job.retry=0 +# +## Max count of concurrent jobs running +#kylin.job.max-concurrent-jobs=10 +# +## The percentage of the sampling, default 100% +#kylin.job.sampling-percentage=100 +# +## If true, will send email notification on job complete +##kylin.job.notification-enabled=true +##kylin.job.notification-mail-enable-starttls=true +##kylin.job.notification-mail-host=smtp.office365.com +##kylin.job.notification-mail-port=587 +##kylin.job.notification-mail-username=ky...@example.com +##kylin.job.notification-mail-password=mypassword +##kylin.job.notification-mail-sender=ky...@example.com +#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler +#kylin.job.scheduler.default=0 +# +#### CUBE | DICTIONARY ### +# +#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler +#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor +# +## 'auto', 'inmem', 'layer' or 'random' for testing +#kylin.cube.algorithm=layer +# +## A smaller threshold prefers layer, a larger threshold prefers in-mem 
+#kylin.cube.algorithm.layer-or-inmem-threshold=7 +# +## auto use inmem algorithm for: +## 1. cube planner optimize jobs +## 2. no source records +#kylin.cube.algorithm.inmem-auto-optimize=true +# +#kylin.cube.aggrgroup.max-combination=32768 +# +#kylin.cube.cubeplanner.enabled=false +#kylin.cube.cubeplanner.enabled-for-existing-cube=false +#kylin.cube.cubeplanner.expansion-threshold=15.0 +#kylin.cube.cubeplanner.recommend-cache-max-size=200 +#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000 +#kylin.cube.cubeplanner.algorithm-threshold-greedy=8 +#kylin.cube.cubeplanner.algorithm-threshold-genetic=23 +# +#### QUERY ### +# +## Controls the maximum number of bytes a query is allowed to scan in storage. +## The default value 0 means no limit. +## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor. +#kylin.query.max-scan-bytes=0 +# +#kylin.query.cache-enabled=true +#kylin.query.cache-threshold-scan-count=10240 +#kylin.query.cache-threshold-duration=2000 +#kylin.query.cache-threshold-scan-bytes=1048576 +#kylin.query.large-query-threshold=1000000 +# +## Controls extras properties for Calcite jdbc driver +## all extras properties should be under the prefix "kylin.query.calcite.extras-props." +## case sensitive, default: true; to enable case-insensitive matching set it to false +## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE +#kylin.query.calcite.extras-props.caseSensitive=true +## how to handle unquoted identifiers, default: TO_UPPER, available options: UNCHANGED, TO_UPPER, TO_LOWER +## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING +#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER +## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE, BACK_TICK, BRACKET +## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING +#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE +## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal +## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum +#kylin.query.calcite.extras-props.conformance=LENIENT +# +## TABLE ACL +#kylin.query.security.table-acl-enabled=true +# +## Usually should not modify this +#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor +# +#kylin.query.escape-default-keyword=false +# +## Usually should not modify this +#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack +# +#### SECURITY ### +# +## Spring security profile, options: testing, ldap, saml +## with the "testing" profile, users can log in with the pre-defined name/pwd ADMIN/KYLIN +#kylin.security.profile=testing +# +## Admin roles in LDAP, for ldap and saml +#kylin.security.acl.admin-role=admin +# +## LDAP authentication configuration +#kylin.security.ldap.connection-server=ldap://ldap_server:389 +#kylin.security.ldap.connection-username= +#kylin.security.ldap.connection-password= +## When you use a customized CA certificate library for user authentication over LDAPS, you need to configure this item. +## The value of this item will be added to the JVM parameter javax.net.ssl.trustStore.
+#kylin.security.ldap.connection-truststore= +# +## LDAP user account directory; +#kylin.security.ldap.user-search-base= +#kylin.security.ldap.user-search-pattern= +#kylin.security.ldap.user-group-search-base= +#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1})) +# +## LDAP service account directory +#kylin.security.ldap.service-search-base= +#kylin.security.ldap.service-search-pattern= +#kylin.security.ldap.service-group-search-base= +# +### SAML configurations for SSO +## SAML IDP metadata file location +#kylin.security.saml.metadata-file=classpath:sso_metadata.xml +#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin +#kylin.security.saml.keystore-file=classpath:samlKeystore.jks +#kylin.security.saml.context-scheme=https +#kylin.security.saml.context-server-name=hostname +#kylin.security.saml.context-server-port=443 +#kylin.security.saml.context-path=/kylin +# +#### SPARK BUILD ENGINE CONFIGS ### +# +## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit +## This must contain site xmls of core, yarn, hive, and hbase in one folder +##kylin.env.hadoop-conf-dir=/etc/hadoop/conf +# +## Spark conf (default is in spark/conf/spark-defaults.conf) +#kylin.engine.spark-conf.spark.master=yarn +##kylin.engine.spark-conf.spark.submit.deployMode=client +#kylin.engine.spark-conf.spark.yarn.queue=default +##kylin.engine.spark-conf.spark.executor.cores=1 +##kylin.engine.spark-conf.spark.executor.memory=4G +##kylin.engine.spark-conf.spark.executor.instances=1 +##kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M +#kylin.engine.spark-conf.spark.driver.cores=1 +#kylin.engine.spark-conf.spark.driver.memory=1G +#kylin.engine.spark-conf.spark.shuffle.service.enabled=true +#kylin.engine.spark-conf.spark.eventLog.enabled=true +#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false +#kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 -Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=job -Dkylin.spark.project=${job.project} -Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} -Duser.timezone=${user.timezone} +##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +# +## manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime +##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec +# +## uncomment for HDP +##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +# +#### SPARK QUERY ENGINE CONFIGS (a.k.a. 
Sparder Context) ### +## Enlarge cores and memory to improve query performance in production env, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X +# +#kylin.query.spark-conf.spark.master=yarn +##kylin.query.spark-conf.spark.submit.deployMode=client +#kylin.query.spark-conf.spark.driver.cores=1 +#kylin.query.spark-conf.spark.driver.memory=4G +#kylin.query.spark-conf.spark.driver.memoryOverhead=1G +#kylin.query.spark-conf.spark.executor.cores=1 +#kylin.query.spark-conf.spark.executor.instances=1 +#kylin.query.spark-conf.spark.executor.memory=4G +#kylin.query.spark-conf.spark.executor.memoryOverhead=1G +#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +##kylin.query.spark-conf.spark.sql.shuffle.partitions=40 +##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +# +#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.project=${job.project} +## uncomment for HDP +##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +# +#### QUERY PUSH DOWN ### +# +##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl +##kylin.query.pushdown.update-enabled=false + +kylin.env=QA +kylin.server.mode=all +kylin.server.host-address=127.0.0.1:7070 +server.port=7070 +# Display timezone on UI,format like[GMT+N or GMT-N] +kylin.web.timezone=GMT+8 + +kylin.source.hive.client=cli +kylin.source.hive.database-for-flat-table=kylin4 + +kylin.engine.spark-conf.spark.eventLog.enabled=true +kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false + +kylin.engine.spark-conf.spark.yarn.submit.file.replication=1 +kylin.engine.spark-conf.spark.master=yarn +kylin.engine.spark-conf.spark.driver.memory=512M +kylin.engine.spark-conf.spark.driver.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.memory=1G +kylin.engine.spark-conf.spark.executor.instances=1 +kylin.engine.spark-conf.spark.executor.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.cores=1 +kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* + +kylin.storage.columnar.shard-rowcount=2500000 +kylin.storage.columnar.shard-countdistinct-rowcount=1000000 +kylin.storage.columnar.repartition-threshold-size-mb=128 +kylin.storage.columnar.shard-size-mb=128 + +kylin.query.auto-sparder-context=true +kylin.query.sparder-context.app-name=sparder_on_docker +kylin.query.spark-conf.spark.master=yarn +kylin.query.spark-conf.spark.driver.memory=512M +kylin.query.spark-conf.spark.driver.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.memory=1G +kylin.query.spark-conf.spark.executor.instances=1 +kylin.query.spark-conf.spark.executor.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.cores=1 +kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +kylin.query.spark-conf.spark.sql.shuffle.partitions=1 
+kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +kylin.query.spark-conf.spark.eventLog.enabled=true +kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history + +# for local cache +kylin.query.cache-enabled=false + +# for pushdown query +kylin.query.pushdown.update-enabled=false +kylin.query.pushdown.enabled=true +kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl + +# for Cube Planner +kylin.cube.cubeplanner.enabled=true +kylin.server.query-metrics2-enabled=false +kylin.metrics.reporter-query-enabled=false +kylin.metrics.reporter-job-enabled=false +kylin.metrics.monitor-enabled=false +kylin.web.dashboard-enabled=false + +# metadata for mysql +kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10 +kylin.env.hdfs-working-dir=/kylin4_metadata +kylin.env.zookeeper-base-path=/kylin4 +kylin.env.zookeeper-connect-string=127.0.0.1 + +kylin.storage.clean-after-delete-operation=true diff --git a/docker/dockerfile/standalone/conf/spark/spark-defaults.conf b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf new file mode 100644 index 0000000..dac2e3c --- /dev/null +++ b/docker/dockerfile/standalone/conf/spark/spark-defaults.conf @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. 
+ +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" + +spark.sql.catalogImplementation hive +spark.driver.maxResultSize 1g +spark.sql.hive.thriftServer.singleSession false + +spark.serializer org.apache.spark.serializer.JavaSerializer + +spark.memory.useLegacyMode false +spark.memory.fraction 0.3 +spark.memory.storageFraction 0.3 + +spark.rdd.compress true +spark.io.compression.codec snappy + +spark.locality.wait 100ms +spark.speculation false + +spark.task.maxFailures 4 + +spark.scheduler.minRegisteredResourcesRatio 1.0 +spark.scheduler.maxRegisteredResourcesWaitingTime 60s + +spark.yarn.jars hdfs://localhost:9000/spark2_jars/* + + + + + diff --git a/docker/dockerfile/standalone/conf/spark/spark-env.sh b/docker/dockerfile/standalone/conf/spark/spark-env.sh new file mode 100755 index 0000000..3c5837d --- /dev/null +++ b/docker/dockerfile/standalone/conf/spark/spark-env.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client/cluster mode +# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 
1000M, 2G) (Default: 1G) + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) +# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. +# Options for native BLAS, like Intel MKL, OpenBLAS, and so on. +# You might get better performance to enable these options if using native BLAS (see SPARK-21305). +# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL +# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS + +export JAVA_HOME=/home/admin/jdk1.8.0_141 +export CLASSPATH=.:$JAVA_HOME/lib +export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native + +export SPARK_PID_DIR=${SPARK_HOME}/ + diff --git a/docker/dockerfile/standalone/conf/zk/zoo.cfg b/docker/dockerfile/standalone/conf/zk/zoo.cfg new file mode 100644 index 0000000..1a576de --- /dev/null +++ b/docker/dockerfile/standalone/conf/zk/zoo.cfg @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 +# the directory where the snapshot is stored. +# do not use /tmp for storage, /tmp here is just +# for example's sake. +dataDir=/data/zookeeper +# the port at which the clients will connect +clientPort=2181 +# the maximum number of client connections. +# increase this if you need to handle more clients +#maxClientCnxns=60 +# +# Be sure to read the maintenance section of the +# administrator guide before turning on autopurge. +# +# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance +# +# The number of snapshots to retain in dataDir +#autopurge.snapRetainCount=3 +# Purge task interval in hours +# Set to "0" to disable auto purge feature +#autopurge.purgeInterval=1 diff --git a/docker/setup_standalone.sh b/docker/setup_standalone.sh index 3ed32ce..a6289af 100755 --- a/docker/setup_standalone.sh +++ b/docker/setup_standalone.sh @@ -23,4 +23,4 @@ docker run -d \ -p 8032:8032 \ -p 8042:8042 \ -p 2181:2181 \ -apachekylin/apache-kylin-standalone:4.0.0-alpha +apachekylin/apache-kylin-standalone:4.0.0-beta
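For readers who want to try this change, below is a minimal sketch of building and starting the updated standalone image, pieced together from the commands visible in this diff. Note the setup_standalone.sh hunk above is truncated, so the port mappings shown below only repeat the visible ones plus 7070, the Kylin web port set by server.port in kylin.properties; treat the exact set of -p flags as an assumption and check the full setup_standalone.sh for the authoritative list.

# Run from the directory that contains Dockerfile and Dockerfile_hadoop.
# Build the Hadoop base image first, then the Kylin image on top of it,
# the same two steps that build_standalone_image.sh performs:
docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4-beta .
docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-beta .

# Start a container (the 7070 mapping is an assumption, see note above):
docker run -d \
  -p 7070:7070 \
  -p 8032:8032 \
  -p 8042:8042 \
  -p 2181:2181 \
  apachekylin/apache-kylin-standalone:4.0.0-beta

Once kylin.sh has started inside the container, the web UI should be reachable at http://localhost:7070/kylin; with the default "testing" security profile, the pre-defined ADMIN/KYLIN account from kylin.properties applies.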