This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push: new e6565f8 KYLIN-4452 Kylin 4 with Docker e6565f8 is described below commit e6565f87fdeaa9e6bb8e2a5951ff7f3046997b96 Author: Zhichao Zhang <441586...@qq.com> AuthorDate: Tue Sep 15 13:36:27 2020 +0800 KYLIN-4452 Kylin 4 with Docker Docker image for Apache Kylin 4 Alpha --- docker/Dockerfile | 29 +-- docker/Dockerfile_dev | 43 ---- docker/Dockerfile_hadoop | 43 ++-- docker/README.md | 15 +- docker/build_image.sh | 4 +- docker/conf/hadoop/capacity-scheduler.xml | 134 +++++++++++ docker/conf/hbase/hbase-site.xml | 32 --- docker/conf/kylin/kylin.properties | 371 ++++++++++++++++++++++++++++++ docker/conf/spark/spark-defaults.conf | 55 +++++ docker/conf/spark/spark-env.sh | 77 +++++++ docker/conf/zk/zoo.cfg | 45 ++++ docker/entrypoint.sh | 66 ++++-- docker/run_container.sh | 5 +- 13 files changed, 760 insertions(+), 159 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 4ca399f..a168e6c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,26 +16,19 @@ # # Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one +FROM hadoop2.7-all-in-one-for-kylin4 -ENV KYLIN_VERSION 3.0.1 -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x - -# Download released Kylin -RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz \ - && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz - -RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics- [...] 
- && echo kylin.source.hive.quote-enabled=false >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.source.hive.redistribute-flat-table=false >> $KYLIN_HOME/conf/kylin.properties +ENV KYLIN_VERSION 4.0.0-alpha +ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2 +# Download Kylin +RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ + && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ + && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz +RUN rm -f $KYLIN_HOME/conf/kylin.properties +COPY conf/kylin/* $KYLIN_HOME/conf/ +RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/ +RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh COPY ./entrypoint.sh /home/admin/entrypoint.sh RUN chmod u+x /home/admin/entrypoint.sh diff --git a/docker/Dockerfile_dev b/docker/Dockerfile_dev deleted file mode 100644 index c0a8933..0000000 --- a/docker/Dockerfile_dev +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one - -## for dev -ENV KYLIN_VERSION 4.0.0-SNAPSHOT -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin -# Copy compiled Kylin from local -COPY apache-kylin-$KYLIN_VERSION-bin.tar.gz /home/admin/ -RUN tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz - -RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics- [...] 
- && echo kylin.source.hive.quote-enabled=false >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.source.hive.redistribute-flat-table=false >> $KYLIN_HOME/conf/kylin.properties - - -COPY ./entrypoint.sh /home/admin/entrypoint.sh -RUN chmod u+x /home/admin/entrypoint.sh - -ENTRYPOINT ["/home/admin/entrypoint.sh"] diff --git a/docker/Dockerfile_hadoop b/docker/Dockerfile_hadoop index a0312f4..8e76855 100644 --- a/docker/Dockerfile_hadoop +++ b/docker/Dockerfile_hadoop @@ -15,16 +15,14 @@ # limitations under the License. # -# Docker image with Hadoop/Hive/HBase/Spark/ZK/Kafka/Livy installed +# Docker image with Hadoop/Spark/Hive/ZK/Kafka installed FROM centos:6.9 ENV HIVE_VERSION 1.2.1 ENV HADOOP_VERSION 2.7.0 -ENV HBASE_VERSION 1.1.2 -ENV SPARK_VERSION 2.3.1 +ENV SPARK_VERSION 2.4.6 ENV ZK_VERSION 3.4.6 ENV KAFKA_VERSION 1.1.1 -ENV LIVY_VERSION 0.6.0 ENV JAVA_HOME /home/admin/jdk1.8.0_141 ENV MVN_HOME /home/admin/apache-maven-3.6.1 @@ -32,13 +30,11 @@ ENV HADOOP_HOME /home/admin/hadoop-$HADOOP_VERSION ENV HIVE_HOME /home/admin/apache-hive-$HIVE_VERSION-bin ENV HADOOP_CONF $HADOOP_HOME/etc/hadoop ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop -ENV HBASE_HOME /home/admin/hbase-$HBASE_VERSION -ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6 -ENV SPARK_CONF_DIR /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6/conf +ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7 +ENV SPARK_CONF_DIR $SPARK_HOME/conf ENV ZK_HOME /home/admin/zookeeper-$ZK_VERSION ENV KAFKA_HOME /home/admin/kafka_2.11-$KAFKA_VERSION -ENV LIVY_HOME /home/admin/apache-livy-$LIVY_VERSION-incubating-bin -ENV PATH $PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$HBASE_HOME/bin:$MVN_HOME/bin:spark-$SPARK_VERSION-bin-hadoop2.6/bin:$KAFKA_HOME/bin +ENV PATH $PATH:$JAVA_HOME/bin:$ZK_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$MVN_HOME/bin:$KAFKA_HOME/bin USER root @@ -69,37 +65,32 @@ RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hado && mkdir -p /data/hadoop COPY conf/hadoop/* $HADOOP_CONF/ -# setup hbase -RUN wget https://archive.apache.org/dist/hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz \ - && tar -zxvf /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \ - && rm -f /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \ - && mkdir -p /data/hbase \ - && mkdir -p /data/zookeeper -COPY conf/hbase/hbase-site.xml $HBASE_HOME/conf - # setup hive RUN wget https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz \ && tar -zxvf /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \ && rm -f /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \ && wget -P $HIVE_HOME/lib https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.24/mysql-connector-java-5.1.24.jar COPY conf/hive/hive-site.xml $HIVE_HOME/conf +COPY conf/hive/hive-site.xml $HADOOP_CONF/ # setup spark -RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ - && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ - && rm -f /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ +RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ + && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ + && rm -f 
/home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ && cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf \ && cp $SPARK_HOME/yarn/*.jar $HADOOP_HOME/share/hadoop/yarn/lib RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $SPARK_HOME/jars -RUN echo spark.sql.catalogImplementation=hive > $SPARK_HOME/conf/spark-defaults.conf +RUN cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-1.2.1.jar $SPARK_HOME/jars/ +COPY conf/spark/* $SPARK_CONF_DIR/ # setup kafka RUN wget https://archive.apache.org/dist/kafka/$KAFKA_VERSION/kafka_2.11-$KAFKA_VERSION.tgz \ && tar -zxvf /home/admin/kafka_2.11-$KAFKA_VERSION.tgz \ && rm -f /home/admin/kafka_2.11-$KAFKA_VERSION.tgz -# setup livy -RUN wget https://www.apache.org/dist/incubator/livy/$LIVY_VERSION-incubating/apache-livy-$LIVY_VERSION-incubating-bin.zip \ - && unzip /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip \ - && rm -f /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip - +# setup zk +RUN wget https://archive.apache.org/dist/zookeeper/zookeeper-$ZK_VERSION/zookeeper-$ZK_VERSION.tar.gz \ + && tar -zxvf /home/admin/zookeeper-$ZK_VERSION.tar.gz \ + && rm -f /home/admin/zookeeper-$ZK_VERSION.tar.gz \ + && mkdir -p /data/zookeeper +COPY conf/zk/zoo.cfg $ZK_HOME/conf diff --git a/docker/README.md b/docker/README.md index 376547f..d137c8b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,8 +4,7 @@ In order to allow users to easily try Kylin, and to facilitate developers to ver - Jdk 1.8 - Hadoop 2.7.0 - Hive 1.2.1 -- Hbase 1.1.2 -- Spark 2.3.1 +- Spark 2.4.6 - Zookeeper 3.4.6 - Kafka 1.1.1 - MySQL 5.1.73 @@ -16,7 +15,7 @@ In order to allow users to easily try Kylin, and to facilitate developers to ver We have pushed the Kylin images to the [docker hub](https://hub.docker.com/r/apachekylin/apache-kylin-standalone). You do not need to build the image locally, just pull the image from remote (you can browse docker hub to check the available versions): ``` -docker pull apachekylin/apache-kylin-standalone:3.0.1 +docker pull apachekylin/apache-kylin-standalone:4.0.0-alpha ``` After the pull is successful, execute "sh run_container.sh" or the following command to start the container: @@ -29,16 +28,14 @@ docker run -d \ -p 50070:50070 \ -p 8032:8032 \ -p 8042:8042 \ --p 16010:16010 \ -apache-kylin-standalone +-p 2181:2181 \ +apachekylin/apache-kylin-standalone:4.0.0-alpha ``` The following services are automatically started when the container starts: - NameNode, DataNode - ResourceManager, NodeManager -- HBase -- Kafka - Kylin and run automatically `$KYLIN_HOME/bin/sample.sh `, create a kylin_streaming_topic topic in Kafka and continue to send data to this topic. This is to let the users start the container and then experience the batch and streaming way to build the cube and query. 
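(For reference, a minimal smoke test once the container has had a few minutes to start, assuming the image tag and port mappings shown above; the container ID placeholder and the kylin.log path are illustrative, the latter following from KYLIN_HOME in the new Dockerfile:)

```
# locate the container started from the standalone image
docker ps --filter ancestor=apachekylin/apache-kylin-standalone:4.0.0-alpha

# follow Kylin's startup log inside the container
docker exec -it <container_id> tail -f /home/admin/apache-kylin-4.0.0-alpha-bin-hadoop2/logs/kylin.log

# once Kylin is up, the web UI answers on the mapped port
curl -I http://127.0.0.1:7070/kylin/login
```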
@@ -48,7 +45,6 @@ After the container is started, we can enter the container through the `docker e - Kylin Web UI: [http://127.0.0.1:7070/kylin/login](http://127.0.0.1:7070/kylin/login) - HDFS NameNode Web UI: [http://127.0.0.1:50070](http://127.0.0.1:50070/) - YARN ResourceManager Web UI: [http://127.0.0.1:8088](http://127.0.0.1:8088/) -- HBase Web UI: [http://127.0.0.1:16010](http://127.0.0.1:16010/) In the container, the relevant environment variables are as follows: @@ -56,8 +52,7 @@ In the container, the relevant environment variables are as follows: JAVA_HOME=/home/admin/jdk1.8.0_141 HADOOP_HOME=/home/admin/hadoop-2.7.0 KAFKA_HOME=/home/admin/kafka_2.11-1.1.1 -SPARK_HOME=/home/admin/spark-2.3.1-bin-hadoop2.6 -HBASE_HOME=/home/admin/hbase-1.1.2 +SPARK_HOME=/home/admin/spark-2.4.6-bin-hadoop2.7 HIVE_HOME=/home/admin/apache-hive-1.2.1-bin ``` diff --git a/docker/build_image.sh b/docker/build_image.sh old mode 100644 new mode 100755 index 19fbec5..9c0b925 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one . -docker build -f Dockerfile -t apache-kylin-standalone . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . diff --git a/docker/conf/hadoop/capacity-scheduler.xml b/docker/conf/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000..503ee3e --- /dev/null +++ b/docker/conf/hadoop/capacity-scheduler.xml @@ -0,0 +1,134 @@ +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<configuration> + + <property> + <name>yarn.scheduler.capacity.maximum-applications</name> + <value>2</value> + <description> + Maximum number of applications that can be pending and running. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.maximum-am-resource-percent</name> + <value>0.3</value> + <description> + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.resource-calculator</name> + <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> + <description> + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.queues</name> + <value>default</value> + <description> + The queues at the this level (root is the root queue). 
+ </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.capacity</name> + <value>100</value> + <description>Default queue target capacity.</description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.user-limit-factor</name> + <value>1</value> + <description> + Default queue user limit a percentage from 0.0 to 1.0. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.maximum-capacity</name> + <value>100</value> + <description> + The maximum capacity of the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.state</name> + <value>RUNNING</value> + <description> + The state of the default queue. State can be one of RUNNING or STOPPED. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name> + <value>*</value> + <description> + The ACL of who can submit jobs to the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name> + <value>*</value> + <description> + The ACL of who can administer jobs on the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.node-locality-delay</name> + <value>40</value> + <description> + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to number of nodes in the cluster, By default is setting + approximately number of nodes in one rack which is 40. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings</name> + <value></value> + <description> + A list of mappings that will be used to assign jobs to queues + The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* + Typically this list will be used to map users to queues, + for example, u:%user:%user maps all users to queues with the same name + as the user. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings-override.enable</name> + <value>false</value> + <description> + If a queue mapping is present, will it override the value specified + by the user? This can be used by administrators to place jobs in queues + that are different than the one specified by the user. + The default is false. + </description> + </property> + +</configuration> diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml deleted file mode 100644 index 0c77926..0000000 --- a/docker/conf/hbase/hbase-site.xml +++ /dev/null @@ -1,32 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> -<!-- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. See accompanying LICENSE file. ---> - -<!-- Put site-specific property overrides in this file. 
--> - -<configuration> - <property> - <name>hbase.rootdir</name> - <value>file:///data/hbase</value> - </property> - <property> - <name>hbase.zookeeper.property.dataDir</name> - <value>/data/zookeeper</value> - </property> - <property> - <name>hbase.master.info.port</name> - <value>16010</value> - </property> -</configuration> \ No newline at end of file diff --git a/docker/conf/kylin/kylin.properties b/docker/conf/kylin/kylin.properties new file mode 100644 index 0000000..7054d83 --- /dev/null +++ b/docker/conf/kylin/kylin.properties @@ -0,0 +1,371 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + + + +# The below commented values will effect as default settings +# Uncomment and override them if necessary + + + +# +#### METADATA | ENV ### +# +## The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended in Kylin 4.X +## Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase +#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10 +# +## metadata cache sync retry times +#kylin.metadata.sync-retries=3 +# +## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory +#kylin.env.hdfs-working-dir=/kylin +# +## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions. +#kylin.env=QA +# +## kylin zk base path +#kylin.env.zookeeper-base-path=/kylin +# +## Run a TestingServer for curator locally +#kylin.env.zookeeper-is-local=false +# +## Connect to a remote zookeeper with the url, should set kylin.env.zookeeper-is-local to false +#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com +# +#### SERVER | WEB | RESTCLIENT ### +# +## Kylin server mode, valid value [all, query, job] +#kylin.server.mode=all +# +## List of web servers in use, this enables one web server instance to sync up with other servers. 
+#kylin.server.cluster-servers=localhost:7070 +# +## Display timezone on UI,format like[GMT+N or GMT-N] +#kylin.web.timezone= +# +## Timeout value for the queries submitted through the Web UI, in milliseconds +#kylin.web.query-timeout=300000 +# +#kylin.web.cross-domain-enabled=true +# +##allow user to export query result +#kylin.web.export-allow-admin=true +#kylin.web.export-allow-other=true +# +## Hide measures in measure list of cube designer, separate by comma +#kylin.web.hide-measures=RAW +# +##max connections of one route +#kylin.restclient.connection.default-max-per-route=20 +# +##max connections of one rest-client +#kylin.restclient.connection.max-total=200 +# +#### PUBLIC CONFIG ### +#kylin.engine.default=6 +#kylin.storage.default=4 +#kylin.web.hive-limit=20 +#kylin.web.help.length=4 +#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html +#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs/tutorial/odbc.html +#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html +#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html +#kylin.web.link-streaming-guide=http://kylin.apache.org/ +#kylin.htrace.show-gui-trace-toggle=false +#kylin.web.link-hadoop= +#kylin.web.link-diagnostic= +#kylin.web.contact-mail= +#kylin.server.external-acl-provider= +# +## Default time filter for job list, 0->current day, 1->last one day, 2->last one week, 3->last one year, 4->all +#kylin.web.default-time-filter=1 +# +#### SOURCE ### +## Define how to access to hive metadata +## When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead +#kylin.source.hive.metadata-type=hcatalog +# +## Hive client, valid value [cli, beeline] +#kylin.source.hive.client=cli +# +## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH +#kylin.source.hive.beeline-shell=beeline +# +## Hive database name for putting the intermediate flat tables +#kylin.source.hive.database-for-flat-table=default +# +#### STORAGE ### +# +## The storage for final cube file in hbase +#kylin.storage.url=hbase +# +## clean real storage after delete operation +## if you want to delete the real storage like htable of deleting segment, you can set it to true +#kylin.storage.clean-after-delete-operation=false +# +#### JOB ### +# +## Max job retry on error, default 0: no retry +#kylin.job.retry=0 +# +## Max count of concurrent jobs running +#kylin.job.max-concurrent-jobs=10 +# +## The percentage of the sampling, default 100% +#kylin.job.sampling-percentage=100 +# +## If true, will send email notification on job complete +##kylin.job.notification-enabled=true +##kylin.job.notification-mail-enable-starttls=true +##kylin.job.notification-mail-host=smtp.office365.com +##kylin.job.notification-mail-port=587 +##kylin.job.notification-mail-username=ky...@example.com +##kylin.job.notification-mail-password=mypassword +##kylin.job.notification-mail-sender=ky...@example.com +#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler +#kylin.job.scheduler.default=0 +# +#### CUBE | DICTIONARY ### +# +#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler +#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor +# +## 'auto', 'inmem', 'layer' or 'random' for testing +#kylin.cube.algorithm=layer +# +## A smaller threshold prefers layer, a larger threshold prefers in-mem 
+#kylin.cube.algorithm.layer-or-inmem-threshold=7 +# +## auto use inmem algorithm: +## 1, cube planner optimize job +## 2, no source record +#kylin.cube.algorithm.inmem-auto-optimize=true +# +#kylin.cube.aggrgroup.max-combination=32768 +# +#kylin.cube.cubeplanner.enabled=false +#kylin.cube.cubeplanner.enabled-for-existing-cube=false +#kylin.cube.cubeplanner.expansion-threshold=15.0 +#kylin.cube.cubeplanner.recommend-cache-max-size=200 +#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000 +#kylin.cube.cubeplanner.algorithm-threshold-greedy=8 +#kylin.cube.cubeplanner.algorithm-threshold-genetic=23 +# +#### QUERY ### +# +## Controls the maximum number of bytes a query is allowed to scan storage. +## The default value 0 means no limit. +## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor. +#kylin.query.max-scan-bytes=0 +# +#kylin.query.cache-enabled=true +#kylin.query.cache-threshold-scan-count=10240 +#kylin.query.cache-threshold-duration=2000 +#kylin.query.cache-threshold-scan-bytes=1048576 +#kylin.query.large-query-threshold=1000000 +# +## Controls extras properties for Calcite jdbc driver +## all extras properties should undder prefix "kylin.query.calcite.extras-props." +## case sensitive, default: true, to enable case insensitive set it to false +## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE +#kylin.query.calcite.extras-props.caseSensitive=true +## how to handle unquoted identity, defualt: TO_UPPER, available options: UNCHANGED, TO_UPPER, TO_LOWER +## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING +#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER +## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE, BACK_TICK, BRACKET +## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING +#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE +## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal +## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum +#kylin.query.calcite.extras-props.conformance=LENIENT +# +## TABLE ACL +#kylin.query.security.table-acl-enabled=true +# +## Usually should not modify this +#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor +# +#kylin.query.escape-default-keyword=false +# +## Usually should not modify this +#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack +# +#### SECURITY ### +# +## Spring security profile, options: testing, ldap, saml +## with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN to login +#kylin.security.profile=testing +# +## Admin roles in LDAP, for ldap and saml +#kylin.security.acl.admin-role=admin +# +## LDAP authentication configuration +#kylin.security.ldap.connection-server=ldap://ldap_server:389 +#kylin.security.ldap.connection-username= +#kylin.security.ldap.connection-password= +## When you use the customized CA certificate library for user authentication based on LDAPs, you need to configure this item. +## The value of this item will be added to the JVM parameter javax.net.ssl.trustStore. 
+#kylin.security.ldap.connection-truststore= +# +## LDAP user account directory; +#kylin.security.ldap.user-search-base= +#kylin.security.ldap.user-search-pattern= +#kylin.security.ldap.user-group-search-base= +#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1})) +# +## LDAP service account directory +#kylin.security.ldap.service-search-base= +#kylin.security.ldap.service-search-pattern= +#kylin.security.ldap.service-group-search-base= +# +### SAML configurations for SSO +## SAML IDP metadata file location +#kylin.security.saml.metadata-file=classpath:sso_metadata.xml +#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin +#kylin.security.saml.keystore-file=classpath:samlKeystore.jks +#kylin.security.saml.context-scheme=https +#kylin.security.saml.context-server-name=hostname +#kylin.security.saml.context-server-port=443 +#kylin.security.saml.context-path=/kylin +# +#### SPARK BUILD ENGINE CONFIGS ### +# +## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit +## This must contain site xmls of core, yarn, hive, and hbase in one folder +##kylin.env.hadoop-conf-dir=/etc/hadoop/conf +# +## Spark conf (default is in spark/conf/spark-defaults.conf) +#kylin.engine.spark-conf.spark.master=yarn +##kylin.engine.spark-conf.spark.submit.deployMode=client +#kylin.engine.spark-conf.spark.yarn.queue=default +#kylin.engine.spark-conf.spark.executor.cores=1 +#kylin.engine.spark-conf.spark.executor.memory=4G +#kylin.engine.spark-conf.spark.executor.instances=1 +#kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M +#kylin.engine.spark-conf.spark.driver.cores=1 +#kylin.engine.spark-conf.spark.driver.memory=1G +#kylin.engine.spark-conf.spark.shuffle.service.enabled=true +#kylin.engine.spark-conf.spark.eventLog.enabled=true +#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false +##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +# +## manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime +##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec +# +## uncomment for HDP +##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current +# +#### SPARK QUERY ENGINE CONFIGS (a.k.a. 
Sparder Context) ### +## Enlarge cores and memory to improve query performance in production env, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X +# +#kylin.query.spark-conf.spark.master=yarn +##kylin.query.spark-conf.spark.submit.deployMode=client +#kylin.query.spark-conf.spark.driver.cores=1 +#kylin.query.spark-conf.spark.driver.memory=4G +#kylin.query.spark-conf.spark.driver.memoryOverhead=1G +#kylin.query.spark-conf.spark.executor.cores=1 +#kylin.query.spark-conf.spark.executor.instances=1 +#kylin.query.spark-conf.spark.executor.memory=4G +#kylin.query.spark-conf.spark.executor.memoryOverhead=1G +#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +##kylin.query.spark-conf.spark.sql.shuffle.partitions=40 +##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +# +## uncomment for HDP +##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current +# +#### QUERY PUSH DOWN ### +# +##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl +##kylin.query.pushdown.update-enabled=false + +kylin.env=QA +kylin.server.mode=all +kylin.server.host-address=127.0.0.1:7777 +# Display timezone on UI,format like[GMT+N or GMT-N] +kylin.web.timezone=GMT+8 + +kylin.source.hive.client=cli +kylin.source.hive.database-for-flat-table=kylin4 + +kylin.engine.spark-conf.spark.eventLog.enabled=true +kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false + +kylin.engine.spark-conf.spark.yarn.submit.file.replication=1 +kylin.engine.spark-conf.spark.master=yarn +kylin.engine.spark-conf.spark.driver.memory=512M +kylin.engine.spark-conf.spark.driver.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.memory=1G +kylin.engine.spark-conf.spark.executor.instances=1 +kylin.engine.spark-conf.spark.executor.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.cores=1 +kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* + +kylin.storage.columnar.shard-rowcount=2500000 +kylin.storage.columnar.shard-countdistinct-rowcount=1000000 +kylin.storage.columnar.repartition-threshold-size-mb=128 +kylin.storage.columnar.shard-size-mb=128 + +kylin.query.auto-sparder-context=true +kylin.query.spark-conf.spark.master=yarn +kylin.query.spark-conf.spark.driver.memory=512M +kylin.query.spark-conf.spark.driver.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.memory=1G +kylin.query.spark-conf.spark.executor.instances=1 +kylin.query.spark-conf.spark.executor.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.cores=1 +kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +kylin.query.spark-conf.spark.sql.shuffle.partitions=1 +kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +kylin.query.spark-conf.spark.eventLog.enabled=true +kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history + +# for local cache +kylin.query.cache-enabled=false + +# for pushdown query 
+kylin.query.pushdown.update-enabled=false +kylin.query.pushdown.enabled=true +kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl + +# for Cube Planner +kylin.cube.cubeplanner.enabled=false +kylin.server.query-metrics2-enabled=false +kylin.metrics.reporter-query-enabled=false +kylin.metrics.reporter-job-enabled=false +kylin.metrics.monitor-enabled=false +kylin.web.dashboard-enabled=false +kylin.web.set-config-enable=true + +# metadata for mysql +kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10 +kylin.env.hdfs-working-dir=/kylin4_metadata +kylin.env.zookeeper-base-path=/kylin4 +kylin.env.zookeeper-connect-string=127.0.0.1 diff --git a/docker/conf/spark/spark-defaults.conf b/docker/conf/spark/spark-defaults.conf new file mode 100644 index 0000000..dac2e3c --- /dev/null +++ b/docker/conf/spark/spark-defaults.conf @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" + +spark.sql.catalogImplementation hive +spark.driver.maxResultSize 1g +spark.sql.hive.thriftServer.singleSession false + +spark.serializer org.apache.spark.serializer.JavaSerializer + +spark.memory.useLegacyMode false +spark.memory.fraction 0.3 +spark.memory.storageFraction 0.3 + +spark.rdd.compress true +spark.io.compression.codec snappy + +spark.locality.wait 100ms +spark.speculation false + +spark.task.maxFailures 4 + +spark.scheduler.minRegisteredResourcesRatio 1.0 +spark.scheduler.maxRegisteredResourcesWaitingTime 60s + +spark.yarn.jars hdfs://localhost:9000/spark2_jars/* + + + + + diff --git a/docker/conf/spark/spark-env.sh b/docker/conf/spark/spark-env.sh new file mode 100755 index 0000000..3c5837d --- /dev/null +++ b/docker/conf/spark/spark-env.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client/cluster mode +# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) +# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. 
It will not output a PID file. +# Options for native BLAS, like Intel MKL, OpenBLAS, and so on. +# You might get better performance to enable these options if using native BLAS (see SPARK-21305). +# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL +# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS + +export JAVA_HOME=/home/admin/jdk1.8.0_141 +export CLASSPATH=.:$JAVA_HOME/lib +export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native + +export SPARK_PID_DIR=${SPARK_HOME}/ + diff --git a/docker/conf/zk/zoo.cfg b/docker/conf/zk/zoo.cfg new file mode 100644 index 0000000..1a576de --- /dev/null +++ b/docker/conf/zk/zoo.cfg @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 +# the directory where the snapshot is stored. +# do not use /tmp for storage, /tmp here is just +# example sakes. +dataDir=/data/zookeeper +# the port at which the clients will connect +clientPort=2181 +# the maximum number of client connections. +# increase this if you need to handle more clients +#maxClientCnxns=60 +# +# Be sure to read the maintenance section of the +# administrator guide before turning on autopurge. +# +# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance +# +# The number of snapshots to retain in dataDir +#autopurge.snapRetainCount=3 +# Purge task interval in hours +# Set to "0" to disable auto purge feature +#autopurge.purgeInterval=1 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index edeefec..31664f7 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -16,19 +16,26 @@ # See the License for the specific language governing permissions and # limitations under the License. # -echo "127.0.0.1 sandbox.hortonworks.com" >> /etc/hosts +echo "127.0.0.1 sandbox sandbox.hortonworks.com" >> /etc/hosts + +# clean pid files +rm -f /tmp/*.pid # start mysql -service mysqld start -mysqladmin -uroot password 123456 -mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';" +if [ ! -f "/home/admin/first_run" ] +then + service mysqld start + mysqladmin -uroot password 123456 + mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS kylin4 default charset utf8 COLLATE utf8_general_ci;" + mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';" +fi +service mysqld restart # start hdfs if [ ! 
-f "/home/admin/first_run" ] then hdfs namenode -format fi -touch /home/admin/first_run $HADOOP_HOME/sbin/hadoop-daemon.sh start namenode $HADOOP_HOME/sbin/hadoop-daemon.sh start datanode @@ -39,33 +46,42 @@ $HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager # start mr jobhistory $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver -# start hbase +# start zk rm -rf /data/zookeeper/* -$HBASE_HOME/bin/start-hbase.sh +rm -f /data/zookeeper/zookeeper_server.pid +$ZK_HOME/bin/zkServer.sh start # start kafka -rm -rf /tmp/kafka-logs -nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties & +# rm -rf /tmp/kafka-logs +# nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties & + +sleep 10s +mkdir -p ${KYLIN_HOME}/logs +# check hive usability first, this operation will insert one version record into VERSION table +$KYLIN_HOME/bin/check-hive-usability.sh > ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 -# start livy -hdfs dfs -mkdir -p /kylin/livy -hdfs dfs -put -f $HBASE_HOME/lib/hbase-client-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-common-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop2-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-server-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/htrace-core-*-incubating.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/metrics-core-*.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $KYLIN_HOME/lib/kylin-job-$KYLIN_VERSION.jar hdfs://localhost:9000/kylin/livy/ -$LIVY_HOME/bin/livy-server start +if [ ! -f "/home/admin/first_run" ] +then + hdfs dfs -mkdir -p /kylin4/spark-history + hdfs dfs -mkdir -p /spark2_jars + hdfs dfs -put -f $SPARK_HOME/jars/* hdfs://localhost:9000/spark2_jars/ +fi # prepare kafka topic and data -$KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic -nohup $KYLIN_HOME/bin/kylin.sh org.apache.kylin.source.kafka.util.KafkaSampleProducer --topic kylin_streaming_topic --broker localhost:9092 < /dev/null 2>&1 > /tmp/kafka-sample.log & -# create sample cube -sh $KYLIN_HOME/bin/sample.sh +# if [ ! -f "/home/admin/first_run" ] +# then +# $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic +# fi + +# create sample data at the first time +if [ ! -f "/home/admin/first_run" ] +then + sh $KYLIN_HOME/bin/sample.sh >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 +fi +touch /home/admin/first_run +sleep 10s # start kylin -$KYLIN_HOME/bin/kylin.sh start +$KYLIN_HOME/bin/kylin.sh -v start >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 while : do diff --git a/docker/run_container.sh b/docker/run_container.sh index 8f03d75..3ed32ce 100755 --- a/docker/run_container.sh +++ b/docker/run_container.sh @@ -22,6 +22,5 @@ docker run -d \ -p 50070:50070 \ -p 8032:8032 \ -p 8042:8042 \ --p 16010:16010 \ --p 8998:8998 \ -apache-kylin-standalone \ No newline at end of file +-p 2181:2181 \ +apachekylin/apache-kylin-standalone:4.0.0-alpha