This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push: new e6565f8 KYLIN-4452 Kylin 4 with Docker e6565f8 is described below commit e6565f87fdeaa9e6bb8e2a5951ff7f3046997b96 Author: Zhichao Zhang <441586...@qq.com> AuthorDate: Tue Sep 15 13:36:27 2020 +0800 KYLIN-4452 Kylin 4 with Docker Docker image for Apache Kylin 4 Alpha --- docker/Dockerfile | 29 +-- docker/Dockerfile_dev | 43 ---- docker/Dockerfile_hadoop | 43 ++-- docker/README.md | 15 +- docker/build_image.sh | 4 +- docker/conf/hadoop/capacity-scheduler.xml | 134 +++++++++++ docker/conf/hbase/hbase-site.xml | 32 --- docker/conf/kylin/kylin.properties | 371 ++++++++++++++++++++++++++++++ docker/conf/spark/spark-defaults.conf | 55 +++++ docker/conf/spark/spark-env.sh | 77 +++++++ docker/conf/zk/zoo.cfg | 45 ++++ docker/entrypoint.sh | 66 ++++-- docker/run_container.sh | 5 +- 13 files changed, 760 insertions(+), 159 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 4ca399f..a168e6c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,26 +16,19 @@ # # Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one +FROM hadoop2.7-all-in-one-for-kylin4 -ENV KYLIN_VERSION 3.0.1 -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x - -# Download released Kylin -RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz \ - && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz - -RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics- [...] 
- && echo kylin.source.hive.quote-enabled=false >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.source.hive.redistribute-flat-table=false >> $KYLIN_HOME/conf/kylin.properties +ENV KYLIN_VERSION 4.0.0-alpha +ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2 +# Download Kylin +RUN wget https://archive.apache.org/dist/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ + && tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz \ + && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin-hadoop2.tar.gz +RUN rm -f $KYLIN_HOME/conf/kylin.properties +COPY conf/kylin/* $KYLIN_HOME/conf/ +RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $KYLIN_HOME/lib/ +RUN sed -i "s/hbase/java/g" $KYLIN_HOME/bin/set-java-home.sh COPY ./entrypoint.sh /home/admin/entrypoint.sh RUN chmod u+x /home/admin/entrypoint.sh diff --git a/docker/Dockerfile_dev b/docker/Dockerfile_dev deleted file mode 100644 index c0a8933..0000000 --- a/docker/Dockerfile_dev +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Docker image for apache kylin, based on the Hadoop image -FROM hadoop2.7-all-in-one - -## for dev -ENV KYLIN_VERSION 4.0.0-SNAPSHOT -ENV KYLIN_HOME /home/admin/apache-kylin-$KYLIN_VERSION-bin -# Copy compiled Kylin from local -COPY apache-kylin-$KYLIN_VERSION-bin.tar.gz /home/admin/ -RUN tar -zxvf /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz \ - && rm -f /home/admin/apache-kylin-$KYLIN_VERSION-bin.tar.gz - -RUN echo "kylin.engine.spark-conf.spark.executor.memory=1G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.spark-conf-mergedict.spark.executor.memory=1.5G" >> $KYLIN_HOME/conf/kylin.properties \ - && echo "kylin.engine.livy-conf.livy-url=http://127.0.0.1:8998" >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-key.file=hdfs://localhost:9000/kylin/livy/kylin-job-$KYLIN_VERSION.jar >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.livy-conf.livy-arr.jars=hdfs://localhost:9000/kylin/livy/hbase-client-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-common-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-hadoop2-compat-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/hbase-server-$HBASE_VERSION.jar,hdfs://localhost:9000/kylin/livy/htrace-core-*-incubating.jar,hdfs://localhost:9000/kylin/livy/metrics- [...] 
- && echo kylin.source.hive.quote-enabled=false >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin/spark-history >> $KYLIN_HOME/conf/kylin.properties \ - && echo kylin.source.hive.redistribute-flat-table=false >> $KYLIN_HOME/conf/kylin.properties - - -COPY ./entrypoint.sh /home/admin/entrypoint.sh -RUN chmod u+x /home/admin/entrypoint.sh - -ENTRYPOINT ["/home/admin/entrypoint.sh"] diff --git a/docker/Dockerfile_hadoop b/docker/Dockerfile_hadoop index a0312f4..8e76855 100644 --- a/docker/Dockerfile_hadoop +++ b/docker/Dockerfile_hadoop @@ -15,16 +15,14 @@ # limitations under the License. # -# Docker image with Hadoop/Hive/HBase/Spark/ZK/Kafka/Livy installed +# Docker image with Hadoop/Spark/Hive/ZK/Kafka installed FROM centos:6.9 ENV HIVE_VERSION 1.2.1 ENV HADOOP_VERSION 2.7.0 -ENV HBASE_VERSION 1.1.2 -ENV SPARK_VERSION 2.3.1 +ENV SPARK_VERSION 2.4.6 ENV ZK_VERSION 3.4.6 ENV KAFKA_VERSION 1.1.1 -ENV LIVY_VERSION 0.6.0 ENV JAVA_HOME /home/admin/jdk1.8.0_141 ENV MVN_HOME /home/admin/apache-maven-3.6.1 @@ -32,13 +30,11 @@ ENV HADOOP_HOME /home/admin/hadoop-$HADOOP_VERSION ENV HIVE_HOME /home/admin/apache-hive-$HIVE_VERSION-bin ENV HADOOP_CONF $HADOOP_HOME/etc/hadoop ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop -ENV HBASE_HOME /home/admin/hbase-$HBASE_VERSION -ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6 -ENV SPARK_CONF_DIR /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6/conf +ENV SPARK_HOME /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7 +ENV SPARK_CONF_DIR $SPARK_HOME/conf ENV ZK_HOME /home/admin/zookeeper-$ZK_VERSION ENV KAFKA_HOME /home/admin/kafka_2.11-$KAFKA_VERSION -ENV LIVY_HOME /home/admin/apache-livy-$LIVY_VERSION-incubating-bin -ENV PATH $PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$HBASE_HOME/bin:$MVN_HOME/bin:spark-$SPARK_VERSION-bin-hadoop2.6/bin:$KAFKA_HOME/bin +ENV PATH $PATH:$JAVA_HOME/bin:$ZK_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$MVN_HOME/bin:$KAFKA_HOME/bin USER root @@ -69,37 +65,32 @@ RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hado && mkdir -p /data/hadoop COPY conf/hadoop/* $HADOOP_CONF/ -# setup hbase -RUN wget https://archive.apache.org/dist/hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz \ - && tar -zxvf /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \ - && rm -f /home/admin/hbase-$HBASE_VERSION-bin.tar.gz \ - && mkdir -p /data/hbase \ - && mkdir -p /data/zookeeper -COPY conf/hbase/hbase-site.xml $HBASE_HOME/conf - # setup hive RUN wget https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz \ && tar -zxvf /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \ && rm -f /home/admin/apache-hive-$HIVE_VERSION-bin.tar.gz \ && wget -P $HIVE_HOME/lib https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.24/mysql-connector-java-5.1.24.jar COPY conf/hive/hive-site.xml $HIVE_HOME/conf +COPY conf/hive/hive-site.xml $HADOOP_CONF/ # setup spark -RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ - && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ - && rm -f /home/admin/spark-$SPARK_VERSION-bin-hadoop2.6.tgz \ +RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ + && tar -zxvf /home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ + && rm -f 
/home/admin/spark-$SPARK_VERSION-bin-hadoop2.7.tgz \ && cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf \ && cp $SPARK_HOME/yarn/*.jar $HADOOP_HOME/share/hadoop/yarn/lib RUN cp $HIVE_HOME/lib/mysql-connector-java-5.1.24.jar $SPARK_HOME/jars -RUN echo spark.sql.catalogImplementation=hive > $SPARK_HOME/conf/spark-defaults.conf +RUN cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-1.2.1.jar $SPARK_HOME/jars/ +COPY conf/spark/* $SPARK_CONF_DIR/ # setup kafka RUN wget https://archive.apache.org/dist/kafka/$KAFKA_VERSION/kafka_2.11-$KAFKA_VERSION.tgz \ && tar -zxvf /home/admin/kafka_2.11-$KAFKA_VERSION.tgz \ && rm -f /home/admin/kafka_2.11-$KAFKA_VERSION.tgz -# setup livy -RUN wget https://www.apache.org/dist/incubator/livy/$LIVY_VERSION-incubating/apache-livy-$LIVY_VERSION-incubating-bin.zip \ - && unzip /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip \ - && rm -f /home/admin/apache-livy-$LIVY_VERSION-incubating-bin.zip - +# setup zk +RUN wget https://archive.apache.org/dist/zookeeper/zookeeper-$ZK_VERSION/zookeeper-$ZK_VERSION.tar.gz \ + && tar -zxvf /home/admin/zookeeper-$ZK_VERSION.tar.gz \ + && rm -f /home/admin/zookeeper-$ZK_VERSION.tar.gz \ + && mkdir -p /data/zookeeper +COPY conf/zk/zoo.cfg $ZK_HOME/conf diff --git a/docker/README.md b/docker/README.md index 376547f..d137c8b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,8 +4,7 @@ In order to allow users to easily try Kylin, and to facilitate developers to ver - Jdk 1.8 - Hadoop 2.7.0 - Hive 1.2.1 -- Hbase 1.1.2 -- Spark 2.3.1 +- Spark 2.4.6 - Zookeeper 3.4.6 - Kafka 1.1.1 - MySQL 5.1.73 @@ -16,7 +15,7 @@ In order to allow users to easily try Kylin, and to facilitate developers to ver We have pushed the Kylin images to the [docker hub](https://hub.docker.com/r/apachekylin/apache-kylin-standalone). You do not need to build the image locally, just pull the image from remote (you can browse docker hub to check the available versions): ``` -docker pull apachekylin/apache-kylin-standalone:3.0.1 +docker pull apachekylin/apache-kylin-standalone:4.0.0-alpha ``` After the pull is successful, execute "sh run_container.sh" or the following command to start the container: @@ -29,16 +28,14 @@ docker run -d \ -p 50070:50070 \ -p 8032:8032 \ -p 8042:8042 \ --p 16010:16010 \ -apache-kylin-standalone +-p 2181:2181 \ +apachekylin/apache-kylin-standalone:4.0.0-alpha ``` The following services are automatically started when the container starts: - NameNode, DataNode - ResourceManager, NodeManager -- HBase -- Kafka - Kylin and run automatically `$KYLIN_HOME/bin/sample.sh `, create a kylin_streaming_topic topic in Kafka and continue to send data to this topic. This is to let the users start the container and then experience the batch and streaming way to build the cube and query. 
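(For reference, a minimal smoke test once the container has had a few minutes to start, assuming the image tag and port mappings shown above; the container ID placeholder and the kylin.log path are illustrative, the latter following from KYLIN_HOME in the new Dockerfile:)

```
# locate the container started from the standalone image
docker ps --filter ancestor=apachekylin/apache-kylin-standalone:4.0.0-alpha

# follow Kylin's startup log inside the container
docker exec -it <container_id> tail -f /home/admin/apache-kylin-4.0.0-alpha-bin-hadoop2/logs/kylin.log

# once Kylin is up, the web UI answers on the mapped port
curl -I http://127.0.0.1:7070/kylin/login
```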
@@ -48,7 +45,6 @@ After the container is started, we can enter the container through the `docker e - Kylin Web UI: [http://127.0.0.1:7070/kylin/login](http://127.0.0.1:7070/kylin/login) - HDFS NameNode Web UI: [http://127.0.0.1:50070](http://127.0.0.1:50070/) - YARN ResourceManager Web UI: [http://127.0.0.1:8088](http://127.0.0.1:8088/) -- HBase Web UI: [http://127.0.0.1:16010](http://127.0.0.1:16010/) In the container, the relevant environment variables are as follows: @@ -56,8 +52,7 @@ In the container, the relevant environment variables are as follows: JAVA_HOME=/home/admin/jdk1.8.0_141 HADOOP_HOME=/home/admin/hadoop-2.7.0 KAFKA_HOME=/home/admin/kafka_2.11-1.1.1 -SPARK_HOME=/home/admin/spark-2.3.1-bin-hadoop2.6 -HBASE_HOME=/home/admin/hbase-1.1.2 +SPARK_HOME=/home/admin/spark-2.4.6-bin-hadoop2.7 HIVE_HOME=/home/admin/apache-hive-1.2.1-bin ``` diff --git a/docker/build_image.sh b/docker/build_image.sh old mode 100644 new mode 100755 index 19fbec5..9c0b925 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -23,5 +23,5 @@ echo "build image in dir "${DIR} echo "start build Hadoop docker image" -docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one . -docker build -f Dockerfile -t apache-kylin-standalone . +docker build -f Dockerfile_hadoop -t hadoop2.7-all-in-one-for-kylin4 . +docker build -f Dockerfile -t apachekylin/apache-kylin-standalone:4.0.0-alpha . diff --git a/docker/conf/hadoop/capacity-scheduler.xml b/docker/conf/hadoop/capacity-scheduler.xml new file mode 100644 index 0000000..503ee3e --- /dev/null +++ b/docker/conf/hadoop/capacity-scheduler.xml @@ -0,0 +1,134 @@ +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> +<configuration> + + <property> + <name>yarn.scheduler.capacity.maximum-applications</name> + <value>2</value> + <description> + Maximum number of applications that can be pending and running. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.maximum-am-resource-percent</name> + <value>0.3</value> + <description> + Maximum percent of resources in the cluster which can be used to run + application masters i.e. controls number of concurrent running + applications. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.resource-calculator</name> + <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> + <description> + The ResourceCalculator implementation to be used to compare + Resources in the scheduler. + The default i.e. DefaultResourceCalculator only uses Memory while + DominantResourceCalculator uses dominant-resource to compare + multi-dimensional resources such as Memory, CPU etc. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.queues</name> + <value>default</value> + <description> + The queues at the this level (root is the root queue). 
+ </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.capacity</name> + <value>100</value> + <description>Default queue target capacity.</description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.user-limit-factor</name> + <value>1</value> + <description> + Default queue user limit a percentage from 0.0 to 1.0. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.maximum-capacity</name> + <value>100</value> + <description> + The maximum capacity of the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.state</name> + <value>RUNNING</value> + <description> + The state of the default queue. State can be one of RUNNING or STOPPED. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name> + <value>*</value> + <description> + The ACL of who can submit jobs to the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name> + <value>*</value> + <description> + The ACL of who can administer jobs on the default queue. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.node-locality-delay</name> + <value>40</value> + <description> + Number of missed scheduling opportunities after which the CapacityScheduler + attempts to schedule rack-local containers. + Typically this should be set to number of nodes in the cluster, By default is setting + approximately number of nodes in one rack which is 40. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings</name> + <value></value> + <description> + A list of mappings that will be used to assign jobs to queues + The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]* + Typically this list will be used to map users to queues, + for example, u:%user:%user maps all users to queues with the same name + as the user. + </description> + </property> + + <property> + <name>yarn.scheduler.capacity.queue-mappings-override.enable</name> + <value>false</value> + <description> + If a queue mapping is present, will it override the value specified + by the user? This can be used by administrators to place jobs in queues + that are different than the one specified by the user. + The default is false. + </description> + </property> + +</configuration> diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml deleted file mode 100644 index 0c77926..0000000 --- a/docker/conf/hbase/hbase-site.xml +++ /dev/null @@ -1,32 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> -<!-- - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. See accompanying LICENSE file. ---> - -<!-- Put site-specific property overrides in this file. 
--> - -<configuration> - <property> - <name>hbase.rootdir</name> - <value>file:///data/hbase</value> - </property> - <property> - <name>hbase.zookeeper.property.dataDir</name> - <value>/data/zookeeper</value> - </property> - <property> - <name>hbase.master.info.port</name> - <value>16010</value> - </property> -</configuration> \ No newline at end of file diff --git a/docker/conf/kylin/kylin.properties b/docker/conf/kylin/kylin.properties new file mode 100644 index 0000000..7054d83 --- /dev/null +++ b/docker/conf/kylin/kylin.properties @@ -0,0 +1,371 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + + + +# The below commented values will effect as default settings +# Uncomment and override them if necessary + + + +# +#### METADATA | ENV ### +# +## The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended in Kylin 4.X +## Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase +#kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10 +# +## metadata cache sync retry times +#kylin.metadata.sync-retries=3 +# +## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory +#kylin.env.hdfs-working-dir=/kylin +# +## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions. +#kylin.env=QA +# +## kylin zk base path +#kylin.env.zookeeper-base-path=/kylin +# +## Run a TestingServer for curator locally +#kylin.env.zookeeper-is-local=false +# +## Connect to a remote zookeeper with the url, should set kylin.env.zookeeper-is-local to false +#kylin.env.zookeeper-connect-string=sandbox.hortonworks.com +# +#### SERVER | WEB | RESTCLIENT ### +# +## Kylin server mode, valid value [all, query, job] +#kylin.server.mode=all +# +## List of web servers in use, this enables one web server instance to sync up with other servers. 
+#kylin.server.cluster-servers=localhost:7070 +# +## Display timezone on UI,format like[GMT+N or GMT-N] +#kylin.web.timezone= +# +## Timeout value for the queries submitted through the Web UI, in milliseconds +#kylin.web.query-timeout=300000 +# +#kylin.web.cross-domain-enabled=true +# +##allow user to export query result +#kylin.web.export-allow-admin=true +#kylin.web.export-allow-other=true +# +## Hide measures in measure list of cube designer, separate by comma +#kylin.web.hide-measures=RAW +# +##max connections of one route +#kylin.restclient.connection.default-max-per-route=20 +# +##max connections of one rest-client +#kylin.restclient.connection.max-total=200 +# +#### PUBLIC CONFIG ### +#kylin.engine.default=6 +#kylin.storage.default=4 +#kylin.web.hive-limit=20 +#kylin.web.help.length=4 +#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs/tutorial/kylin_sample.html +#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs/tutorial/odbc.html +#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs/tutorial/tableau_91.html +#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs/howto/howto_optimize_cubes.html +#kylin.web.link-streaming-guide=http://kylin.apache.org/ +#kylin.htrace.show-gui-trace-toggle=false +#kylin.web.link-hadoop= +#kylin.web.link-diagnostic= +#kylin.web.contact-mail= +#kylin.server.external-acl-provider= +# +## Default time filter for job list, 0->current day, 1->last one day, 2->last one week, 3->last one year, 4->all +#kylin.web.default-time-filter=1 +# +#### SOURCE ### +## Define how to access to hive metadata +## When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead +#kylin.source.hive.metadata-type=hcatalog +# +## Hive client, valid value [cli, beeline] +#kylin.source.hive.client=cli +# +## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH +#kylin.source.hive.beeline-shell=beeline +# +## Hive database name for putting the intermediate flat tables +#kylin.source.hive.database-for-flat-table=default +# +#### STORAGE ### +# +## The storage for final cube file in hbase +#kylin.storage.url=hbase +# +## clean real storage after delete operation +## if you want to delete the real storage like htable of deleting segment, you can set it to true +#kylin.storage.clean-after-delete-operation=false +# +#### JOB ### +# +## Max job retry on error, default 0: no retry +#kylin.job.retry=0 +# +## Max count of concurrent jobs running +#kylin.job.max-concurrent-jobs=10 +# +## The percentage of the sampling, default 100% +#kylin.job.sampling-percentage=100 +# +## If true, will send email notification on job complete +##kylin.job.notification-enabled=true +##kylin.job.notification-mail-enable-starttls=true +##kylin.job.notification-mail-host=smtp.office365.com +##kylin.job.notification-mail-port=587 +##kylin.job.notification-mail-username=ky...@example.com +##kylin.job.notification-mail-password=mypassword +##kylin.job.notification-mail-sender=ky...@example.com +#kylin.job.scheduler.provider.100=org.apache.kylin.job.impl.curator.CuratorScheduler +#kylin.job.scheduler.default=0 +# +#### CUBE | DICTIONARY ### +# +#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler +#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor +# +## 'auto', 'inmem', 'layer' or 'random' for testing +#kylin.cube.algorithm=layer +# +## A smaller threshold prefers layer, a larger threshold prefers in-mem 
+#kylin.cube.algorithm.layer-or-inmem-threshold=7 +# +## auto use inmem algorithm: +## 1, cube planner optimize job +## 2, no source record +#kylin.cube.algorithm.inmem-auto-optimize=true +# +#kylin.cube.aggrgroup.max-combination=32768 +# +#kylin.cube.cubeplanner.enabled=false +#kylin.cube.cubeplanner.enabled-for-existing-cube=false +#kylin.cube.cubeplanner.expansion-threshold=15.0 +#kylin.cube.cubeplanner.recommend-cache-max-size=200 +#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000 +#kylin.cube.cubeplanner.algorithm-threshold-greedy=8 +#kylin.cube.cubeplanner.algorithm-threshold-genetic=23 +# +#### QUERY ### +# +## Controls the maximum number of bytes a query is allowed to scan storage. +## The default value 0 means no limit. +## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor. +#kylin.query.max-scan-bytes=0 +# +#kylin.query.cache-enabled=true +#kylin.query.cache-threshold-scan-count=10240 +#kylin.query.cache-threshold-duration=2000 +#kylin.query.cache-threshold-scan-bytes=1048576 +#kylin.query.large-query-threshold=1000000 +# +## Controls extras properties for Calcite jdbc driver +## all extras properties should undder prefix "kylin.query.calcite.extras-props." +## case sensitive, default: true, to enable case insensitive set it to false +## @see org.apache.calcite.config.CalciteConnectionProperty.CASE_SENSITIVE +#kylin.query.calcite.extras-props.caseSensitive=true +## how to handle unquoted identity, defualt: TO_UPPER, available options: UNCHANGED, TO_UPPER, TO_LOWER +## @see org.apache.calcite.config.CalciteConnectionProperty.UNQUOTED_CASING +#kylin.query.calcite.extras-props.unquotedCasing=TO_UPPER +## quoting method, default: DOUBLE_QUOTE, available options: DOUBLE_QUOTE, BACK_TICK, BRACKET +## @see org.apache.calcite.config.CalciteConnectionProperty.QUOTING +#kylin.query.calcite.extras-props.quoting=DOUBLE_QUOTE +## change SqlConformance from DEFAULT to LENIENT to enable group by ordinal +## @see org.apache.calcite.sql.validate.SqlConformance.SqlConformanceEnum +#kylin.query.calcite.extras-props.conformance=LENIENT +# +## TABLE ACL +#kylin.query.security.table-acl-enabled=true +# +## Usually should not modify this +#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor +# +#kylin.query.escape-default-keyword=false +# +## Usually should not modify this +#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack +# +#### SECURITY ### +# +## Spring security profile, options: testing, ldap, saml +## with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN to login +#kylin.security.profile=testing +# +## Admin roles in LDAP, for ldap and saml +#kylin.security.acl.admin-role=admin +# +## LDAP authentication configuration +#kylin.security.ldap.connection-server=ldap://ldap_server:389 +#kylin.security.ldap.connection-username= +#kylin.security.ldap.connection-password= +## When you use the customized CA certificate library for user authentication based on LDAPs, you need to configure this item. +## The value of this item will be added to the JVM parameter javax.net.ssl.trustStore. 
+#kylin.security.ldap.connection-truststore= +# +## LDAP user account directory; +#kylin.security.ldap.user-search-base= +#kylin.security.ldap.user-search-pattern= +#kylin.security.ldap.user-group-search-base= +#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1})) +# +## LDAP service account directory +#kylin.security.ldap.service-search-base= +#kylin.security.ldap.service-search-pattern= +#kylin.security.ldap.service-group-search-base= +# +### SAML configurations for SSO +## SAML IDP metadata file location +#kylin.security.saml.metadata-file=classpath:sso_metadata.xml +#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin +#kylin.security.saml.keystore-file=classpath:samlKeystore.jks +#kylin.security.saml.context-scheme=https +#kylin.security.saml.context-server-name=hostname +#kylin.security.saml.context-server-port=443 +#kylin.security.saml.context-path=/kylin +# +#### SPARK BUILD ENGINE CONFIGS ### +# +## Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit +## This must contain site xmls of core, yarn, hive, and hbase in one folder +##kylin.env.hadoop-conf-dir=/etc/hadoop/conf +# +## Spark conf (default is in spark/conf/spark-defaults.conf) +#kylin.engine.spark-conf.spark.master=yarn +##kylin.engine.spark-conf.spark.submit.deployMode=client +#kylin.engine.spark-conf.spark.yarn.queue=default +#kylin.engine.spark-conf.spark.executor.cores=1 +#kylin.engine.spark-conf.spark.executor.memory=4G +#kylin.engine.spark-conf.spark.executor.instances=1 +#kylin.engine.spark-conf.spark.executor.memoryOverhead=1024M +#kylin.engine.spark-conf.spark.driver.cores=1 +#kylin.engine.spark-conf.spark.driver.memory=1G +#kylin.engine.spark-conf.spark.shuffle.service.enabled=true +#kylin.engine.spark-conf.spark.eventLog.enabled=true +#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history +#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false +##kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +# +## manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime +##kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec +# +## uncomment for HDP +##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current +# +#### SPARK QUERY ENGINE CONFIGS (a.k.a. 
Sparder Context) ### +## Enlarge cores and memory to improve query performance in production env, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X +# +#kylin.query.spark-conf.spark.master=yarn +##kylin.query.spark-conf.spark.submit.deployMode=client +#kylin.query.spark-conf.spark.driver.cores=1 +#kylin.query.spark-conf.spark.driver.memory=4G +#kylin.query.spark-conf.spark.driver.memoryOverhead=1G +#kylin.query.spark-conf.spark.executor.cores=1 +#kylin.query.spark-conf.spark.executor.instances=1 +#kylin.query.spark-conf.spark.executor.memory=4G +#kylin.query.spark-conf.spark.executor.memoryOverhead=1G +#kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +##kylin.query.spark-conf.spark.sql.shuffle.partitions=40 +##kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +# +## uncomment for HDP +##kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current +##kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current +# +#### QUERY PUSH DOWN ### +# +##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl +##kylin.query.pushdown.update-enabled=false + +kylin.env=QA +kylin.server.mode=all +kylin.server.host-address=127.0.0.1:7777 +# Display timezone on UI,format like[GMT+N or GMT-N] +kylin.web.timezone=GMT+8 + +kylin.source.hive.client=cli +kylin.source.hive.database-for-flat-table=kylin4 + +kylin.engine.spark-conf.spark.eventLog.enabled=true +kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history +kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false + +kylin.engine.spark-conf.spark.yarn.submit.file.replication=1 +kylin.engine.spark-conf.spark.master=yarn +kylin.engine.spark-conf.spark.driver.memory=512M +kylin.engine.spark-conf.spark.driver.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.memory=1G +kylin.engine.spark-conf.spark.executor.instances=1 +kylin.engine.spark-conf.spark.executor.memoryOverhead=512M +kylin.engine.spark-conf.spark.executor.cores=1 +kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 +kylin.engine.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* + +kylin.storage.columnar.shard-rowcount=2500000 +kylin.storage.columnar.shard-countdistinct-rowcount=1000000 +kylin.storage.columnar.repartition-threshold-size-mb=128 +kylin.storage.columnar.shard-size-mb=128 + +kylin.query.auto-sparder-context=true +kylin.query.spark-conf.spark.master=yarn +kylin.query.spark-conf.spark.driver.memory=512M +kylin.query.spark-conf.spark.driver.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.memory=1G +kylin.query.spark-conf.spark.executor.instances=1 +kylin.query.spark-conf.spark.executor.memoryOverhead=512M +kylin.query.spark-conf.spark.executor.cores=1 +kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer +kylin.query.spark-conf.spark.sql.shuffle.partitions=1 +kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* +kylin.query.spark-conf.spark.eventLog.enabled=true +kylin.query.spark-conf.spark.history.fs.logDirectory=hdfs://localhost:9000/kylin4/spark-history +kylin.query.spark-conf.spark.eventLog.dir=hdfs://localhost:9000/kylin4/spark-history + +# for local cache +kylin.query.cache-enabled=false + +# for pushdown query 
+kylin.query.pushdown.update-enabled=false +kylin.query.pushdown.enabled=true +kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl + +# for Cube Planner +kylin.cube.cubeplanner.enabled=false +kylin.server.query-metrics2-enabled=false +kylin.metrics.reporter-query-enabled=false +kylin.metrics.reporter-job-enabled=false +kylin.metrics.monitor-enabled=false +kylin.web.dashboard-enabled=false +kylin.web.set-config-enable=true + +# metadata for mysql +kylin.metadata.url=kylin4@jdbc,url=jdbc:mysql://localhost:3306/kylin4,username=root,password=123456,maxActive=10,maxIdle=10 +kylin.env.hdfs-working-dir=/kylin4_metadata +kylin.env.zookeeper-base-path=/kylin4 +kylin.env.zookeeper-connect-string=127.0.0.1 diff --git a/docker/conf/spark/spark-defaults.conf b/docker/conf/spark/spark-defaults.conf new file mode 100644 index 0000000..dac2e3c --- /dev/null +++ b/docker/conf/spark/spark-defaults.conf @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" + +spark.sql.catalogImplementation hive +spark.driver.maxResultSize 1g +spark.sql.hive.thriftServer.singleSession false + +spark.serializer org.apache.spark.serializer.JavaSerializer + +spark.memory.useLegacyMode false +spark.memory.fraction 0.3 +spark.memory.storageFraction 0.3 + +spark.rdd.compress true +spark.io.compression.codec snappy + +spark.locality.wait 100ms +spark.speculation false + +spark.task.maxFailures 4 + +spark.scheduler.minRegisteredResourcesRatio 1.0 +spark.scheduler.maxRegisteredResourcesWaitingTime 60s + +spark.yarn.jars hdfs://localhost:9000/spark2_jars/* + + + + + diff --git a/docker/conf/spark/spark-env.sh b/docker/conf/spark/spark-env.sh new file mode 100755 index 0000000..3c5837d --- /dev/null +++ b/docker/conf/spark/spark-env.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. + +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client/cluster mode +# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) +# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. 
It will not output a PID file. +# Options for native BLAS, like Intel MKL, OpenBLAS, and so on. +# You might get better performance to enable these options if using native BLAS (see SPARK-21305). +# - MKL_NUM_THREADS=1 Disable multi-threading of Intel MKL +# - OPENBLAS_NUM_THREADS=1 Disable multi-threading of OpenBLAS + +export JAVA_HOME=/home/admin/jdk1.8.0_141 +export CLASSPATH=.:$JAVA_HOME/lib +export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/admin/hadoop-2.7.0/lib/native + +export SPARK_PID_DIR=${SPARK_HOME}/ + diff --git a/docker/conf/zk/zoo.cfg b/docker/conf/zk/zoo.cfg new file mode 100644 index 0000000..1a576de --- /dev/null +++ b/docker/conf/zk/zoo.cfg @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# The number of milliseconds of each tick +tickTime=2000 +# The number of ticks that the initial +# synchronization phase can take +initLimit=10 +# The number of ticks that can pass between +# sending a request and getting an acknowledgement +syncLimit=5 +# the directory where the snapshot is stored. +# do not use /tmp for storage, /tmp here is just +# example sakes. +dataDir=/data/zookeeper +# the port at which the clients will connect +clientPort=2181 +# the maximum number of client connections. +# increase this if you need to handle more clients +#maxClientCnxns=60 +# +# Be sure to read the maintenance section of the +# administrator guide before turning on autopurge. +# +# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance +# +# The number of snapshots to retain in dataDir +#autopurge.snapRetainCount=3 +# Purge task interval in hours +# Set to "0" to disable auto purge feature +#autopurge.purgeInterval=1 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index edeefec..31664f7 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -16,19 +16,26 @@ # See the License for the specific language governing permissions and # limitations under the License. # -echo "127.0.0.1 sandbox.hortonworks.com" >> /etc/hosts +echo "127.0.0.1 sandbox sandbox.hortonworks.com" >> /etc/hosts + +# clean pid files +rm -f /tmp/*.pid # start mysql -service mysqld start -mysqladmin -uroot password 123456 -mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';" +if [ ! -f "/home/admin/first_run" ] +then + service mysqld start + mysqladmin -uroot password 123456 + mysql -uroot -p123456 -e "CREATE DATABASE IF NOT EXISTS kylin4 default charset utf8 COLLATE utf8_general_ci;" + mysql -uroot -p123456 -e "grant all privileges on root.* to root@'%' identified by '123456';" +fi +service mysqld restart # start hdfs if [ ! 
-f "/home/admin/first_run" ] then hdfs namenode -format fi -touch /home/admin/first_run $HADOOP_HOME/sbin/hadoop-daemon.sh start namenode $HADOOP_HOME/sbin/hadoop-daemon.sh start datanode @@ -39,33 +46,42 @@ $HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager # start mr jobhistory $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver -# start hbase +# start zk rm -rf /data/zookeeper/* -$HBASE_HOME/bin/start-hbase.sh +rm -f /data/zookeeper/zookeeper_server.pid +$ZK_HOME/bin/zkServer.sh start # start kafka -rm -rf /tmp/kafka-logs -nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties & +# rm -rf /tmp/kafka-logs +# nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties & + +sleep 10s +mkdir -p ${KYLIN_HOME}/logs +# check hive usability first, this operation will insert one version record into VERSION table +$KYLIN_HOME/bin/check-hive-usability.sh > ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 -# start livy -hdfs dfs -mkdir -p /kylin/livy -hdfs dfs -put -f $HBASE_HOME/lib/hbase-client-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-common-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-hadoop2-compat-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/hbase-server-$HBASE_VERSION.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/htrace-core-*-incubating.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $HBASE_HOME/lib/metrics-core-*.jar hdfs://localhost:9000/kylin/livy/ -hdfs dfs -put -f $KYLIN_HOME/lib/kylin-job-$KYLIN_VERSION.jar hdfs://localhost:9000/kylin/livy/ -$LIVY_HOME/bin/livy-server start +if [ ! -f "/home/admin/first_run" ] +then + hdfs dfs -mkdir -p /kylin4/spark-history + hdfs dfs -mkdir -p /spark2_jars + hdfs dfs -put -f $SPARK_HOME/jars/* hdfs://localhost:9000/spark2_jars/ +fi # prepare kafka topic and data -$KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic -nohup $KYLIN_HOME/bin/kylin.sh org.apache.kylin.source.kafka.util.KafkaSampleProducer --topic kylin_streaming_topic --broker localhost:9092 < /dev/null 2>&1 > /tmp/kafka-sample.log & -# create sample cube -sh $KYLIN_HOME/bin/sample.sh +# if [ ! -f "/home/admin/first_run" ] +# then +# $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 3 --topic kylin_streaming_topic +# fi + +# create sample data at the first time +if [ ! -f "/home/admin/first_run" ] +then + sh $KYLIN_HOME/bin/sample.sh >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 +fi +touch /home/admin/first_run +sleep 10s # start kylin -$KYLIN_HOME/bin/kylin.sh start +$KYLIN_HOME/bin/kylin.sh -v start >> ${KYLIN_HOME}/logs/kylin-verbose.log 2>&1 while : do diff --git a/docker/run_container.sh b/docker/run_container.sh index 8f03d75..3ed32ce 100755 --- a/docker/run_container.sh +++ b/docker/run_container.sh @@ -22,6 +22,5 @@ docker run -d \ -p 50070:50070 \ -p 8032:8032 \ -p 8042:8042 \ --p 16010:16010 \ --p 8998:8998 \ -apache-kylin-standalone \ No newline at end of file +-p 2181:2181 \ +apachekylin/apache-kylin-standalone:4.0.0-alpha