This is an automated email from the ASF dual-hosted git repository.
xxyu pushed a commit to branch kylin4_on_cloud
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin4_on_cloud by this push:
new c25d881 Support mdx (#1827)
c25d881 is described below
commit c25d8819bee6e2e3c78c3238185aa01539afaeb5
Author: Tengting Xu <[email protected]>
AuthorDate: Wed Mar 9 14:13:39 2022 +0800
Support mdx (#1827)
* # minor fix about destroy
* # minor update for versions of software
* # support mdx & pre-support glue
---
.gitignore | 2 +-
.../properties/templates/kylin.properties.template | 2 +
backup/scripts/prepare-ec2-env-for-kylin4.sh | 233 ++++++++++++++++++---
backup/scripts/prepare-ec2-env-for-spark-master.sh | 68 +++++-
backup/scripts/prepare-ec2-env-for-spark-slave.sh | 61 ++++--
.../scripts/prepare-ec2-env-for-static-services.sh | 26 ++-
backup/scripts/prepare-ec2-env-for-zk.sh | 62 +++---
.../ec2-cluster-kylin4-template.yaml | 29 ++-
cloudformation_templates/ec2-cluster-kylin4.yaml | 30 ++-
.../ec2-cluster-spark-master.yaml | 23 +-
.../ec2-cluster-spark-slave-template.yaml | 21 +-
.../ec2-cluster-spark-slave.yaml | 34 ++-
.../ec2-cluster-static-services.yaml | 10 +-
cloudformation_templates/ec2-cluster-zk.yaml | 18 +-
cloudformation_templates/ec2-or-emr-vpc.yaml | 6 +
clouds/aws.py | 2 +-
constant/yaml_files.py | 2 +
constant/yaml_params.py | 2 +-
engine_utils.py | 49 +++--
instances/aws_instance.py | 37 +++-
kylin_configs.yaml | 56 ++++-
readme/commands.md | 2 +-
readme/quick_start.md | 2 +-
utils.py | 4 +
24 files changed, 643 insertions(+), 138 deletions(-)
diff --git a/.gitignore b/.gitignore
index dbc5998..a087ce7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,6 @@ __pycache__/
*/.DS_Store
/venv/
.idea
-logs/*.log
+logs/
backup/jars
backup/tars
\ No newline at end of file
diff --git a/backup/properties/templates/kylin.properties.template
b/backup/properties/templates/kylin.properties.template
index 41f4a96..0bd043c 100644
--- a/backup/properties/templates/kylin.properties.template
+++ b/backup/properties/templates/kylin.properties.template
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+
+
# Kylin server mode, valid value [all, query, job]
kylin.server.mode=all
kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://{{ DB_HOST }}:{{
DB_PORT }}/kylin,username=root,password={{ DB_PASSWORD
}},maxActive=10,maxIdle=10
diff --git a/backup/scripts/prepare-ec2-env-for-kylin4.sh
b/backup/scripts/prepare-ec2-env-for-kylin4.sh
index 54a3d6d..cdad91d 100644
--- a/backup/scripts/prepare-ec2-env-for-kylin4.sh
+++ b/backup/scripts/prepare-ec2-env-for-kylin4.sh
@@ -73,14 +73,14 @@ function help() {
--db-user db-user-for-kylin
--kylin-mode mode-for-kylin[all|query|job]
--local-soft whether-to-use-local-cache+soft-affinity
- --cluster-num specify-a-cluster"
+ --cluster-num specify-a-cluster
+ --hadoop-version hadoop-version-for-cluster
+ --spark-version spark-version-for-cluster
+ --kylin-version kylin-version-for-cluster
+ --hive-version hive-version-for-cluster"
exit 0
}
-if [[ $# -ne 18 ]]; then
- help
-fi
-
while [[ $# != 0 ]]; do
if [[ $1 == "--bucket-url" ]]; then
# url same as: /xxx/kylin
@@ -93,15 +93,27 @@ while [[ $# != 0 ]]; do
DATABASE_PASSWORD=$2
elif [[ $1 == "--db-user" ]]; then
DATABASE_USER=$2
- elif [[ $1 == '--db-port' ]]; then
+ elif [[ $1 == "--db-port" ]]; then
DATABASE_PORT=$2
elif [[ $1 == "--local-soft" ]]; then
LOCAL_CACHE_SOFT_AFFINITY=$2
- elif [[ $1 == '--cluster-num' ]]; then
- # default value is 'default', and cluster num is from 1 to positive
infinity.
+ elif [[ $1 == "--cluster-num" ]]; then
+ # default value is "default", and cluster num is from 1 to positive
infinity.
CLUSTER_NUM=$2
- elif [[ $1 == '--is-scaled' ]]; then
+ elif [[ $1 == "--is-scaled" ]]; then
IS_SCALED=$2
+ elif [[ $1 == "--hadoop-version" ]]; then
+ HADOOP_VERSION=$2
+ elif [[ $1 == "--spark-version" ]]; then
+ SPARK_VERSION=$2
+ elif [[ $1 == "--kylin-version" ]]; then
+ KYLIN_VERSION=$2
+ elif [[ $1 == "--hive-version" ]]; then
+ HIVE_VERSION=$2
+ elif [[ $1 == "--mdx-version" ]]; then
+ MDX_VERSION=$2
+ elif [[ $1 == "--support-glue" ]]; then
+ SUPPORT_GLUE=$2
else
help
fi
@@ -113,10 +125,33 @@ done
# Prepare Steps
### Parameters for Spark and Kylin
#### ${SPARK_VERSION:0:1} get 2 from 2.4.7
-HADOOP_VERSION=3.2.0
-SPARK_VERSION=3.1.1
-KYLIN_VERSION=4.0.0
-HIVE_VERSION=2.3.9
+if [[ -z "$HADOOP_VERSION" ]]; then
+ HADOOP_VERSION=3.2.0
+fi
+
+if [[ -z "$SPARK_VERSION" ]]; then
+ SPARK_VERSION=3.1.1
+fi
+
+if [[ -z "$KYLIN_VERSION" ]]; then
+ KYLIN_VERSION=4.0.0
+fi
+
+if [[ -z "$HIVE_VERSION" ]]; then
+ HIVE_VERSION=2.3.9
+fi
+
+if [[ -z "$MDX_VERSION" ]]; then
+ MDX_VERSION=4.0.2-beta
+fi
+
+if [[ -z "$SUPPORT_GLUE" ]]; then
+ SUPPORT_GLUE=false
+fi
+
+if [[ -z "$MDX_DATABASE" ]]; then
+ MDX_DATABASE=kylin_mdx
+fi
LOCAL_CACHE_DIR=/home/ec2-user/ssd
@@ -141,10 +176,17 @@ else
fi
fi
-SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
+
+if [[ $SUPPORT_GLUE == "true" ]]; then
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz
+else
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
+fi
+
HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz
HIVE_PACKAGE=apache-hive-${HIVE_VERSION}-bin.tar.gz
NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz
+MDX_PACKAGE=mdx-kylin-${MDX_VERSION}.tar.gz
### Parameter for JDK 1.8
JDK_PACKAGE=jdk-8u301-linux-x64.tar.gz
@@ -163,7 +205,8 @@ function init_env() {
HADOOP_HOME=${HADOOP_DIR}/hadoop-${HADOOP_VERSION}
HIVE_HOME=${HADOOP_DIR}/hive
- KYLIN_HOME=${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE}
+ KYLIN_HOME=${HOME_DIR}/kylin
+ MDX_HOME=${HOME_DIR}/mdx
SPARK_HOME=${HADOOP_DIR}/spark
OUT_LOG=${HOME_DIR}/shell.stdout
@@ -193,6 +236,7 @@ export
PATH=$HIVE_HOME/bin:$HIVE_HOME/conf:${HADOOP_HOME}/bin:${JAVA_HOME}/bin:$
export HOME_DIR=${HOME_DIR}
export KYLIN_HOME=${KYLIN_HOME}
export SPARK_HOME=${SPARK_HOME}
+export MDX_HOME=${MDX_HOME}
export OUT_LOG=${OUT_LOG}
EOF
}
@@ -262,6 +306,10 @@ function prepare_hadoop() {
else
logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${HADOOP_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE}
fi
@@ -332,6 +380,10 @@ function prepare_hive() {
else
logging info "Downloading ${HIVE_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${HIVE_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${HIVE_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://downloads.apache.org/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE}
fi
@@ -373,7 +425,19 @@ function init_hive() {
return
fi
- cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
+ if [[ $SUPPORT_GLUE == "true" ]]; then
+ cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>hive.metastore.client.factory.class</name>
+
<value>com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory</value>
+ </property>
+</configuration>
+EOF
+ else
+ cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
@@ -410,6 +474,7 @@ function init_hive() {
</property>
</configuration>
EOF
+ fi
# resolve jars conflict
if [[ ! -d $HIVE_HOME/spark_jar ]]; then
@@ -435,14 +500,18 @@ function prepare_spark() {
return
fi
- logging info "Downloading Spark-${SPARK_VERSION} ..."
+ logging info "Downloading ${SPARK_PACKAGE} ..."
## download spark
if [[ -f ${HOME_DIR}/${SPARK_PACKAGE} ]]; then
logging warn "${SPARK_PACKAGE} already download, skip download it."
else
logging warn "Downloading ${SPARK_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
- # # wget cost lot time
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${SPARK_PACKAGE} failed, please check."
+ exit 1
+ fi
+ # # wget would cost a lot of time
# wget
http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}
fi
@@ -528,16 +597,22 @@ function prepare_kylin() {
else
logging info "Kylin-${KYLIN_VERSION} downloading ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${KYLIN_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${KYLIN_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://archive.apache.org/dist/kylin/apache-kylin-${KYLIN_VERSION}/${KYLIN_PACKAGE}
fi
- if [[ -d ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ]]; then
+ if [[ -d ${KYLIN_HOME} ]]; then
logging warn "Kylin package already decompress, skip decompress ..."
else
logging warn "Kylin package decompressing ..."
### unzip kylin tar file
tar -zxf ${KYLIN_PACKAGE}
+ ### make kylin home directory
+ sudo mv ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ${KYLIN_HOME}
fi
logging info "Kylin inited ..."
@@ -626,6 +701,91 @@ EOF
logging info "Kylin is ready ..."
}
+function prepare_mdx() {
+ logging info "Preparing MDX ..."
+
+ if [[ -f ${HOME_DIR}/.prepared_mdx ]]; then
+ logging warn "MDX already prepared ..."
+ return
+ fi
+
+ if [[ -f ${HOME_DIR}/${MDX_PACKAGE} ]]; then
+ logging warn "MDX package already downloaded, skip download it ..."
+ else
+ logging info "mdx-kylin-${MDX_VERSION} downloading ..."
+ aws s3 cp ${PATH_TO_BUCKET}/tar/${MDX_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${MDX_PACKAGE} failed, please check."
+ exit 1
+ fi
+ fi
+
+ if [[ -d ${MDX_HOME} ]]; then
+ logging warn "MDX package already decompress, skip decompress ..."
+ else
+ logging warn "MDX package decompressing ..."
+ ### unzip mdx tar file
+ tar -zxf ${MDX_PACKAGE}
+ ### make mdx home directory
+ sudo mv ${HOME_DIR}/${MDX_PACKAGE%*.tar.gz} ${MDX_HOME}
+ fi
+
+ logging info "MDX inited ..."
+ touch ${HOME_DIR}/.prepared_mdx
+ logging info "MDX prepared ..."
+}
+
+function init_mdx() {
+ if [[ -f ${HOME_DIR}/.inited_mdx ]]; then
+ logging warn "MDX already inited ..."
+ return
+ fi
+
+ if [[ ! -f $MDX_HOME/semantic-mdx/lib/mysql-connector-java-8.0.24.jar ]];
then
+ aws s3 cp ${PATH_TO_BUCKET}/jars/mysql-connector-java-8.0.24.jar
$MDX_HOME/semantic-mdx/lib/ --region ${CURRENT_REGION}
+ fi
+
+ if [[ ! -f $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then
+ logging info "Copy jdbc driver from $KYLIN_HOME to
$MDX_HOME/semantic-mdx/lib/ ..."
+ cp -f $KYLIN_HOME/lib/kylin-jdbc-*.jar $MDX_HOME/semantic-mdx/lib/
+ fi
+
+ # Encrypt db password for mdx
+ marker="The encryption string: "
+ ENCRPTED_PASSWORD=$(${MDX_HOME}/bin/mdx.sh encrypt ${DATABASE_PASSWORD} |
tail -n 1 | cut -d: -f2)
+ logging info "Encrypted Password is: ${ENCRPTED_PASSWORD}, and Original
Password is: ${DATABASE_PASSWORD}."
+
+ logging info "Install mysql client ..."
+ ## install mysql client
+ sudo yum install -y
https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm
+ sudo rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022
+ sudo yum install -y mysql-community-client
+
+ logging info "Create Database ${MDX_DATABASE} ..."
+ sudo mysql -h${DATABASE_HOST} -u${DATABASE_USER} -p${DATABASE_PASSWORD} -e
"create database if not exists ${MDX_DATABASE};"
+
+ # Append MDX settings to insight.properties (note: '>>' appends rather than overwriting)
+ cat <<EOF >>${MDX_HOME}/conf/insight.properties
+insight.kylin.host=$(hostname -I)
+insight.kylin.port=7070
+insight.database.type=mysql
+insight.database.username=${DATABASE_USER}
+insight.database.ip=${DATABASE_HOST}
+insight.database.name=${MDX_DATABASE}
+insight.database.port=${DATABASE_PORT}
+insight.database.password=${ENCRPTED_PASSWORD//[[:blank:]]/}
+insight.mdx.cluster.nodes=127.0.0.1:7080
+insight.semantic.datasource-version=2
+insight.semantic.port=7080
+insight.mdx.jvm.xms=-Xms3g
+insight.mdx.jvm.xmx=-Xmx3g
+EOF
+
+ logging info "MDX inited ..."
+ touch ${HOME_DIR}/.inited_mdx
+ logging info "MDX is ready ..."
+}
+
function after_start_kylin() {
KYLIN_WEB_LIB_PATH=$KYLIN_HOME/tomcat/webapps/kylin/WEB-INF/lib
if [[ ! -f $KYLIN_WEB_LIB_PATH/commons-collections-3.2.2.jar ]]; then
@@ -651,13 +811,17 @@ function start_kylin() {
}
function sample_for_kylin() {
- if [[ ${IS_SCALED} == 'false' ]]; then
- ${KYLIN_HOME}/bin/sample.sh
- if [[ $? -ne 0 ]]; then
- logging error "Sample for kylin is failed, please check ..."
- else
- logging info "Sample for kylin is successful, enjoy it ..."
- fi
+ if [[ $SUPPORT_GLUE == "true" ]]; then
+ return
+ fi
+
+ if [[ ${IS_SCALED} == "false" ]]; then
+ ${KYLIN_HOME}/bin/sample.sh
+ if [[ $? -ne 0 ]]; then
+ logging error "Sample for kylin is failed, please check ..."
+ else
+ logging info "Sample for kylin is successful, enjoy it ..."
+ fi
else
logging info "It is unnecessary to sample data in scaled mode. "
fi
@@ -667,6 +831,10 @@ function restart_kylin() {
${KYLIN_HOME}/bin/kylin.sh restart
}
+function start_mdx() {
+ ${MDX_HOME}/bin/mdx.sh start
+}
+
function prepare_node_exporter() {
logging info "Preparing node_exporter ..."
if [[ -f ${HOME_DIR}/.prepared_node_exporter ]]; then
@@ -735,16 +903,23 @@ function prepare_packages() {
prepare_kylin
init_kylin
+ prepare_mdx
+ init_mdx
+
touch ${HOME_DIR}/.prepared_packages
logging info "All need packages are ready ..."
}
function start_services_on_kylin() {
# special step for compatible jars, details in after_start_kylin
- sample_for_kylin
- start_kylin
- after_start_kylin
+ if [[ ! -f ${HOME_DIR}/.first_run ]]; then
+ sample_for_kylin
+ start_kylin
+ after_start_kylin
+ touch ${HOME_DIR}/.first_run
+ fi
restart_kylin
+ start_mdx
}
function main() {
diff --git a/backup/scripts/prepare-ec2-env-for-spark-master.sh
b/backup/scripts/prepare-ec2-env-for-spark-master.sh
index 496dab9..5a17d32 100644
--- a/backup/scripts/prepare-ec2-env-for-spark-master.sh
+++ b/backup/scripts/prepare-ec2-env-for-spark-master.sh
@@ -71,14 +71,13 @@ function help() {
--db-password db-password-for-hive-metadata
--db-user db-user-for-hive-metadata
--db-port db-port-for-hive-metadata
- --local-soft whether-to-use-local-cache+soft-affinity"
+ --local-soft whether-to-use-local-cache+soft-affinity
+ --hadoop-version hadoop-version-for-cluster
+ --spark-version spark-version-for-cluster
+ --hive-version hive-version-for-cluster"
exit 0
}
-if [[ $# -ne 14 ]]; then
- help
-fi
-
while [[ $# != 0 ]]; do
if [[ $1 == "--bucket-url" ]]; then
# url same as: /xxx/kylin
@@ -95,6 +94,14 @@ while [[ $# != 0 ]]; do
DATABASE_PORT=$2
elif [[ $1 == "--local-soft" ]]; then
LOCAL_CACHE_SOFT_AFFINITY=$2
+ elif [[ $1 == "--hadoop-version" ]]; then
+ HADOOP_VERSION=$2
+ elif [[ $1 == "--spark-version" ]]; then
+ SPARK_VERSION=$2
+ elif [[ $1 == "--hive-version" ]]; then
+ HIVE_VERSION=$2
+ elif [[ $1 == "--support-glue" ]]; then
+ SUPPORT_GLUE=$2
else
help
fi
@@ -106,9 +113,21 @@ done
# Prepare Steps
### Parameters for Spark and Kylin
#### ${SPARK_VERSION:0:1} get 2 from 2.4.7
-HADOOP_VERSION=3.2.0
-SPARK_VERSION=3.1.1
-HIVE_VERSION=2.3.9
+if [[ -z "$HADOOP_VERSION" ]]; then
+ HADOOP_VERSION=3.2.0
+fi
+
+if [[ -z "$SPARK_VERSION" ]]; then
+ SPARK_VERSION=3.1.1
+fi
+
+if [[ -z "$HIVE_VERSION" ]]; then
+ HIVE_VERSION=2.3.9
+fi
+
+if [[ -z "$SUPPORT_GLUE" ]]; then
+ SUPPORT_GLUE=false
+fi
LOCAL_CACHE_DIR=/home/ec2-user/ssd
@@ -124,9 +143,13 @@ if [[ $LOCAL_CACHE_SOFT_AFFINITY == "true" ]]; then
sudo mkdir -p ${LOCAL_CACHE_DIR}/alluxio-cache-driver
sudo chmod -R 777 ${LOCAL_CACHE_DIR}/alluxio-cache-driver
fi
+fi
+if [[ $SUPPORT_GLUE == "true" ]]; then
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz
+else
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
fi
-SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz
HIVE_PACKAGE=apache-hive-${HIVE_VERSION}-bin.tar.gz
NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz
@@ -245,6 +268,10 @@ function prepare_hadoop() {
else
logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${HADOOP_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE}
fi
@@ -315,6 +342,10 @@ function prepare_hive() {
else
logging info "Downloading ${HIVE_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${HIVE_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${HIVE_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://downloads.apache.org/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE}
fi
@@ -356,7 +387,19 @@ function init_hive() {
return
fi
- cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
+ if [[ $SUPPORT_GLUE == "true" ]]; then
+ cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+ <property>
+ <name>hive.metastore.client.factory.class</name>
+
<value>com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory</value>
+ </property>
+</configuration>
+EOF
+ else
+ cat <<EOF >${HIVE_HOME}/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
@@ -393,6 +436,7 @@ function init_hive() {
</property>
</configuration>
EOF
+ fi
# resolve jars conflict
if [[ ! -d $HIVE_HOME/spark_jar ]]; then
@@ -425,6 +469,10 @@ function prepare_spark() {
else
logging warn "Downloading ${SPARK_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${SPARK_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}
fi
diff --git a/backup/scripts/prepare-ec2-env-for-spark-slave.sh
b/backup/scripts/prepare-ec2-env-for-spark-slave.sh
index 7d871a5..84645f6 100644
--- a/backup/scripts/prepare-ec2-env-for-spark-slave.sh
+++ b/backup/scripts/prepare-ec2-env-for-spark-slave.sh
@@ -71,14 +71,13 @@ function help() {
--region region-for-s3
--waiting-time time-for-start-services
--mode cluster-mode-is-product-or-test
- --local-soft whether-to-use-local-cache+soft-affinity"
+ --local-soft whether-to-use-local-cache+soft-affinity
+ --hadoop-version hadoop-version-for-cluster
+ --spark-version spark-version-for-cluster
+ --kylin-version kylin-version-for-cluster"
exit 0
}
-if [[ $# -ne 14 ]]; then
- help
-fi
-
while [[ $# != 0 ]]; do
if [[ $1 == "--bucket-url" ]]; then
BUCKET_SUFFIX=$2
@@ -94,6 +93,14 @@ while [[ $# != 0 ]]; do
WORKER_MODE=$2
elif [[ $1 == "--local-soft" ]]; then
LOCAL_CACHE_SOFT_AFFINITY=$2
+ elif [[ $1 == "--hadoop-version" ]]; then
+ HADOOP_VERSION=$2
+ elif [[ $1 == "--spark-version" ]]; then
+ SPARK_VERSION=$2
+ elif [[ $1 == "--kylin-version" ]]; then
+ KYLIN_VERSION=$2
+ elif [[ $1 == "--support-glue" ]]; then
+ SUPPORT_GLUE=$2
else
help
fi
@@ -106,9 +113,21 @@ done
## Parameter
### Parameters for Spark
#### ${SPARK_VERSION:0:1} get 2 from 2.4.7
-HADOOP_VERSION=3.2.0
-SPARK_VERSION=3.1.1
-KYLIN_VERSION=4.0.0
+if [[ -z "$HADOOP_VERSION" ]]; then
+ HADOOP_VERSION=3.2.0
+fi
+
+if [[ -z "$SPARK_VERSION" ]]; then
+ SPARK_VERSION=3.1.1
+fi
+
+if [[ -z "$KYLIN_VERSION" ]]; then
+ KYLIN_VERSION=4.0.0
+fi
+
+if [[ -z "$SUPPORT_GLUE" ]]; then
+ SUPPORT_GLUE=false
+fi
### Parameter for JDK 1.8
JDK_PACKAGE=jdk-8u301-linux-x64.tar.gz
@@ -137,7 +156,12 @@ else
fi
fi
-SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
+if [[ $SUPPORT_GLUE == "true" ]]; then
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz
+else
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz
+fi
+
HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz
NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz
@@ -151,7 +175,7 @@ function init_env() {
JAVA_HOME=/usr/local/java
JRE_HOME=${JAVA_HOME}/jre
- KYLIN_HOME=${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE}
+ KYLIN_HOME=${HOME_DIR}/kylin
SPARK_HOME=${HADOOP_DIR}/spark
OUT_LOG=${HOME_DIR}/shell.stdout
HADOOP_HOME=${HADOOP_DIR}/hadoop-${HADOOP_VERSION}
@@ -245,6 +269,10 @@ function prepare_hadoop() {
else
logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${HADOOP_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE}
fi
@@ -276,6 +304,10 @@ function prepare_spark() {
else
logging warn "Downloading ${SPARK_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${SPARK_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}
fi
@@ -351,7 +383,7 @@ EOF
function start_spark_worker() {
# TODO: fix hard code for waiting time
sleep ${WAITING_TIME}
- if [[ $WORKER_MODE == 'product' ]]; then
+ if [[ $WORKER_MODE == "product" ]]; then
# product:
# # ec2 instance type is m5.4xlarge which has 16 cores! Set 15 to Spark
master.
# # Also set 60 GB memory for cluster
@@ -380,16 +412,21 @@ function prepare_kylin() {
else
logging info "Kylin-${KYLIN_VERSION} downloading ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${KYLIN_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${KYLIN_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
https://archive.apache.org/dist/kylin/apache-kylin-${KYLIN_VERSION}/${KYLIN_PACKAGE}
fi
- if [[ -d ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ]]; then
+ if [[ -d ${KYLIN_HOME} ]]; then
logging warn "Kylin package already decompress, skip decompress ..."
else
logging warn "Kylin package decompressing ..."
### unzip kylin tar file
tar -zxf ${KYLIN_PACKAGE}
+ sudo mv ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ${KYLIN_HOME}
fi
logging info "Kylin inited ..."
diff --git a/backup/scripts/prepare-ec2-env-for-static-services.sh
b/backup/scripts/prepare-ec2-env-for-static-services.sh
index 02e02dc..752c9b4 100644
--- a/backup/scripts/prepare-ec2-env-for-static-services.sh
+++ b/backup/scripts/prepare-ec2-env-for-static-services.sh
@@ -147,14 +147,11 @@ function help() {
--db-host host-for-hive-to-access-rds
--db-user user-for-hive-to-access-rds
--db-password password-for-hive-to-access-rds
- --db-port port-for-hive-to-access-rds"
+ --db-port port-for-hive-to-access-rds
+ --support-glue support-for-glue"
exit 0
}
-if [[ $# -ne 12 ]]; then
- help
-fi
-
while [[ $# != 0 ]]; do
if [[ $1 == "--bucket-url" ]]; then
# url same as: /xxx/kylin
@@ -169,6 +166,8 @@ while [[ $# != 0 ]]; do
DATABASE_USER=$2
elif [[ $1 == "--db-port" ]]; then
DATABASE_PORT=$2
+ elif [[ $1 == "--support-glue" ]]; then
+ SUPPORT_GLUE=$2
else
help
fi
@@ -176,6 +175,10 @@ while [[ $# != 0 ]]; do
shift
done
+if [[ -z "$SUPPORT_GLUE" ]]; then
+ SUPPORT_GLUE=false
+fi
+
PATH_TO_BUCKET=s3:/${BUCKET_SUFFIX}
CONFIG_PATH_TO_BUCKET=s3a:/${BUCKET_SUFFIX}
@@ -411,6 +414,9 @@ EOF
}
function start_hive_metastore() {
+ if [[ $SUPPORT_GLUE == "true" ]]; then
+ return
+ fi
nohup $HIVE_HOME/bin/hive --service metastore >>
$HIVE_HOME/logs/hivemetastorelog.log 2>&1 &
logging info "Hive was logging in $HIVE_HOME/logs, you can check ..."
}
@@ -469,14 +475,15 @@ function prepare_docker() {
}
function start_grafana() {
+ logging info "Starting docker ..."
+ start_docker
+
logging info "Preparing grafana ..."
if [[ -f ${HOME_DIR}/.prepared_grafana ]]; then
logging warn "Grafana service already installed, check it."
return
fi
- start_docker
-
if [[ $(sudo docker ps -q -f name=grafana-${GRAFANA_VERSION}) ]]; then
logging warn "Grafana-${GRAFANA_VERSION} already running, skip this ..."
else
@@ -603,7 +610,10 @@ function prepare_packages() {
}
function start_services_on_other() {
- start_hive_metastore
+ if [[ ! -f ${HOME_DIR}/.first_run ]]; then
+ start_hive_metastore
+ touch ${HOME_DIR}/.first_run
+ fi
# start extra monitor service
# NOTE: prometheus server will start after all node_exporter on every node
started.
diff --git a/backup/scripts/prepare-ec2-env-for-zk.sh
b/backup/scripts/prepare-ec2-env-for-zk.sh
index 03483d4..039ee59 100644
--- a/backup/scripts/prepare-ec2-env-for-zk.sh
+++ b/backup/scripts/prepare-ec2-env-for-zk.sh
@@ -63,12 +63,41 @@ function logging() {
set +e
+function help() {
+ logging warn "Invalid input."
+ logging warn "Usage: ${BASH_SOURCE[0]}
+ --bucket-url /path/to/bucket/without/prefix
+ --region region-for-current-instance
+ --zk-num current-zookeeper-number
+ --zookeeper-version zk-version-for-cluster"
+ exit 0
+}
+
+while [[ $# != 0 ]]; do
+ if [[ $1 == "--bucket-url" ]]; then
+ # url same as: /xxx/kylin
+ BUCKET_SUFFIX=$2
+ elif [[ $1 == "--region" ]]; then
+ CURRENT_REGION=$2
+ elif [[ $1 == "--zk-num" ]]; then
+ ZK_NUM=$2
+ elif [[ $1 == "--zookeeper-version" ]]; then
+ ZOOKEEPER_VERSION=3.4.13
+ else
+ help
+ fi
+ shift
+ shift
+done
+
# =============== Env Parameters =================
# Prepare Steps
## Parameter
### Parameters for Spark and Kylin
#### ${SPARK_VERSION:0:1} get 2 from 2.4.7
-ZOOKEEPER_VERSION=3.4.13
+if [[ -z $ZOOKEEPER_VERSION ]]; then
+ ZOOKEEPER_VERSION=3.4.13
+fi
### File name
ZOOKEEPER_PACKAGE=zookeeper-${ZOOKEEPER_VERSION}.tar.gz
@@ -132,33 +161,6 @@ source ~/.bash_profile
exec 2>>${OUT_LOG}
set -o pipefail
# ================ Main Functions ======================
-function help() {
- logging warn "Invalid input."
- logging warn "Usage: ${BASH_SOURCE[0]}
- --bucket-url /path/to/bucket/without/prefix
- --region region-for-current-instance
- --zk-num current-zookeeper-number"
- exit 0
-}
-
-if [[ $# -ne 6 ]]; then
- help
-fi
-
-while [[ $# != 0 ]]; do
- if [[ $1 == "--bucket-url" ]]; then
- # url same as: /xxx/kylin
- BUCKET_SUFFIX=$2
- elif [[ $1 == "--region" ]]; then
- CURRENT_REGION=$2
- elif [[ $1 == "--zk-num" ]]; then
- ZK_NUM=$2
- else
- help
- fi
- shift
- shift
-done
PATH_TO_BUCKET=s3:/${BUCKET_SUFFIX}
@@ -212,6 +214,10 @@ function prepare_zookeeper() {
else
logging info "Downloading Zookeeper package ${ZOOKEEPER_PACKAGE} ..."
aws s3 cp ${PATH_TO_BUCKET}/tar/${ZOOKEEPER_PACKAGE} ${HOME_DIR} --region
${CURRENT_REGION}
+ if [[ $? -ne 0 ]]; then
+ logging error "Downloading ${ZOOKEEPER_PACKAGE} failed, please check."
+ exit 1
+ fi
# # wget cost lot time
# wget
http://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/${ZOOKEEPER_PACKAGE}
fi
diff --git a/cloudformation_templates/ec2-cluster-kylin4-template.yaml
b/cloudformation_templates/ec2-cluster-kylin4-template.yaml
index 5278ab3..57cced5 100644
--- a/cloudformation_templates/ec2-cluster-kylin4-template.yaml
+++ b/cloudformation_templates/ec2-cluster-kylin4-template.yaml
@@ -135,6 +135,24 @@ Parameters:
MinValue: 30
MaxValue: 30
+ KylinVersion:
+ Type: String
+ Default: 4.0.0
+ SparkVersion:
+ Type: String
+ Default: 3.1.1
+ HadoopVersion:
+ Type: String
+ Default: 3.2.0
+ HiveVersion:
+ Type: String
+ Default: 2.3.9
+ MdxVersion:
+ Type: String
+ Default: 4.0.2-beta
+ SupportGlue:
+ Type: String
+ Default: false
Mappings:
AWSRegionArch2AMI:
@@ -242,7 +260,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateKylin4ScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num
${PrivateClusterNum} --is-scaled ${PrivateIsScaled}
+ bash ${PrivateKylin4ScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num
${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version
${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version
${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
echo " Kylin4 is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName
@@ -255,6 +273,12 @@ Resources:
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
PrivateClusterNum: !Ref ClusterNum
PrivateIsScaled: !Ref IsScaled
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateHiveVersion: !Ref HiveVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateMdxVersion: !Ref MdxVersion
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
IdOfInstance:
@@ -278,3 +302,6 @@ Outputs:
Kylin4ZookeeperHosts:
Description: Zookeeper hosts for Kylin 4
Value: !Ref ZookeepersHost
+ SupportGlue:
+ Description: is Glue supported?
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-kylin4.yaml
b/cloudformation_templates/ec2-cluster-kylin4.yaml
index a186f45..a4eb58b 100644
--- a/cloudformation_templates/ec2-cluster-kylin4.yaml
+++ b/cloudformation_templates/ec2-cluster-kylin4.yaml
@@ -134,6 +134,25 @@ Parameters:
MinValue: 30
MaxValue: 30
+ KylinVersion:
+ Type: String
+ Default: 4.0.0
+ SparkVersion:
+ Type: String
+ Default: 3.1.1
+ HadoopVersion:
+ Type: String
+ Default: 3.2.0
+ HiveVersion:
+ Type: String
+ Default: 2.3.9
+ MdxVersion:
+ Type: String
+ Default: 4.0.2-beta
+
+ SupportGlue:
+ Type: String
+ Default: false
Mappings:
AWSRegionArch2AMI:
@@ -241,7 +260,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateKylin4ScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num
${PrivateClusterNum} --is-scaled ${PrivateIsScaled}
+ bash ${PrivateKylin4ScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num
${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version
${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version
${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...]
echo " Kylin4 is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName
@@ -254,6 +273,12 @@ Resources:
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
PrivateClusterNum: !Ref ClusterNum
PrivateIsScaled: !Ref IsScaled
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateHiveVersion: !Ref HiveVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateMdxVersion: !Ref MdxVersion
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
IdOfInstance:
@@ -277,3 +302,6 @@ Outputs:
Kylin4ZookeeperHosts:
Description: Zookeeper hosts for Kylin 4
Value: !Ref ZookeepersHost
+ SupportGlue:
+ Description: is Glue supported?
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-spark-master.yaml
b/cloudformation_templates/ec2-cluster-spark-master.yaml
index 418ef12..32fe1fc 100644
--- a/cloudformation_templates/ec2-cluster-spark-master.yaml
+++ b/cloudformation_templates/ec2-cluster-spark-master.yaml
@@ -116,6 +116,20 @@ Parameters:
MinValue: 30
MaxValue: 30
+ SparkVersion:
+ Type: String
+ Default: 3.1.1
+ HadoopVersion:
+ Type: String
+ Default: 3.2.0
+ HiveVersion:
+ Type: String
+ Default: 2.3.9
+
+ SupportGlue:
+ Type: String
+ Default: false
+
Mappings:
AWSRegionArch2AMI:
@@ -213,7 +227,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateSparkMasterScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateSparkMasterScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --local-soft
${PrivateLocalCacheSoftAffinity} --db-port ${PrivateDbPort} --db-host
${PrivateDbHost} --db-user ${PrivateDbUser} --db-password ${PrivateDbPassword}
+ bash ${PrivateSparkMasterScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --local-soft
${PrivateLocalCacheSoftAffinity} --db-port ${PrivateDbPort} --db-host
${PrivateDbHost} --db-user ${PrivateDbUser} --db-password ${PrivateDbPassword}
--hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion}
--hive-version ${PrivateHiveVersion} --support-glue ${PrivateSupportGlue}
echo " Spark Master is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateSparkMasterScriptFileName: !Ref SparkMasterScriptFileName
@@ -224,6 +238,10 @@ Resources:
PrivateDbHost: !Ref DbHost
PrivateDbUser: !Ref DbUser
PrivateDbPassword: !Ref DbPassword
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateHiveVersion: !Ref HiveVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
IdOfInstance:
@@ -245,3 +263,6 @@ Outputs:
Value: !Ref SubnetId
SparkMasterSecurityGroupIdDependsOnDNode:
Value: !Ref SecurityGroupId
+ SupportGlue:
+ Description: Whether Glue support is enabled
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-spark-slave-template.yaml
b/cloudformation_templates/ec2-cluster-spark-slave-template.yaml
index 0faa939..13f8c0b 100644
--- a/cloudformation_templates/ec2-cluster-spark-slave-template.yaml
+++ b/cloudformation_templates/ec2-cluster-spark-slave-template.yaml
@@ -113,6 +113,18 @@ Parameters:
MinValue: 30
MaxValue: 30
+ KylinVersion:
+ Type: String
+ Default: 4.0.0
+ SparkVersion:
+ Type: String
+ Default: 3.1.1
+ HadoopVersion:
+ Type: String
+ Default: 3.2.0
+ SupportGlue:
+ Type: String
+ Default: false
Mappings:
AWSRegionArch2AMI:
@@ -225,7 +237,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
+ bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
--hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion}
--kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue}
- PrivateMasterHost: !Ref SparkMasterNodeHost
WorkerNum: !Ref WorkerNum
PrivateBucketFullPath: !Ref BucketFullPath
@@ -235,6 +247,10 @@ Resources:
PrivateWaitingTime: !Ref WaitingTime
WorkerMode: !Ref Ec2Mode
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
IdOfInstance:
@@ -247,3 +263,6 @@ Outputs:
Description: the Slave Instance Public IP
Value: !GetAtt Ec2InstanceOfSlave.PublicIp
Condition: IsAssociatedPublicIp
+ SupportGlue:
+ Description: Whether Glue support is enabled
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-spark-slave.yaml
b/cloudformation_templates/ec2-cluster-spark-slave.yaml
index 2fed70a..49c8773 100644
--- a/cloudformation_templates/ec2-cluster-spark-slave.yaml
+++ b/cloudformation_templates/ec2-cluster-spark-slave.yaml
@@ -111,7 +111,18 @@ Parameters:
Default: 30
MinValue: 30
MaxValue: 30
-
+ KylinVersion:
+ Type: String
+ Default: 4.0.0
+ SparkVersion:
+ Type: String
+ Default: 3.1.1
+ HadoopVersion:
+ Type: String
+ Default: 3.2.0
+ SupportGlue:
+ Type: String
+ Default: false
Mappings:
AWSRegionArch2AMI:
@@ -221,7 +232,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
+ bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
--hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion}
--kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue}
- PrivateMasterHost: !Ref SparkMasterNodeHost
WorkerNum: 1
PrivateBucketFullPath: !Ref BucketFullPath
@@ -231,6 +242,10 @@ Resources:
PrivateWaitingTime: !Ref WaitingTime
WorkerMode: !Ref Ec2Mode
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateSupportGlue: !Ref SupportGlue
Ec2InstanceOfSlave02:
Type: AWS::EC2::Instance
DeletionPolicy: Delete
@@ -289,7 +304,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
+ bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
--hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion}
--kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue}
- PrivateMasterHost: !Ref SparkMasterNodeHost
WorkerNum: 2
PrivateBucketFullPath: !Ref BucketFullPath
@@ -299,6 +314,10 @@ Resources:
PrivateWaitingTime: !Ref WaitingTime
WorkerMode: !Ref Ec2Mode
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateSupportGlue: !Ref SupportGlue
Ec2InstanceOfSlave03:
Type: AWS::EC2::Instance
@@ -358,7 +377,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
+ bash ${PrivateSlaveScriptFileName} --bucket-url
${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number
${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime}
--mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity}
--hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion}
--kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue}
- PrivateMasterHost: !Ref SparkMasterNodeHost
WorkerNum: 3
PrivateBucketFullPath: !Ref BucketFullPath
@@ -368,6 +387,10 @@ Resources:
PrivateWaitingTime: !Ref WaitingTime
WorkerMode: !Ref Ec2Mode
PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity
+ PrivateKylinVersion: !Ref KylinVersion
+ PrivateHadoopVersion: !Ref HadoopVersion
+ PrivateSparkVersion: !Ref SparkVersion
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
@@ -403,3 +426,6 @@ Outputs:
Description: Slave03 Instance Public IP
Value: !GetAtt Ec2InstanceOfSlave03.PublicIp
Condition: IsAssociatedPublicIp
+ SupportGlue:
+ Description: Whether Glue support is enabled
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-static-services.yaml
b/cloudformation_templates/ec2-cluster-static-services.yaml
index 9c214dd..960bed9 100644
--- a/cloudformation_templates/ec2-cluster-static-services.yaml
+++ b/cloudformation_templates/ec2-cluster-static-services.yaml
@@ -106,6 +106,9 @@ Parameters:
Default: gp2
AllowedValues:
- gp2
+ SupportGlue:
+ Type: String
+ Default: false
Mappings:
AWSRegionArch2AMI:
@@ -203,7 +206,7 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateStaticServicesScriptFileName} .
--region ${PrivateRegion}
- bash ${PrivateStaticServicesScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPassword} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort}
+ bash ${PrivateStaticServicesScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost}
--db-password ${PrivateDbPassword} --db-user ${PrivateDbUser} --db-port
${PrivateDbPort} --support-glue ${PrivateSupportGlue}
echo " Static Services are ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateStaticServicesScriptFileName: !Ref
StaticServicesScriptFileName
@@ -213,6 +216,7 @@ Resources:
PrivateDbPassword: !Ref DbPassword
PrivateDbUser: !Ref DbUser
PrivateDbPort: !Ref DbPort
+ PrivateSupportGlue: !Ref SupportGlue
Outputs:
# Env parameters
@@ -237,3 +241,7 @@ Outputs:
Description: StaticServices Public IP
Value: !GetAtt Ec2InstanceOfStaticServicesNode.PublicIp
Condition: IsAssociatedPublicIp
+
+ SupportGlue:
+ Description: Whether Glue support is enabled
+ Value: !Ref SupportGlue
diff --git a/cloudformation_templates/ec2-cluster-zk.yaml
b/cloudformation_templates/ec2-cluster-zk.yaml
index fbad62f..8b3fb11 100644
--- a/cloudformation_templates/ec2-cluster-zk.yaml
+++ b/cloudformation_templates/ec2-cluster-zk.yaml
@@ -92,6 +92,9 @@ Parameters:
Default: gp2
AllowedValues:
- gp2
+ ZookeeperVersion:
+ Type: String
+ Default: 3.4.13
Mappings:
AWSRegionArch2AMI:
@@ -189,13 +192,14 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum}
+ bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum}
--zookeeper-version ${PrivateZkVersion}
echo "Zookeeper is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName
PrivateBucketPath: !Ref BucketPath
PrivateRegion: !Ref AWS::Region
- privateZkNum: 1
+ PrivateZkNum: 1
+ PrivateZkVersion: !Ref ZookeeperVersion
Ec2InstanceOfZookeeperNode02:
Type: AWS::EC2::Instance
@@ -246,13 +250,14 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum}
+ bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum}
--zookeeper-version ${PrivateZkVersion}
echo "Zookeeper is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName
PrivateBucketPath: !Ref BucketPath
PrivateRegion: !Ref AWS::Region
- privateZkNum: 2
+ PrivateZkNum: 2
+ PrivateZkVersion: !Ref ZookeeperVersion
Ec2InstanceOfZookeeperNode03:
Type: AWS::EC2::Instance
@@ -303,13 +308,14 @@ Resources:
#!/bin/bash -xe
cd /home/ec2-user
aws s3 cp
${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region
${PrivateRegion}
- bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum}
+ bash ${PrivateZookeeperScriptFileName} --bucket-url
${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum}
--zookeeper-version ${PrivateZkVersion}
echo "Zookeeper is ready ..."
- PrivateBucketFullPath: !Ref BucketFullPath
PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName
PrivateBucketPath: !Ref BucketPath
PrivateRegion: !Ref AWS::Region
- privateZkNum: 3
+ PrivateZkNum: 3
+ PrivateZkVersion: !Ref ZookeeperVersion
Outputs:
# Instance 01 parameters
diff --git a/cloudformation_templates/ec2-or-emr-vpc.yaml
b/cloudformation_templates/ec2-or-emr-vpc.yaml
index 3deb667..f1e173d 100644
--- a/cloudformation_templates/ec2-or-emr-vpc.yaml
+++ b/cloudformation_templates/ec2-or-emr-vpc.yaml
@@ -26,6 +26,7 @@ Parameters:
Default: 4.0.0
AllowedValues:
- 4.0.0
+ - 4.0.2
ClusterType:
Type: String
Default: ec2
@@ -274,6 +275,11 @@ Resources:
FromPort: '4040'
ToPort: '4050'
CidrIp: !Ref CidrIp
+ # For MDX
+ - IpProtocol: tcp
+ FromPort: '7080'
+ ToPort: '7080'
+ CidrIp: !Ref CidrIp
DeletionPolicy: Delete
Ec2OrEmrBenchMarkSecurityGroupFullTcpIngress:
Type: AWS::EC2::SecurityGroupIngress
diff --git a/clouds/aws.py b/clouds/aws.py
index a0dfaa5..c71ce95 100644
--- a/clouds/aws.py
+++ b/clouds/aws.py
@@ -97,7 +97,7 @@ class AWS:
@property
def is_destroy_all(self) -> bool:
- return self.config[Params.ALWAYS_DESTROY_ALL.value] is True
+ return self.config[Params.ALWAYS_DESTROY_VPC_RDS_MONITOR.value] is True
def is_target_cluster_ready(self, cluster_num: int) -> bool:
if self.is_target_cluster_instances_ready(cluster_num):
diff --git a/constant/yaml_files.py b/constant/yaml_files.py
index b0492d7..9b73807 100644
--- a/constant/yaml_files.py
+++ b/constant/yaml_files.py
@@ -40,4 +40,6 @@ class Tar(Enum):
NODE = 'node_exporter-{NODE_EXPORTER_VERSION}.linux-amd64.tar.gz'
PROMETHEUS = 'prometheus-{PROMETHEUS_VERSION}.linux-amd64.tar.gz'
SPARK = 'spark-{SPARK_VERSION}-bin-hadoop{HADOOP_VERSION!s:3.3s}.tgz'
+ SPARK_FOR_GLUE =
'spark-{SPARK_VERSION}-bin-hadoop{HADOOP_VERSION!s:3.3s}-aws.tgz'
ZOOKEEPER = 'zookeeper-{ZOOKEEPER_VERSION}.tar.gz'
+ MDX = 'mdx-kylin-{MDX_VERSION}.tar.gz'
diff --git a/constant/yaml_params.py b/constant/yaml_params.py
index d921d32..8482922 100644
--- a/constant/yaml_params.py
+++ b/constant/yaml_params.py
@@ -21,7 +21,7 @@ from enum import Enum
class Params(Enum):
# global params
ASSOSICATED_PUBLIC_IP = 'ASSOSICATED_PUBLIC_IP'
- ALWAYS_DESTROY_ALL = 'ALWAYS_DESTROY_ALL'
+ ALWAYS_DESTROY_VPC_RDS_MONITOR = 'ALWAYS_DESTROY_VPC_RDS_MONITOR'
S3_URI = 'S3_URI'
INSTANCE_ID = 'IdOfInstance'
CLUSTER_NUM = 'ClusterNum'
diff --git a/engine_utils.py b/engine_utils.py
index 97a7099..acbc90e 100644
--- a/engine_utils.py
+++ b/engine_utils.py
@@ -47,20 +47,34 @@ class EngineUtils:
hadoop_package =
Tar.HADOOP.value.format(HADOOP_VERSION=self.config['HADOOP_VERSION'])
node_exporter_package =
Tar.NODE.value.format(NODE_EXPORTER_VERSION=self.config['NODE_EXPORTER_VERSION'])
prometheus_package =
Tar.PROMETHEUS.value.format(PROMETHEUS_VERSION=self.config['PROMETHEUS_VERSION'])
- spark_package =
Tar.SPARK.value.format(SPARK_VERSION=self.config['SPARK_VERSION'],
-
HADOOP_VERSION=self.config['HADOOP_VERSION'])
+ if self.config['SUPPORT_GLUE'] == 'true':
+ spark_package = Tar.SPARK_FOR_GLUE.value.format(
+ SPARK_VERSION=self.config['SPARK_VERSION'],
+ HADOOP_VERSION=self.config['HADOOP_VERSION'])
+ else:
+ spark_package = Tar.SPARK.value.format(
+ SPARK_VERSION=self.config['SPARK_VERSION'],
+ HADOOP_VERSION=self.config['HADOOP_VERSION'])
zookeeper_package =
Tar.ZOOKEEPER.value.format(ZOOKEEPER_VERSION=self.config['ZOOKEEPER_VERSION'])
- packages = [jdk_package, kylin_package, hive_package, hadoop_package,
node_exporter_package,
- prometheus_package, spark_package, zookeeper_package]
+ mdx_package =
Tar.MDX.value.format(MDX_VERSION=self.config['MDX_VERSION'])
+
+ packages = [
+ jdk_package, kylin_package, hive_package,
+ hadoop_package, node_exporter_package,
+ prometheus_package, spark_package,
+ zookeeper_package, mdx_package]
return packages
def needed_jars(self) -> List:
# FIXME: hard version of jars
- jars = []
commons_configuration = 'commons-configuration-1.3.jar'
- mysql_connector = 'mysql-connector-java-5.1.40.jar'
- jars.append(commons_configuration)
- jars.append(mysql_connector)
+ mysql_driver = 'mysql-connector-java-5.1.40.jar'
+ mysql_driver_for_mdx = 'mysql-connector-java-8.0.24.jar'
+ jars = [
+ commons_configuration,
+ mysql_driver,
+ mysql_driver_for_mdx,
+ ]
if self.config[Config.ENABLE_SOFT_AFFINITY.value] == 'true':
kylin_soft_affinity_cache =
'kylin-soft-affinity-cache-4.0.0-SNAPSHOT.jar'
alluxio_client = 'alluxio-2.6.1-client.jar'
@@ -102,10 +116,12 @@ class EngineUtils:
self.aws.after_scale_up(node_type=node_type)
elif scale_type == ScaleType.DOWN.value:
- self.aws.after_scale_down(node_type=node_type)
+ if not self.aws.is_destroy_all:
+ self.aws.after_scale_down(node_type=node_type)
self.aws.scale_down(node_type=node_type)
- self.aws.restart_prometheus_server()
+ if not self.aws.is_destroy_all:
+ self.aws.restart_prometheus_server()
def scale_nodes_in_cluster(
self,
@@ -122,10 +138,12 @@ class EngineUtils:
self.aws.scale_up(node_type=node_type, cluster_num=cluster_num,
is_destroy=is_destroy)
self.aws.after_scale_up(node_type=node_type,
cluster_num=cluster_num)
else:
- self.aws.after_scale_down(node_type=node_type,
cluster_num=cluster_num)
+ if not self.aws.is_destroy_all:
+ self.aws.after_scale_down(node_type=node_type,
cluster_num=cluster_num)
self.aws.scale_down(node_type=node_type, cluster_num=cluster_num,
is_destroy=is_destroy)
- self.aws.restart_prometheus_server()
+ if not self.aws.is_destroy_all:
+ self.aws.restart_prometheus_server()
def prepare_for_cluster(self) -> None:
# create vpc, rds and monitor node for whole cluster
@@ -154,10 +172,11 @@ class EngineUtils:
scale_type=ScaleType.DOWN.value,
node_type=NodeType.SPARK_WORKER.value,
cluster_num=num, is_destroy=True)
-
- self.aws.after_destroy_clusters(cluster_nums=cluster_nums)
+ if not self.aws.is_destroy_all:
+ self.aws.after_destroy_clusters(cluster_nums=cluster_nums)
self.aws.destroy_clusters(cluster_nums=cluster_nums)
- self.aws.restart_prometheus_server()
+ if not self.aws.is_destroy_all:
+ self.aws.restart_prometheus_server()
def destroy_cluster(self, cluster_num: int) -> None:
self.scale_nodes_in_cluster(
diff --git a/instances/aws_instance.py b/instances/aws_instance.py
index 291bfc2..c256fa7 100644
--- a/instances/aws_instance.py
+++ b/instances/aws_instance.py
@@ -139,6 +139,10 @@ class AWSInstance:
return self.cf_client.get_waiter('stack_exists')
@property
+ def db_available_waiter(self):
+ return self.rds_client.get_waiter('db_instance_available')
+
+ @property
def db_port(self) -> str:
return self.config[Config.DB_PORT.value]
@@ -391,12 +395,7 @@ class AWSInstance:
return db_instances[0]
def is_rds_exists(self) -> bool:
- try:
-
self.rds_client.describe_db_instances(DBInstanceIdentifier=self.db_identifier)
- except self.rds_client.exceptions.DBInstanceNotFoundFault as ex:
- logger.warning(f'DB {self.db_identifier} is not found.')
- return False
- return True
+ return self.is_db_available(self.db_identifier)
def create_rds_stack(self) -> Optional[Dict]:
if self.is_stack_complete(self.rds_stack_name):
@@ -414,6 +413,8 @@ class AWSInstance:
file_path=self.path_of_rds_stack,
params=params,
)
+ # make sure that the RDS stack has been created successfully
+ assert self.is_stack_complete(self.rds_stack_name)
return resp
def terminate_rds_stack(self) -> Optional[Dict]:
@@ -1424,7 +1425,6 @@ class AWSInstance:
return True
-
def update_basic_params(self, params: Dict) -> Dict:
params[Params.SUBNET_ID.value] = self.get_subnet_id()
params[Params.SECURITY_GROUP.value] = self.get_security_group_id()
@@ -1846,7 +1846,7 @@ class AWSInstance:
)
if not deleted_cost_stacks:
return False
- if not self.config['ALWAYS_DESTROY_ALL'] \
+ if not self.config['ALWAYS_DESTROY_VPC_RDS_MONITOR'] \
or self.is_stack_deleted_complete(self.vpc_stack_name):
return True
return False
@@ -2099,6 +2099,25 @@ class AWSInstance:
raise Exception(f'Current stack: {stack_name} is create failed,
please check.')
return False
+ def is_db_available(self, db_name: str) -> bool:
+ if self._db_available(db_name):
+ return True
+ return False
+
+ def _db_available(self, db_name: str) -> bool:
+ try:
+ self.db_available_waiter.wait(
+ DBInstanceIdentifier=db_name,
+ MaxRecords=60,
+ WaiterConfig={
+ 'Delay': 30,
+ 'MaxAttempts': 120
+ }
+ )
+ except WaiterError as wx:
+ return False
+ return True
+
def _validate_spark_worker_scale(self, stack_name: str) -> None:
if stack_name not in self.scaled_spark_workers_stacks:
msg = f'{stack_name} not in scaled list, please check.'
@@ -2135,7 +2154,7 @@ class AWSInstance:
self.create_complete_waiter.wait(
StackName=stack_name,
WaiterConfig={
- 'Delay': 30,
+ 'Delay': 60,
'MaxAttempts': 120
}
)
diff --git a/kylin_configs.yaml b/kylin_configs.yaml
index 161cf76..658bcec 100644
--- a/kylin_configs.yaml
+++ b/kylin_configs.yaml
@@ -46,18 +46,29 @@ KeyName: &security_key ${KEY_PAIR}
# Required
CIDR_IP: ${Cidr Ip}
+# Support for Glue on AWS
+#
+# Description:
+# Supporting Glue on AWS requires a special Spark package provided by AWS.
+# Note:
+# If you set `SUPPORT_GLUE` to `true`, please make sure that Kylin runs in `Job` mode only, not `Query` or `All` mode.
+# Because `All` mode also enables `Query`, you need to change `kylin.server.mode` in `kylin.properties` to `Job`.
+# If you set Kylin to `All` mode, an error will occur when you run any SQL query.
+SUPPORT_GLUE: &SUPPORT_GLUE 'false'
+
# ============ AWS Configs End ============
# ============ Related Version of Services ============
# Related Version of Services, Current packages are compatible.
# Note: Current support these versions, don't modify them.
-KYLIN_VERSION: &KYLIN_VERSION '4.0.0'
-HIVE_VERSION: '2.3.9'
-HADOOP_VERSION: '3.2.0'
+KYLIN_VERSION: &KYLIN_VERSION '4.0.2'
+HIVE_VERSION: &HIVE_VERSION '2.3.9'
+HADOOP_VERSION: &HADOOP_VERSION '3.2.0'
NODE_EXPORTER_VERSION: '1.3.1'
PROMETHEUS_VERSION: '2.31.1'
-SPARK_VERSION: '3.1.1'
-ZOOKEEPER_VERSION: '3.4.13'
+SPARK_VERSION: &SPARK_VERSION '3.1.1'
+ZOOKEEPER_VERSION: &ZOOKEEPER_VERSION '3.4.13'
+MDX_VERSION: &MDX_VERSION '4.0.2-beta'
# ============ Related Version of Services End============
# ============ Debug Configs ============
@@ -75,8 +86,8 @@ DEPLOY_PLATFORM: &platform ec2
# ============ Tool Configs ============
## Dangerous !!!
-## Optional: destroy all will delete rds and the vpc and monitor node, please
be careful.
-ALWAYS_DESTROY_ALL: false
+## Optional: destroying everything will delete the RDS, the VPC and the
monitor node, and clean all resources; please be careful.
+ALWAYS_DESTROY_VPC_RDS_MONITOR: false
## Open public Ip on Instances
ASSOSICATED_PUBLIC_IP: &associated_public_ip 'true'
@@ -166,6 +177,8 @@ EC2_STATIC_SERVICES_PARAMS:
EMREc2KeyName: *security_key
AssociatedPublicIp: *associated_public_ip
+ SupportGlue: *SUPPORT_GLUE
+
DbPort: *DbPort
DbUser: *DbUser
DbPassword: *DbPassword
@@ -184,6 +197,7 @@ EC2_ZOOKEEPERS_PARAMS:
SecurityGroupId:
EMREc2KeyName: *security_key
AssociatedPublicIp: *associated_public_ip
+ ZookeeperVersion: *ZOOKEEPER_VERSION
ZookeeperScriptFileName: prepare-ec2-env-for-zk.sh
Ec2Mode: test
@@ -203,6 +217,10 @@ EC2_SPARK_MASTER_PARAMS:
DbPort: *DbPort
DbUser: *DbUser
DbPassword: *DbPassword
+ SparkVersion: *SPARK_VERSION
+ HadoopVersion: *HADOOP_VERSION
+ HiveVersion: *HIVE_VERSION
+ SupportGlue: *SUPPORT_GLUE
AssociatedPublicIp: *associated_public_ip
SparkMasterScriptFileName: prepare-ec2-env-for-spark-master.sh
@@ -222,6 +240,13 @@ EC2_KYLIN4_PARAMS:
SubnetId:
SecurityGroupId:
+ KylinVersion: *KYLIN_VERSION
+ SparkVersion: *SPARK_VERSION
+ HadoopVersion: *HADOOP_VERSION
+ HiveVersion: *HIVE_VERSION
+ MdxVersion: *MDX_VERSION
+ SupportGlue: *SUPPORT_GLUE
+
AssociatedPublicIp: *associated_public_ip
DbPort: *DbPort
@@ -248,6 +273,10 @@ EC2_SPARK_WORKER_PARAMS:
EMREc2KeyName: *security_key
# set 'true' for test
AssociatedPublicIp: *associated_public_ip
+ KylinVersion: *KYLIN_VERSION
+ SparkVersion: *SPARK_VERSION
+ HadoopVersion: *HADOOP_VERSION
+ SupportGlue: *SUPPORT_GLUE
SlaveScriptFileName: prepare-ec2-env-for-spark-slave.sh
Ec2Mode: test
@@ -266,6 +295,13 @@ EC2_KYLIN4_SCALE_PARAMS:
SubnetId:
SecurityGroupId:
+ KylinVersion: *KYLIN_VERSION
+ SparkVersion: *SPARK_VERSION
+ HadoopVersion: *HADOOP_VERSION
+ HiveVersion: *HIVE_VERSION
+ MdxVersion: *MDX_VERSION
+ SupportGlue: *SUPPORT_GLUE
+
AssociatedPublicIp: *associated_public_ip
DbPort: *DbPort
@@ -290,6 +326,12 @@ EC2_SPARK_SCALE_SLAVE_PARAMS:
SubnetId:
SecurityGroupId:
WorkerNum:
+
+ KylinVersion: *KYLIN_VERSION
+ SparkVersion: *SPARK_VERSION
+ HadoopVersion: *HADOOP_VERSION
+ SupportGlue: *SUPPORT_GLUE
+
WaitingTime: '50'
EMREc2KeyName: *security_key
# set 'true' for test
diff --git a/readme/commands.md b/readme/commands.md
index ab55e32..49b80dc 100644
--- a/readme/commands.md
+++ b/readme/commands.md
@@ -47,7 +47,7 @@ $ python deploy.py --type deploy --cluster all
> Note:
>
-> Destroy all clusters will not delete vpc, rds, and monitor
node. So if user doesn't want to hold the env, please set the
`ALWAYS_DESTROY_ALL` to be `'true'`.
+> Destroying all clusters will not delete the VPC, RDS, or monitor
node. So if the user does not want to keep the environment, please set
`ALWAYS_DESTROY_VPC_RDS_MONITOR` to `'true'`.
- Destroy a default cluster
diff --git a/readme/quick_start.md b/readme/quick_start.md
index 3c44d55..4f88b6a 100644
--- a/readme/quick_start.md
+++ b/readme/quick_start.md
@@ -75,5 +75,5 @@ $ python deploy.py --type destroy
> Note:
>
> 1. If you want to check about a quick start for multiple clusters, please
> referer to a [quick start for multiple
> clusters](./quick_start_for_multiple_clusters.md).
-> 2. **Current destroy operation will remain some stack which contains `RDS`
and so on**. So if user want to destroy clearly, please modify the
`ALWAYS_DESTROY_ALL` in `kylin_configs.yml` to be `true` and re-execute
`destroy` command.
+> 2. **The current destroy operation will leave some stacks in place, such as
the one containing `RDS`**. So if the user wants to destroy everything
completely, please set `ALWAYS_DESTROY_VPC_RDS_MONITOR` in `kylin_configs.yml`
to `true` and re-execute the `destroy` command.
diff --git a/utils.py b/utils.py
index cbfff62..1f8b0cb 100644
--- a/utils.py
+++ b/utils.py
@@ -43,6 +43,7 @@ class Utils:
FILES_SIZE_IN_BYTES = {
'jdk-8u301-linux-x64.tar.gz': 145520298,
'apache-kylin-4.0.0-bin-spark3.tar.gz': 198037626,
+ 'apache-kylin-4.0.2-bin-spark3.tar.gz': 198051064,
'apache-hive-2.3.9-bin.tar.gz': 286170958,
'hadoop-3.2.0.tar.gz': 345625475,
'node_exporter-1.3.1.linux-amd64.tar.gz': 9033415,
@@ -51,6 +52,9 @@ class Utils:
'zookeeper-3.4.13.tar.gz': 37191810,
'commons-configuration-1.3.jar': 232915,
'mysql-connector-java-5.1.40.jar': 990924,
+ 'mysql-connector-java-8.0.24.jar': 2428323,
+ 'mdx-kylin-4.0.2-beta.tar.gz': 81935515,
+ 'spark-3.1.1-bin-hadoop3.2-aws.tgz': 531069078,
}
@staticmethod