This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin4_on_cloud in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin4_on_cloud by this push: new c25d881 Support mdx (#1827) c25d881 is described below commit c25d8819bee6e2e3c78c3238185aa01539afaeb5 Author: Tengting Xu <34978943+muk...@users.noreply.github.com> AuthorDate: Wed Mar 9 14:13:39 2022 +0800 Support mdx (#1827) * # minor fix about destroy * # minor update for versions of software * # support mdx & pre-support glue --- .gitignore | 2 +- .../properties/templates/kylin.properties.template | 2 + backup/scripts/prepare-ec2-env-for-kylin4.sh | 233 ++++++++++++++++++--- backup/scripts/prepare-ec2-env-for-spark-master.sh | 68 +++++- backup/scripts/prepare-ec2-env-for-spark-slave.sh | 61 ++++-- .../scripts/prepare-ec2-env-for-static-services.sh | 26 ++- backup/scripts/prepare-ec2-env-for-zk.sh | 62 +++--- .../ec2-cluster-kylin4-template.yaml | 29 ++- cloudformation_templates/ec2-cluster-kylin4.yaml | 30 ++- .../ec2-cluster-spark-master.yaml | 23 +- .../ec2-cluster-spark-slave-template.yaml | 21 +- .../ec2-cluster-spark-slave.yaml | 34 ++- .../ec2-cluster-static-services.yaml | 10 +- cloudformation_templates/ec2-cluster-zk.yaml | 18 +- cloudformation_templates/ec2-or-emr-vpc.yaml | 6 + clouds/aws.py | 2 +- constant/yaml_files.py | 2 + constant/yaml_params.py | 2 +- engine_utils.py | 49 +++-- instances/aws_instance.py | 37 +++- kylin_configs.yaml | 56 ++++- readme/commands.md | 2 +- readme/quick_start.md | 2 +- utils.py | 4 + 24 files changed, 643 insertions(+), 138 deletions(-) diff --git a/.gitignore b/.gitignore index dbc5998..a087ce7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,6 @@ __pycache__/ */.DS_Store /venv/ .idea -logs/*.log +logs/ backup/jars backup/tars \ No newline at end of file diff --git a/backup/properties/templates/kylin.properties.template b/backup/properties/templates/kylin.properties.template index 41f4a96..0bd043c 100644 --- a/backup/properties/templates/kylin.properties.template +++ b/backup/properties/templates/kylin.properties.template @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + + # Kylin server mode, valid value [all, query, job] kylin.server.mode=all kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://{{ DB_HOST }}:{{ DB_PORT }}/kylin,username=root,password={{ DB_PASSWORD }},maxActive=10,maxIdle=10 diff --git a/backup/scripts/prepare-ec2-env-for-kylin4.sh b/backup/scripts/prepare-ec2-env-for-kylin4.sh index 54a3d6d..cdad91d 100644 --- a/backup/scripts/prepare-ec2-env-for-kylin4.sh +++ b/backup/scripts/prepare-ec2-env-for-kylin4.sh @@ -73,14 +73,14 @@ function help() { --db-user db-user-for-kylin --kylin-mode mode-for-kylin[all|query|job] --local-soft whether-to-use-local-cache+soft-affinity - --cluster-num specify-a-cluster" + --cluster-num specify-a-cluster + --hadoop-version hadoop-version-for-cluster + --spark-version spark-version-for-cluster + --kylin-version kylin-version-for-cluster + --hive-version hive-version-for-cluster" exit 0 } -if [[ $# -ne 18 ]]; then - help -fi - while [[ $# != 0 ]]; do if [[ $1 == "--bucket-url" ]]; then # url same as: /xxx/kylin @@ -93,15 +93,27 @@ while [[ $# != 0 ]]; do DATABASE_PASSWORD=$2 elif [[ $1 == "--db-user" ]]; then DATABASE_USER=$2 - elif [[ $1 == '--db-port' ]]; then + elif [[ $1 == "--db-port" ]]; then DATABASE_PORT=$2 elif [[ $1 == "--local-soft" ]]; then LOCAL_CACHE_SOFT_AFFINITY=$2 - elif [[ $1 == '--cluster-num' ]]; then - # default value is 'default', and cluster num is from 1 to positive infinity. + elif [[ $1 == "--cluster-num" ]]; then + # default value is "default", and cluster num is from 1 to positive infinity. CLUSTER_NUM=$2 - elif [[ $1 == '--is-scaled' ]]; then + elif [[ $1 == "--is-scaled" ]]; then IS_SCALED=$2 + elif [[ $1 == "--hadoop-version" ]]; then + HADOOP_VERSION=$2 + elif [[ $1 == "--spark-version" ]]; then + SPARK_VERSION=$2 + elif [[ $1 == "--kylin-version" ]]; then + KYLIN_VERSION=$2 + elif [[ $1 == "--hive-version" ]]; then + HIVE_VERSION=$2 + elif [[ $1 == "--mdx-version" ]]; then + MDX_VERSION=$2 + elif [[ $1 == "--support-glue" ]]; then + SUPPORT_GLUE=$2 else help fi @@ -113,10 +125,33 @@ done # Prepare Steps ### Parameters for Spark and Kylin #### ${SPARK_VERSION:0:1} get 2 from 2.4.7 -HADOOP_VERSION=3.2.0 -SPARK_VERSION=3.1.1 -KYLIN_VERSION=4.0.0 -HIVE_VERSION=2.3.9 +if [[ -z "$HADOOP_VERSION" ]]; then + HADOOP_VERSION=3.2.0 +fi + +if [[ -z "$SPARK_VERSION" ]]; then + SPARK_VERSION=3.1.1 +fi + +if [[ -z "$KYLIN_VERSION" ]]; then + KYLIN_VERSION=4.0.0 +fi + +if [[ -z "$HIVE_VERSION" ]]; then + HIVE_VERSION=2.3.9 +fi + +if [[ -z "$MDX_VERSION" ]]; then + MDX_VERSION=4.0.2-beta +fi + +if [[ -z "$SUPPORT_GLUE" ]]; then + SUPPORT_GLUE=false +fi + +if [[ -z "$MDX_DATABASE" ]]; then + MDX_DATABASE=kylin_mdx +fi LOCAL_CACHE_DIR=/home/ec2-user/ssd @@ -141,10 +176,17 @@ else fi fi -SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz + +if [[ $SUPPORT_GLUE == "true" ]]; then + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz +else + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz +fi + HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz HIVE_PACKAGE=apache-hive-${HIVE_VERSION}-bin.tar.gz NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz +MDX_PACKAGE=mdx-kylin-${MDX_VERSION}.tar.gz ### Parameter for JDK 1.8 JDK_PACKAGE=jdk-8u301-linux-x64.tar.gz @@ -163,7 +205,8 @@ function init_env() { HADOOP_HOME=${HADOOP_DIR}/hadoop-${HADOOP_VERSION} HIVE_HOME=${HADOOP_DIR}/hive - KYLIN_HOME=${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} + KYLIN_HOME=${HOME_DIR}/kylin + MDX_HOME=${HOME_DIR}/mdx SPARK_HOME=${HADOOP_DIR}/spark OUT_LOG=${HOME_DIR}/shell.stdout @@ -193,6 +236,7 @@ export PATH=$HIVE_HOME/bin:$HIVE_HOME/conf:${HADOOP_HOME}/bin:${JAVA_HOME}/bin:$ export HOME_DIR=${HOME_DIR} export KYLIN_HOME=${KYLIN_HOME} export SPARK_HOME=${SPARK_HOME} +export MDX_HOME=${MDX_HOME} export OUT_LOG=${OUT_LOG} EOF } @@ -262,6 +306,10 @@ function prepare_hadoop() { else logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${HADOOP_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE} fi @@ -332,6 +380,10 @@ function prepare_hive() { else logging info "Downloading ${HIVE_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${HIVE_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${HIVE_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://downloads.apache.org/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE} fi @@ -373,7 +425,19 @@ function init_hive() { return fi - cat <<EOF >${HIVE_HOME}/conf/hive-site.xml + if [[ $SUPPORT_GLUE == "true" ]]; then + cat <<EOF >${HIVE_HOME}/conf/hive-site.xml +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<configuration> + <property> + <name>hive.metastore.client.factory.class</name> + <value>com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory</value> + </property> +</configuration> +EOF + else + cat <<EOF >${HIVE_HOME}/conf/hive-site.xml <?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> @@ -410,6 +474,7 @@ function init_hive() { </property> </configuration> EOF + fi # resolve jars conflict if [[ ! -d $HIVE_HOME/spark_jar ]]; then @@ -435,14 +500,18 @@ function prepare_spark() { return fi - logging info "Downloading Spark-${SPARK_VERSION} ..." + logging info "Downloading ${SPARK_PACKAGE} ..." ## download spark if [[ -f ${HOME_DIR}/${SPARK_PACKAGE} ]]; then logging warn "${SPARK_PACKAGE} already download, skip download it." else logging warn "Downloading ${SPARK_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} - # # wget cost lot time + if [[ $? -ne 0 ]]; then + logging error "Downloading ${SPARK_PACKAGE} failed, please check." + exit 1 + fi + # # wget will cost lot time # wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE} fi @@ -528,16 +597,22 @@ function prepare_kylin() { else logging info "Kylin-${KYLIN_VERSION} downloading ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${KYLIN_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${KYLIN_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://archive.apache.org/dist/kylin/apache-kylin-${KYLIN_VERSION}/${KYLIN_PACKAGE} fi - if [[ -d ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ]]; then + if [[ -d ${KYLIN_HOME} ]]; then logging warn "Kylin package already decompress, skip decompress ..." else logging warn "Kylin package decompressing ..." ### unzip kylin tar file tar -zxf ${KYLIN_PACKAGE} + ### make kylin home directory + sudo mv ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ${KYLIN_HOME} fi logging info "Kylin inited ..." @@ -626,6 +701,91 @@ EOF logging info "Kylin is ready ..." } +function prepare_mdx() { + logging info "Preparing MDX ..." + + if [[ -f ${HOME_DIR}/.prepared_mdx ]]; then + logging warn "MDX already prepared ..." + return + fi + + if [[ -f ${HOME_DIR}/${MDX_PACKAGE} ]]; then + logging warn "MDX package already downloaded, skip download it ..." + else + logging info "mdx-kylin-${MDX_VERSION} downloading ..." + aws s3 cp ${PATH_TO_BUCKET}/tar/${MDX_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${MDX_PACKAGE} failed, please check." + exit 1 + fi + fi + + if [[ -d ${MDX_HOME} ]]; then + logging warn "MDX package already decompress, skip decompress ..." + else + logging warn "MDX package decompressing ..." + ### unzip kylin tar file + tar -zxf ${MDX_PACKAGE} + ### make mdx home directory + sudo mv ${HOME_DIR}/${MDX_PACKAGE%*.tar.gz} ${MDX_HOME} + fi + + logging info "MDX inited ..." + touch ${HOME_DIR}/.prepared_mdx + logging info "MDX prepared ..." +} + +function init_mdx() { + if [[ -f ${HOME_DIR}/.inited_mdx ]]; then + logging warn "MDX already inited ..." + return + fi + + if [[ ! -f $MDX_HOME/semantic-mdx/lib/mysql-connector-java-8.0.24.jar ]]; then + aws s3 cp ${PATH_TO_BUCKET}/jars/mysql-connector-java-8.0.24.jar $MDX_HOME/semantic-mdx/lib/ --region ${CURRENT_REGION} + fi + + if [[ ! -f $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then + logging info "Copy jdbc driver from $KYLIN_HOME to $MDX_HOME/semantic-mdx/lib/ ..." + cp -f $KYLIN_HOME/lib/kylin-jdbc-*.jar $MDX_HOME/semantic-mdx/lib/ + fi + + # Encrypt db password for mdx + marker="The encryption string: " + ENCRPTED_PASSWORD=$(${MDX_HOME}/bin/mdx.sh encrypt ${DATABASE_PASSWORD} | tail -n 1 | cut -d: -f2) + logging info "Encrypted Password is: ${ENCRPTED_PASSWORD}, and Original Password is: ${DATABASE_PASSWORD}." + + logging info "Install mysql client ..." + ## install mysql client + sudo yum install -y https://dev.mysql.com/get/mysql57-community-release-el7-11.noarch.rpm + sudo rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022 + sudo yum install -y mysql-community-client + + logging info "Create Database ${MDX_DATABASE} ..." + sudo mysql -h${DATABASE_HOST} -u${DATABASE_USER} -p${DATABASE_PASSWORD} -e "create database if not exists ${MDX_DATABASE};" + + # Overwrite insight.properties + cat <<EOF >>${MDX_HOME}/conf/insight.properties +insight.kylin.host=$(hostname -I) +insight.kylin.port=7070 +insight.database.type=mysql +insight.database.username=${DATABASE_USER} +insight.database.ip=${DATABASE_HOST} +insight.database.name=${MDX_DATABASE} +insight.database.port=${DATABASE_PORT} +insight.database.password=${ENCRPTED_PASSWORD//[[:blank:]]/} +insight.mdx.cluster.nodes=127.0.0.1:7080 +insight.semantic.datasource-version=2 +insight.semantic.port=7080 +insight.mdx.jvm.xms=-Xms3g +insight.mdx.jvm.xmx=-Xmx3g +EOF + + logging info "MDX inited ..." + touch ${HOME_DIR}/.inited_mdx + logging info "MDX is ready ..." +} + function after_start_kylin() { KYLIN_WEB_LIB_PATH=$KYLIN_HOME/tomcat/webapps/kylin/WEB-INF/lib if [[ ! -f $KYLIN_WEB_LIB_PATH/commons-collections-3.2.2.jar ]]; then @@ -651,13 +811,17 @@ function start_kylin() { } function sample_for_kylin() { - if [[ ${IS_SCALED} == 'false' ]]; then - ${KYLIN_HOME}/bin/sample.sh - if [[ $? -ne 0 ]]; then - logging error "Sample for kylin is failed, please check ..." - else - logging info "Sample for kylin is successful, enjoy it ..." - fi + if [[ $SUPPORT_GLUE == "true" ]]; then + return + fi + + if [[ ${IS_SCALED} == "false" ]]; then + ${KYLIN_HOME}/bin/sample.sh + if [[ $? -ne 0 ]]; then + logging error "Sample for kylin is failed, please check ..." + else + logging info "Sample for kylin is successful, enjoy it ..." + fi else logging info "It is unnecessary to sample data in scaled mode. " fi @@ -667,6 +831,10 @@ function restart_kylin() { ${KYLIN_HOME}/bin/kylin.sh restart } +function start_mdx() { + ${MDX_HOME}/bin/mdx.sh start +} + function prepare_node_exporter() { logging info "Preparing node_exporter ..." if [[ -f ${HOME_DIR}/.prepared_node_exporter ]]; then @@ -735,16 +903,23 @@ function prepare_packages() { prepare_kylin init_kylin + prepare_mdx + init_mdx + touch ${HOME_DIR}/.prepared_packages logging info "All need packages are ready ..." } function start_services_on_kylin() { # special step for compatible jars, details in after_start_kylin - sample_for_kylin - start_kylin - after_start_kylin + if [[ ! -f ${HOME_DIR}/.first_run ]]; then + sample_for_kylin + start_kylin + after_start_kylin + touch ${HOME_DIR}/.first_run + fi restart_kylin + start_mdx } function main() { diff --git a/backup/scripts/prepare-ec2-env-for-spark-master.sh b/backup/scripts/prepare-ec2-env-for-spark-master.sh index 496dab9..5a17d32 100644 --- a/backup/scripts/prepare-ec2-env-for-spark-master.sh +++ b/backup/scripts/prepare-ec2-env-for-spark-master.sh @@ -71,14 +71,13 @@ function help() { --db-password db-password-for-hive-metadata --db-user db-user-for-hive-metadata --db-port db-port-for-hive-metadata - --local-soft whether-to-use-local-cache+soft-affinity" + --local-soft whether-to-use-local-cache+soft-affinity + --hadoop-version hadoop-version-for-cluster + --spark-version spark-version-for-cluster + --hive-version hive-version-for-cluster" exit 0 } -if [[ $# -ne 14 ]]; then - help -fi - while [[ $# != 0 ]]; do if [[ $1 == "--bucket-url" ]]; then # url same as: /xxx/kylin @@ -95,6 +94,14 @@ while [[ $# != 0 ]]; do DATABASE_PORT=$2 elif [[ $1 == "--local-soft" ]]; then LOCAL_CACHE_SOFT_AFFINITY=$2 + elif [[ $1 == "--hadoop-version" ]]; then + HADOOP_VERSION=$2 + elif [[ $1 == "--spark-version" ]]; then + SPARK_VERSION=$2 + elif [[ $1 == "--hive-version" ]]; then + HIVE_VERSION=$2 + elif [[ $1 == "--support-glue" ]]; then + SUPPORT_GLUE=$2 else help fi @@ -106,9 +113,21 @@ done # Prepare Steps ### Parameters for Spark and Kylin #### ${SPARK_VERSION:0:1} get 2 from 2.4.7 -HADOOP_VERSION=3.2.0 -SPARK_VERSION=3.1.1 -HIVE_VERSION=2.3.9 +if [[ -z "$HADOOP_VERSION" ]]; then + HADOOP_VERSION=3.2.0 +fi + +if [[ -z "$SPARK_VERSION" ]]; then + SPARK_VERSION=3.1.1 +fi + +if [[ -z "$HIVE_VERSION" ]]; then + HIVE_VERSION=2.3.9 +fi + +if [[ -z "$SUPPORT_GLUE" ]]; then + SUPPORT_GLUE=false +fi LOCAL_CACHE_DIR=/home/ec2-user/ssd @@ -124,9 +143,13 @@ if [[ $LOCAL_CACHE_SOFT_AFFINITY == "true" ]]; then sudo mkdir -p ${LOCAL_CACHE_DIR}/alluxio-cache-driver sudo chmod -R 777 ${LOCAL_CACHE_DIR}/alluxio-cache-driver fi +fi +if [[ $SUPPORT_GLUE == "true" ]]; then + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz +else + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz fi -SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz HIVE_PACKAGE=apache-hive-${HIVE_VERSION}-bin.tar.gz NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz @@ -245,6 +268,10 @@ function prepare_hadoop() { else logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${HADOOP_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE} fi @@ -315,6 +342,10 @@ function prepare_hive() { else logging info "Downloading ${HIVE_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${HIVE_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${HIVE_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://downloads.apache.org/hive/hive-${HIVE_VERSION}/${HIVE_PACKAGE} fi @@ -356,7 +387,19 @@ function init_hive() { return fi - cat <<EOF >${HIVE_HOME}/conf/hive-site.xml + if [[ $SUPPORT_GLUE == "true" ]]; then + cat <<EOF >${HIVE_HOME}/conf/hive-site.xml +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<configuration> + <property> + <name>hive.metastore.client.factory.class</name> + <value>com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory</value> + </property> +</configuration> +EOF + else + cat <<EOF >${HIVE_HOME}/conf/hive-site.xml <?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> @@ -393,6 +436,7 @@ function init_hive() { </property> </configuration> EOF + fi # resolve jars conflict if [[ ! -d $HIVE_HOME/spark_jar ]]; then @@ -425,6 +469,10 @@ function prepare_spark() { else logging warn "Downloading ${SPARK_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${SPARK_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE} fi diff --git a/backup/scripts/prepare-ec2-env-for-spark-slave.sh b/backup/scripts/prepare-ec2-env-for-spark-slave.sh index 7d871a5..84645f6 100644 --- a/backup/scripts/prepare-ec2-env-for-spark-slave.sh +++ b/backup/scripts/prepare-ec2-env-for-spark-slave.sh @@ -71,14 +71,13 @@ function help() { --region region-for-s3 --waiting-time time-for-start-services --mode cluster-mode-is-product-or-test - --local-soft whether-to-use-local-cache+soft-affinity" + --local-soft whether-to-use-local-cache+soft-affinity + --hadoop-version hadoop-version-for-cluster + --spark-version spark-version-for-cluster + --kylin-version kylin-version-for-cluster" exit 0 } -if [[ $# -ne 14 ]]; then - help -fi - while [[ $# != 0 ]]; do if [[ $1 == "--bucket-url" ]]; then BUCKET_SUFFIX=$2 @@ -94,6 +93,14 @@ while [[ $# != 0 ]]; do WORKER_MODE=$2 elif [[ $1 == "--local-soft" ]]; then LOCAL_CACHE_SOFT_AFFINITY=$2 + elif [[ $1 == "--hadoop-version" ]]; then + HADOOP_VERSION=$2 + elif [[ $1 == "--spark-version" ]]; then + SPARK_VERSION=$2 + elif [[ $1 == "--kylin-version" ]]; then + KYLIN_VERSION=$2 + elif [[ $1 == "--support-glue" ]]; then + SUPPORT_GLUE=$2 else help fi @@ -106,9 +113,21 @@ done ## Parameter ### Parameters for Spark #### ${SPARK_VERSION:0:1} get 2 from 2.4.7 -HADOOP_VERSION=3.2.0 -SPARK_VERSION=3.1.1 -KYLIN_VERSION=4.0.0 +if [[ -z "$HADOOP_VERSION" ]]; then + HADOOP_VERSION=3.2.0 +fi + +if [[ -z "$SPARK_VERSION" ]]; then + SPARK_VERSION=3.1.1 +fi + +if [[ -z "$KYLIN_VERSION" ]]; then + KYLIN_VERSION=4.0.0 +fi + +if [[ -z "$SUPPORT_GLUE" ]]; then + SUPPORT_GLUE=false +fi ### Parameter for JDK 1.8 JDK_PACKAGE=jdk-8u301-linux-x64.tar.gz @@ -137,7 +156,12 @@ else fi fi -SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz +if [[ $SUPPORT_GLUE == "true" ]]; then + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}-aws.tgz +else + SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION:0:3}.tgz +fi + HADOOP_PACKAGE=hadoop-${HADOOP_VERSION}.tar.gz NODE_EXPORTER_PACKAGE=node_exporter-1.3.1.linux-amd64.tar.gz @@ -151,7 +175,7 @@ function init_env() { JAVA_HOME=/usr/local/java JRE_HOME=${JAVA_HOME}/jre - KYLIN_HOME=${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} + KYLIN_HOME=${HOME_DIR}/kylin SPARK_HOME=${HADOOP_DIR}/spark OUT_LOG=${HOME_DIR}/shell.stdout HADOOP_HOME=${HADOOP_DIR}/hadoop-${HADOOP_VERSION} @@ -245,6 +269,10 @@ function prepare_hadoop() { else logging info "Downloading Hadoop package ${HADOOP_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${HADOOP_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${HADOOP_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_PACKAGE} fi @@ -276,6 +304,10 @@ function prepare_spark() { else logging warn "Downloading ${SPARK_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${SPARK_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${SPARK_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE} fi @@ -351,7 +383,7 @@ EOF function start_spark_worker() { # TODO: fix hard code for waiting time sleep ${WAITING_TIME} - if [[ $WORKER_MODE == 'product' ]]; then + if [[ $WORKER_MODE == "product" ]]; then # product: # # ec2 instance type is m5.4xlarge which has 16 cores! Set 15 to Spark master. # # Also set 60 GB memory for cluster @@ -380,16 +412,21 @@ function prepare_kylin() { else logging info "Kylin-${KYLIN_VERSION} downloading ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${KYLIN_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${KYLIN_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget https://archive.apache.org/dist/kylin/apache-kylin-${KYLIN_VERSION}/${KYLIN_PACKAGE} fi - if [[ -d ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ]]; then + if [[ -d ${KYLIN_HOME} ]]; then logging warn "Kylin package already decompress, skip decompress ..." else logging warn "Kylin package decompressing ..." ### unzip kylin tar file tar -zxf ${KYLIN_PACKAGE} + sudo mv ${HOME_DIR}/${DECOMPRESSED_KYLIN_PACKAGE} ${KYLIN_HOME} fi logging info "Kylin inited ..." diff --git a/backup/scripts/prepare-ec2-env-for-static-services.sh b/backup/scripts/prepare-ec2-env-for-static-services.sh index 02e02dc..752c9b4 100644 --- a/backup/scripts/prepare-ec2-env-for-static-services.sh +++ b/backup/scripts/prepare-ec2-env-for-static-services.sh @@ -147,14 +147,11 @@ function help() { --db-host host-for-hive-to-access-rds --db-user user-for-hive-to-access-rds --db-password password-for-hive-to-access-rds - --db-port port-for-hive-to-access-rds" + --db-port port-for-hive-to-access-rds + --support-glue support-for-glue" exit 0 } -if [[ $# -ne 12 ]]; then - help -fi - while [[ $# != 0 ]]; do if [[ $1 == "--bucket-url" ]]; then # url same as: /xxx/kylin @@ -169,6 +166,8 @@ while [[ $# != 0 ]]; do DATABASE_USER=$2 elif [[ $1 == "--db-port" ]]; then DATABASE_PORT=$2 + elif [[ $1 == "--support-glue" ]]; then + SUPPORT_GLUE=$2 else help fi @@ -176,6 +175,10 @@ while [[ $# != 0 ]]; do shift done +if [[ -z "$SUPPORT_GLUE" ]]; then + SUPPORT_GLUE=false +fi + PATH_TO_BUCKET=s3:/${BUCKET_SUFFIX} CONFIG_PATH_TO_BUCKET=s3a:/${BUCKET_SUFFIX} @@ -411,6 +414,9 @@ EOF } function start_hive_metastore() { + if [[ $SUPPORT_GLUE == "true" ]]; then + return + fi nohup $HIVE_HOME/bin/hive --service metastore >> $HIVE_HOME/logs/hivemetastorelog.log 2>&1 & logging info "Hive was logging in $HIVE_HOME/logs, you can check ..." } @@ -469,14 +475,15 @@ function prepare_docker() { } function start_grafana() { + logging info "Starting docker ..." + start_docker + logging info "Preparing grafana ..." if [[ -f ${HOME_DIR}/.prepared_grafana ]]; then logging warn "Grafana service already installed, check it." return fi - start_docker - if [[ $(sudo docker ps -q -f name=grafana-${GRAFANA_VERSION}) ]]; then logging warn "Grafana-${GRAFANA_VERSION} already running, skip this ..." else @@ -603,7 +610,10 @@ function prepare_packages() { } function start_services_on_other() { - start_hive_metastore + if [[ ! -f ${HOME_DIR}/.first_run ]]; then + start_hive_metastore + touch ${HOME_DIR}/.first_run + fi # start extra monitor service # NOTE: prometheus server will start after all node_exporter on every node started. diff --git a/backup/scripts/prepare-ec2-env-for-zk.sh b/backup/scripts/prepare-ec2-env-for-zk.sh index 03483d4..039ee59 100644 --- a/backup/scripts/prepare-ec2-env-for-zk.sh +++ b/backup/scripts/prepare-ec2-env-for-zk.sh @@ -63,12 +63,41 @@ function logging() { set +e +function help() { + logging warn "Invalid input." + logging warn "Usage: ${BASH_SOURCE[0]} + --bucket-url /path/to/bucket/without/prefix + --region region-for-current-instance + --zk-num current-zookeeper-number + --zookeeper-version zk-version-for-cluster" + exit 0 +} + +while [[ $# != 0 ]]; do + if [[ $1 == "--bucket-url" ]]; then + # url same as: /xxx/kylin + BUCKET_SUFFIX=$2 + elif [[ $1 == "--region" ]]; then + CURRENT_REGION=$2 + elif [[ $1 == "--zk-num" ]]; then + ZK_NUM=$2 + elif [[ $1 == "--zookeeper-version" ]]; then + ZOOKEEPER_VERSION=3.4.13 + else + help + fi + shift + shift +done + # =============== Env Parameters ================= # Prepare Steps ## Parameter ### Parameters for Spark and Kylin #### ${SPARK_VERSION:0:1} get 2 from 2.4.7 -ZOOKEEPER_VERSION=3.4.13 +if [[ -z $ZOOKEEPER_VERSION ]]; then + ZOOKEEPER_VERSION=3.4.13 +fi ### File name ZOOKEEPER_PACKAGE=zookeeper-${ZOOKEEPER_VERSION}.tar.gz @@ -132,33 +161,6 @@ source ~/.bash_profile exec 2>>${OUT_LOG} set -o pipefail # ================ Main Functions ====================== -function help() { - logging warn "Invalid input." - logging warn "Usage: ${BASH_SOURCE[0]} - --bucket-url /path/to/bucket/without/prefix - --region region-for-current-instance - --zk-num current-zookeeper-number" - exit 0 -} - -if [[ $# -ne 6 ]]; then - help -fi - -while [[ $# != 0 ]]; do - if [[ $1 == "--bucket-url" ]]; then - # url same as: /xxx/kylin - BUCKET_SUFFIX=$2 - elif [[ $1 == "--region" ]]; then - CURRENT_REGION=$2 - elif [[ $1 == "--zk-num" ]]; then - ZK_NUM=$2 - else - help - fi - shift - shift -done PATH_TO_BUCKET=s3:/${BUCKET_SUFFIX} @@ -212,6 +214,10 @@ function prepare_zookeeper() { else logging info "Downloading Zookeeper package ${ZOOKEEPER_PACKAGE} ..." aws s3 cp ${PATH_TO_BUCKET}/tar/${ZOOKEEPER_PACKAGE} ${HOME_DIR} --region ${CURRENT_REGION} + if [[ $? -ne 0 ]]; then + logging error "Downloading ${ZOOKEEPER_PACKAGE} failed, please check." + exit 1 + fi # # wget cost lot time # wget http://archive.apache.org/dist/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/${ZOOKEEPER_PACKAGE} fi diff --git a/cloudformation_templates/ec2-cluster-kylin4-template.yaml b/cloudformation_templates/ec2-cluster-kylin4-template.yaml index 5278ab3..57cced5 100644 --- a/cloudformation_templates/ec2-cluster-kylin4-template.yaml +++ b/cloudformation_templates/ec2-cluster-kylin4-template.yaml @@ -135,6 +135,24 @@ Parameters: MinValue: 30 MaxValue: 30 + KylinVersion: + Type: String + Default: 4.0.0 + SparkVersion: + Type: String + Default: 3.1.1 + HadoopVersion: + Type: String + Default: 3.2.0 + HiveVersion: + Type: String + Default: 2.3.9 + MdxVersion: + Type: String + Default: 4.0.2-beta + SupportGlue: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -242,7 +260,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion} - bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} + bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] echo " Kylin4 is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName @@ -255,6 +273,12 @@ Resources: PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity PrivateClusterNum: !Ref ClusterNum PrivateIsScaled: !Ref IsScaled + PrivateKylinVersion: !Ref KylinVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateHiveVersion: !Ref HiveVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateMdxVersion: !Ref MdxVersion + PrivateSupportGlue: !Ref SupportGlue Outputs: IdOfInstance: @@ -278,3 +302,6 @@ Outputs: Kylin4ZookeeperHosts: Description: Zookeeper hosts for Kylin 4 Value: !Ref ZookeepersHost + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-kylin4.yaml b/cloudformation_templates/ec2-cluster-kylin4.yaml index a186f45..a4eb58b 100644 --- a/cloudformation_templates/ec2-cluster-kylin4.yaml +++ b/cloudformation_templates/ec2-cluster-kylin4.yaml @@ -134,6 +134,25 @@ Parameters: MinValue: 30 MaxValue: 30 + KylinVersion: + Type: String + Default: 4.0.0 + SparkVersion: + Type: String + Default: 3.1.1 + HadoopVersion: + Type: String + Default: 3.2.0 + HiveVersion: + Type: String + Default: 2.3.9 + MdxVersion: + Type: String + Default: 4.0.2-beta + + SupportGlue: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -241,7 +260,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion} - bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} + bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] echo " Kylin4 is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName @@ -254,6 +273,12 @@ Resources: PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity PrivateClusterNum: !Ref ClusterNum PrivateIsScaled: !Ref IsScaled + PrivateKylinVersion: !Ref KylinVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateHiveVersion: !Ref HiveVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateMdxVersion: !Ref MdxVersion + PrivateSupportGlue: !Ref SupportGlue Outputs: IdOfInstance: @@ -277,3 +302,6 @@ Outputs: Kylin4ZookeeperHosts: Description: Zookeeper hosts for Kylin 4 Value: !Ref ZookeepersHost + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-spark-master.yaml b/cloudformation_templates/ec2-cluster-spark-master.yaml index 418ef12..32fe1fc 100644 --- a/cloudformation_templates/ec2-cluster-spark-master.yaml +++ b/cloudformation_templates/ec2-cluster-spark-master.yaml @@ -116,6 +116,20 @@ Parameters: MinValue: 30 MaxValue: 30 + SparkVersion: + Type: String + Default: 3.1.1 + HadoopVersion: + Type: String + Default: 3.2.0 + HiveVersion: + Type: String + Default: 2.3.9 + + SupportGlue: + Type: String + Default: false + Mappings: AWSRegionArch2AMI: @@ -213,7 +227,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateSparkMasterScriptFileName} . --region ${PrivateRegion} - bash ${PrivateSparkMasterScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --local-soft ${PrivateLocalCacheSoftAffinity} --db-port ${PrivateDbPort} --db-host ${PrivateDbHost} --db-user ${PrivateDbUser} --db-password ${PrivateDbPassword} + bash ${PrivateSparkMasterScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --local-soft ${PrivateLocalCacheSoftAffinity} --db-port ${PrivateDbPort} --db-host ${PrivateDbHost} --db-user ${PrivateDbUser} --db-password ${PrivateDbPassword} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --support-glue ${PrivateSupportGlue} echo " Spark Master is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateSparkMasterScriptFileName: !Ref SparkMasterScriptFileName @@ -224,6 +238,10 @@ Resources: PrivateDbHost: !Ref DbHost PrivateDbUser: !Ref DbUser PrivateDbPassword: !Ref DbPassword + PrivateSparkVersion: !Ref SparkVersion + PrivateHiveVersion: !Ref HiveVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateSupportGlue: !Ref SupportGlue Outputs: IdOfInstance: @@ -245,3 +263,6 @@ Outputs: Value: !Ref SubnetId SparkMasterSecurityGroupIdDependsOnDNode: Value: !Ref SecurityGroupId + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-spark-slave-template.yaml b/cloudformation_templates/ec2-cluster-spark-slave-template.yaml index 0faa939..13f8c0b 100644 --- a/cloudformation_templates/ec2-cluster-spark-slave-template.yaml +++ b/cloudformation_templates/ec2-cluster-spark-slave-template.yaml @@ -113,6 +113,18 @@ Parameters: MinValue: 30 MaxValue: 30 + KylinVersion: + Type: String + Default: 4.0.0 + SparkVersion: + Type: String + Default: 3.1.1 + HadoopVersion: + Type: String + Default: 3.2.0 + SupportGlue: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -225,7 +237,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region ${PrivateRegion} - bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} + bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue} - PrivateMasterHost: !Ref SparkMasterNodeHost WorkerNum: !Ref WorkerNum PrivateBucketFullPath: !Ref BucketFullPath @@ -235,6 +247,10 @@ Resources: PrivateWaitingTime: !Ref WaitingTime WorkerMode: !Ref Ec2Mode PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity + PrivateKylinVersion: !Ref KylinVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateSupportGlue: !Ref SupportGlue Outputs: IdOfInstance: @@ -247,3 +263,6 @@ Outputs: Description: the Slave Instance Public IP Value: !GetAtt Ec2InstanceOfSlave.PublicIp Condition: IsAssociatedPublicIp + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-spark-slave.yaml b/cloudformation_templates/ec2-cluster-spark-slave.yaml index 2fed70a..49c8773 100644 --- a/cloudformation_templates/ec2-cluster-spark-slave.yaml +++ b/cloudformation_templates/ec2-cluster-spark-slave.yaml @@ -111,7 +111,18 @@ Parameters: Default: 30 MinValue: 30 MaxValue: 30 - + KylinVersion: + Type: String + Default: 4.0.0 + SparkVersion: + Type: String + Default: 3.1.1 + HadoopVersion: + Type: String + Default: 3.2.0 + SupportGlue: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -221,7 +232,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region ${PrivateRegion} - bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} + bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue} - PrivateMasterHost: !Ref SparkMasterNodeHost WorkerNum: 1 PrivateBucketFullPath: !Ref BucketFullPath @@ -231,6 +242,10 @@ Resources: PrivateWaitingTime: !Ref WaitingTime WorkerMode: !Ref Ec2Mode PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity + PrivateKylinVersion: !Ref KylinVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateSupportGlue: !Ref SupportGlue Ec2InstanceOfSlave02: Type: AWS::EC2::Instance DeletionPolicy: Delete @@ -289,7 +304,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region ${PrivateRegion} - bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} + bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue} - PrivateMasterHost: !Ref SparkMasterNodeHost WorkerNum: 2 PrivateBucketFullPath: !Ref BucketFullPath @@ -299,6 +314,10 @@ Resources: PrivateWaitingTime: !Ref WaitingTime WorkerMode: !Ref Ec2Mode PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity + PrivateKylinVersion: !Ref KylinVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateSupportGlue: !Ref SupportGlue Ec2InstanceOfSlave03: Type: AWS::EC2::Instance @@ -358,7 +377,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateSlaveScriptFileName} . --region ${PrivateRegion} - bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} + bash ${PrivateSlaveScriptFileName} --bucket-url ${PrivateBucketPath} --master-host ${PrivateMasterHost} --worker-number ${WorkerNum} --region ${PrivateRegion} --waiting-time ${PrivateWaitingTime} --mode ${WorkerMode} --local-soft ${PrivateLocalCacheSoftAffinity} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --kylin-version ${PrivateKylinVersion} --support-glue ${PrivateSupportGlue} - PrivateMasterHost: !Ref SparkMasterNodeHost WorkerNum: 3 PrivateBucketFullPath: !Ref BucketFullPath @@ -368,6 +387,10 @@ Resources: PrivateWaitingTime: !Ref WaitingTime WorkerMode: !Ref Ec2Mode PrivateLocalCacheSoftAffinity: !Ref LocalCacheSoftAffinity + PrivateKylinVersion: !Ref KylinVersion + PrivateHadoopVersion: !Ref HadoopVersion + PrivateSparkVersion: !Ref SparkVersion + PrivateSupportGlue: !Ref SupportGlue Outputs: @@ -403,3 +426,6 @@ Outputs: Description: Slave03 Instance Public IP Value: !GetAtt Ec2InstanceOfSlave03.PublicIp Condition: IsAssociatedPublicIp + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-static-services.yaml b/cloudformation_templates/ec2-cluster-static-services.yaml index 9c214dd..960bed9 100644 --- a/cloudformation_templates/ec2-cluster-static-services.yaml +++ b/cloudformation_templates/ec2-cluster-static-services.yaml @@ -106,6 +106,9 @@ Parameters: Default: gp2 AllowedValues: - gp2 + SupportGlue: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -203,7 +206,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateStaticServicesScriptFileName} . --region ${PrivateRegion} - bash ${PrivateStaticServicesScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPassword} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} + bash ${PrivateStaticServicesScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPassword} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --support-glue ${PrivateSupportGlue} echo " Static Services are ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateStaticServicesScriptFileName: !Ref StaticServicesScriptFileName @@ -213,6 +216,7 @@ Resources: PrivateDbPassword: !Ref DbPassword PrivateDbUser: !Ref DbUser PrivateDbPort: !Ref DbPort + PrivateSupportGlue: !Ref SupportGlue Outputs: # Env parameters @@ -237,3 +241,7 @@ Outputs: Description: StaticServices Public IP Value: !GetAtt Ec2InstanceOfStaticServicesNode.PublicIp Condition: IsAssociatedPublicIp + + SupportGlue: + Description: is supported glue ? + Value: !Ref SupportGlue diff --git a/cloudformation_templates/ec2-cluster-zk.yaml b/cloudformation_templates/ec2-cluster-zk.yaml index fbad62f..8b3fb11 100644 --- a/cloudformation_templates/ec2-cluster-zk.yaml +++ b/cloudformation_templates/ec2-cluster-zk.yaml @@ -92,6 +92,9 @@ Parameters: Default: gp2 AllowedValues: - gp2 + ZookeeperVersion: + Type: String + Default: 3.4.13 Mappings: AWSRegionArch2AMI: @@ -189,13 +192,14 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region ${PrivateRegion} - bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum} + bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum} --zookeeper-version ${PrivateZkVersion} echo "Zookeeper is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName PrivateBucketPath: !Ref BucketPath PrivateRegion: !Ref AWS::Region - privateZkNum: 1 + PrivateZkNum: 1 + PrivateZkVersion: !Ref ZookeeperVersion Ec2InstanceOfZookeeperNode02: Type: AWS::EC2::Instance @@ -246,13 +250,14 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region ${PrivateRegion} - bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum} + bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum} --zookeeper-version ${PrivateZkVersion} echo "Zookeeper is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName PrivateBucketPath: !Ref BucketPath PrivateRegion: !Ref AWS::Region - privateZkNum: 2 + PrivateZkNum: 2 + PrivateZkVersion: !Ref ZookeeperVersion Ec2InstanceOfZookeeperNode03: Type: AWS::EC2::Instance @@ -303,13 +308,14 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateZookeeperScriptFileName} . --region ${PrivateRegion} - bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${privateZkNum} + bash ${PrivateZookeeperScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --zk-num ${PrivateZkNum} --zookeeper-version ${PrivateZkVersion} echo "Zookeeper is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateZookeeperScriptFileName: !Ref ZookeeperScriptFileName PrivateBucketPath: !Ref BucketPath PrivateRegion: !Ref AWS::Region - privateZkNum: 3 + PrivateZkNum: 3 + PrivateZkVersion: !Ref ZookeeperVersion Outputs: # Instance 01 parameters diff --git a/cloudformation_templates/ec2-or-emr-vpc.yaml b/cloudformation_templates/ec2-or-emr-vpc.yaml index 3deb667..f1e173d 100644 --- a/cloudformation_templates/ec2-or-emr-vpc.yaml +++ b/cloudformation_templates/ec2-or-emr-vpc.yaml @@ -26,6 +26,7 @@ Parameters: Default: 4.0.0 AllowedValues: - 4.0.0 + - 4.0.2 ClusterType: Type: String Default: ec2 @@ -274,6 +275,11 @@ Resources: FromPort: '4040' ToPort: '4050' CidrIp: !Ref CidrIp + # For MDX + - IpProtocol: tcp + FromPort: '7080' + ToPort: '7080' + CidrIp: !Ref CidrIp DeletionPolicy: Delete Ec2OrEmrBenchMarkSecurityGroupFullTcpIngress: Type: AWS::EC2::SecurityGroupIngress diff --git a/clouds/aws.py b/clouds/aws.py index a0dfaa5..c71ce95 100644 --- a/clouds/aws.py +++ b/clouds/aws.py @@ -97,7 +97,7 @@ class AWS: @property def is_destroy_all(self) -> bool: - return self.config[Params.ALWAYS_DESTROY_ALL.value] is True + return self.config[Params.ALWAYS_DESTROY_VPC_RDS_MONITOR.value] is True def is_target_cluster_ready(self, cluster_num: int) -> bool: if self.is_target_cluster_instances_ready(cluster_num): diff --git a/constant/yaml_files.py b/constant/yaml_files.py index b0492d7..9b73807 100644 --- a/constant/yaml_files.py +++ b/constant/yaml_files.py @@ -40,4 +40,6 @@ class Tar(Enum): NODE = 'node_exporter-{NODE_EXPORTER_VERSION}.linux-amd64.tar.gz' PROMETHEUS = 'prometheus-{PROMETHEUS_VERSION}.linux-amd64.tar.gz' SPARK = 'spark-{SPARK_VERSION}-bin-hadoop{HADOOP_VERSION!s:3.3s}.tgz' + SPARK_FOR_GLUE = 'spark-{SPARK_VERSION}-bin-hadoop{HADOOP_VERSION!s:3.3s}-aws.tgz' ZOOKEEPER = 'zookeeper-{ZOOKEEPER_VERSION}.tar.gz' + MDX = 'mdx-kylin-{MDX_VERSION}.tar.gz' diff --git a/constant/yaml_params.py b/constant/yaml_params.py index d921d32..8482922 100644 --- a/constant/yaml_params.py +++ b/constant/yaml_params.py @@ -21,7 +21,7 @@ from enum import Enum class Params(Enum): # global params ASSOSICATED_PUBLIC_IP = 'ASSOSICATED_PUBLIC_IP' - ALWAYS_DESTROY_ALL = 'ALWAYS_DESTROY_ALL' + ALWAYS_DESTROY_VPC_RDS_MONITOR = 'ALWAYS_DESTROY_VPC_RDS_MONITOR' S3_URI = 'S3_URI' INSTANCE_ID = 'IdOfInstance' CLUSTER_NUM = 'ClusterNum' diff --git a/engine_utils.py b/engine_utils.py index 97a7099..acbc90e 100644 --- a/engine_utils.py +++ b/engine_utils.py @@ -47,20 +47,34 @@ class EngineUtils: hadoop_package = Tar.HADOOP.value.format(HADOOP_VERSION=self.config['HADOOP_VERSION']) node_exporter_package = Tar.NODE.value.format(NODE_EXPORTER_VERSION=self.config['NODE_EXPORTER_VERSION']) prometheus_package = Tar.PROMETHEUS.value.format(PROMETHEUS_VERSION=self.config['PROMETHEUS_VERSION']) - spark_package = Tar.SPARK.value.format(SPARK_VERSION=self.config['SPARK_VERSION'], - HADOOP_VERSION=self.config['HADOOP_VERSION']) + if self.config['SUPPORT_GLUE'] == 'true': + spark_package = Tar.SPARK_FOR_GLUE.value.format( + SPARK_VERSION=self.config['SPARK_VERSION'], + HADOOP_VERSION=self.config['HADOOP_VERSION']) + else: + spark_package = Tar.SPARK.value.format( + SPARK_VERSION=self.config['SPARK_VERSION'], + HADOOP_VERSION=self.config['HADOOP_VERSION']) zookeeper_package = Tar.ZOOKEEPER.value.format(ZOOKEEPER_VERSION=self.config['ZOOKEEPER_VERSION']) - packages = [jdk_package, kylin_package, hive_package, hadoop_package, node_exporter_package, - prometheus_package, spark_package, zookeeper_package] + mdx_package = Tar.MDX.value.format(MDX_VERSION=self.config['MDX_VERSION']) + + packages = [ + jdk_package, kylin_package, hive_package, + hadoop_package, node_exporter_package, + prometheus_package, spark_package, + zookeeper_package, mdx_package] return packages def needed_jars(self) -> List: # FIXME: hard version of jars - jars = [] commons_configuration = 'commons-configuration-1.3.jar' - mysql_connector = 'mysql-connector-java-5.1.40.jar' - jars.append(commons_configuration) - jars.append(mysql_connector) + mysql_driver = 'mysql-connector-java-5.1.40.jar' + mysql_driver_for_mdx = 'mysql-connector-java-8.0.24.jar' + jars = [ + commons_configuration, + mysql_driver, + mysql_driver_for_mdx, + ] if self.config[Config.ENABLE_SOFT_AFFINITY.value] == 'true': kylin_soft_affinity_cache = 'kylin-soft-affinity-cache-4.0.0-SNAPSHOT.jar' alluxio_client = 'alluxio-2.6.1-client.jar' @@ -102,10 +116,12 @@ class EngineUtils: self.aws.after_scale_up(node_type=node_type) elif scale_type == ScaleType.DOWN.value: - self.aws.after_scale_down(node_type=node_type) + if not self.aws.is_destroy_all: + self.aws.after_scale_down(node_type=node_type) self.aws.scale_down(node_type=node_type) - self.aws.restart_prometheus_server() + if not self.aws.is_destroy_all: + self.aws.restart_prometheus_server() def scale_nodes_in_cluster( self, @@ -122,10 +138,12 @@ class EngineUtils: self.aws.scale_up(node_type=node_type, cluster_num=cluster_num, is_destroy=is_destroy) self.aws.after_scale_up(node_type=node_type, cluster_num=cluster_num) else: - self.aws.after_scale_down(node_type=node_type, cluster_num=cluster_num) + if not self.aws.is_destroy_all: + self.aws.after_scale_down(node_type=node_type, cluster_num=cluster_num) self.aws.scale_down(node_type=node_type, cluster_num=cluster_num, is_destroy=is_destroy) - self.aws.restart_prometheus_server() + if not self.aws.is_destroy_all: + self.aws.restart_prometheus_server() def prepare_for_cluster(self) -> None: # create vpc, rds and monitor node for whole cluster @@ -154,10 +172,11 @@ class EngineUtils: scale_type=ScaleType.DOWN.value, node_type=NodeType.SPARK_WORKER.value, cluster_num=num, is_destroy=True) - - self.aws.after_destroy_clusters(cluster_nums=cluster_nums) + if not self.aws.is_destroy_all: + self.aws.after_destroy_clusters(cluster_nums=cluster_nums) self.aws.destroy_clusters(cluster_nums=cluster_nums) - self.aws.restart_prometheus_server() + if not self.aws.is_destroy_all: + self.aws.restart_prometheus_server() def destroy_cluster(self, cluster_num: int) -> None: self.scale_nodes_in_cluster( diff --git a/instances/aws_instance.py b/instances/aws_instance.py index 291bfc2..c256fa7 100644 --- a/instances/aws_instance.py +++ b/instances/aws_instance.py @@ -139,6 +139,10 @@ class AWSInstance: return self.cf_client.get_waiter('stack_exists') @property + def db_available_waiter(self): + return self.rds_client.get_waiter('db_instance_available') + + @property def db_port(self) -> str: return self.config[Config.DB_PORT.value] @@ -391,12 +395,7 @@ class AWSInstance: return db_instances[0] def is_rds_exists(self) -> bool: - try: - self.rds_client.describe_db_instances(DBInstanceIdentifier=self.db_identifier) - except self.rds_client.exceptions.DBInstanceNotFoundFault as ex: - logger.warning(f'DB {self.db_identifier} is not found.') - return False - return True + return self.is_db_available(self.db_identifier) def create_rds_stack(self) -> Optional[Dict]: if self.is_stack_complete(self.rds_stack_name): @@ -414,6 +413,8 @@ class AWSInstance: file_path=self.path_of_rds_stack, params=params, ) + # make sure that rds stack will create successfully + assert self.is_stack_complete(self.rds_stack_name) return resp def terminate_rds_stack(self) -> Optional[Dict]: @@ -1424,7 +1425,6 @@ class AWSInstance: return True - def update_basic_params(self, params: Dict) -> Dict: params[Params.SUBNET_ID.value] = self.get_subnet_id() params[Params.SECURITY_GROUP.value] = self.get_security_group_id() @@ -1846,7 +1846,7 @@ class AWSInstance: ) if not deleted_cost_stacks: return False - if not self.config['ALWAYS_DESTROY_ALL'] \ + if not self.config['ALWAYS_DESTROY_VPC_RDS_MONITOR'] \ or self.is_stack_deleted_complete(self.vpc_stack_name): return True return False @@ -2099,6 +2099,25 @@ class AWSInstance: raise Exception(f'Current stack: {stack_name} is create failed, please check.') return False + def is_db_available(self, db_name: str) -> bool: + if self._db_available(db_name): + return True + return False + + def _db_available(self, db_name: str) -> bool: + try: + self.db_available_waiter.wait( + DBInstanceIdentifier=db_name, + MaxRecords=60, + WaiterConfig={ + 'Delay': 30, + 'MaxAttempts': 120 + } + ) + except WaiterError as wx: + return False + return True + def _validate_spark_worker_scale(self, stack_name: str) -> None: if stack_name not in self.scaled_spark_workers_stacks: msg = f'{stack_name} not in scaled list, please check.' @@ -2135,7 +2154,7 @@ class AWSInstance: self.create_complete_waiter.wait( StackName=stack_name, WaiterConfig={ - 'Delay': 30, + 'Delay': 60, 'MaxAttempts': 120 } ) diff --git a/kylin_configs.yaml b/kylin_configs.yaml index 161cf76..658bcec 100644 --- a/kylin_configs.yaml +++ b/kylin_configs.yaml @@ -46,18 +46,29 @@ KeyName: &security_key ${KEY_PAIR} # Required CIDR_IP: ${Cidr Ip} +# Support for Glue on AWS +# +# Description: +# There is a limitation for supporting glue on AWS which needed a special package of spark by AWS. +# Note: +# If you set `SUPPORT_GLUE` to be `true`, then please make sure that you using Kylin only support `Job` mode, not `Query` as same as `All`. +# Because `All` will support `Query`. So you need to change the `kylin.server.mode` in `kylin.properties` to be `Job` or `All`. +# If you set `All` mode for Kylin, there will be a error when you query any sql. +SUPPORT_GLUE: &SUPPORT_GLUE 'false' + # ============ AWS Configs End ============ # ============ Related Version of Services ============ # Related Version of Services, Current packages are compatible. # Note: Current support these versions, don't modify them. -KYLIN_VERSION: &KYLIN_VERSION '4.0.0' -HIVE_VERSION: '2.3.9' -HADOOP_VERSION: '3.2.0' +KYLIN_VERSION: &KYLIN_VERSION '4.0.2' +HIVE_VERSION: &HIVE_VERSION '2.3.9' +HADOOP_VERSION: &HADOOP_VERSION '3.2.0' NODE_EXPORTER_VERSION: '1.3.1' PROMETHEUS_VERSION: '2.31.1' -SPARK_VERSION: '3.1.1' -ZOOKEEPER_VERSION: '3.4.13' +SPARK_VERSION: &SPARK_VERSION '3.1.1' +ZOOKEEPER_VERSION: &ZOOKEEPER_VERSION '3.4.13' +MDX_VERSION: &MDX_VERSION '4.0.2-beta' # ============ Related Version of Services End============ # ============ Debug Configs ============ @@ -75,8 +86,8 @@ DEPLOY_PLATFORM: &platform ec2 # ============ Tool Configs ============ ## Dangerous !!! -## Optional: destroy all will delete rds and the vpc and monitor node, please be careful. -ALWAYS_DESTROY_ALL: false +## Optional: destroy all will delete rds and the vpc and monitor node, and clean all resources, please be careful. +ALWAYS_DESTROY_VPC_RDS_MONITOR: false ## Open public Ip on Instances ASSOSICATED_PUBLIC_IP: &associated_public_ip 'true' @@ -166,6 +177,8 @@ EC2_STATIC_SERVICES_PARAMS: EMREc2KeyName: *security_key AssociatedPublicIp: *associated_public_ip + SupportGlue: *SUPPORT_GLUE + DbPort: *DbPort DbUser: *DbUser DbPassword: *DbPassword @@ -184,6 +197,7 @@ EC2_ZOOKEEPERS_PARAMS: SecurityGroupId: EMREc2KeyName: *security_key AssociatedPublicIp: *associated_public_ip + ZookeeperVersion: *ZOOKEEPER_VERSION ZookeeperScriptFileName: prepare-ec2-env-for-zk.sh Ec2Mode: test @@ -203,6 +217,10 @@ EC2_SPARK_MASTER_PARAMS: DbPort: *DbPort DbUser: *DbUser DbPassword: *DbPassword + SparkVersion: *SPARK_VERSION + HadoopVersion: *HADOOP_VERSION + HiveVersion: *HIVE_VERSION + SupportGlue: *SUPPORT_GLUE AssociatedPublicIp: *associated_public_ip SparkMasterScriptFileName: prepare-ec2-env-for-spark-master.sh @@ -222,6 +240,13 @@ EC2_KYLIN4_PARAMS: SubnetId: SecurityGroupId: + KylinVersion: *KYLIN_VERSION + SparkVersion: *SPARK_VERSION + HadoopVersion: *HADOOP_VERSION + HiveVersion: *HIVE_VERSION + MdxVersion: *MDX_VERSION + SupportGlue: *SUPPORT_GLUE + AssociatedPublicIp: *associated_public_ip DbPort: *DbPort @@ -248,6 +273,10 @@ EC2_SPARK_WORKER_PARAMS: EMREc2KeyName: *security_key # set 'true' for test AssociatedPublicIp: *associated_public_ip + KylinVersion: *KYLIN_VERSION + SparkVersion: *SPARK_VERSION + HadoopVersion: *HADOOP_VERSION + SupportGlue: *SUPPORT_GLUE SlaveScriptFileName: prepare-ec2-env-for-spark-slave.sh Ec2Mode: test @@ -266,6 +295,13 @@ EC2_KYLIN4_SCALE_PARAMS: SubnetId: SecurityGroupId: + KylinVersion: *KYLIN_VERSION + SparkVersion: *SPARK_VERSION + HadoopVersion: *HADOOP_VERSION + HiveVersion: *HIVE_VERSION + MdxVersion: *MDX_VERSION + SupportGlue: *SUPPORT_GLUE + AssociatedPublicIp: *associated_public_ip DbPort: *DbPort @@ -290,6 +326,12 @@ EC2_SPARK_SCALE_SLAVE_PARAMS: SubnetId: SecurityGroupId: WorkerNum: + + KylinVersion: *KYLIN_VERSION + SparkVersion: *SPARK_VERSION + HadoopVersion: *HADOOP_VERSION + SupportGlue: *SUPPORT_GLUE + WaitingTime: '50' EMREc2KeyName: *security_key # set 'true' for test diff --git a/readme/commands.md b/readme/commands.md index ab55e32..49b80dc 100644 --- a/readme/commands.md +++ b/readme/commands.md @@ -47,7 +47,7 @@ $ python deploy.py --type deploy --cluster all > Note: > -> Destroy all clusters will not delete vpc, rds, and monitor node. So if user doesn't want to hold the env, please set the `ALWAYS_DESTROY_ALL` to be `'true'`. +> Destroy all clusters will not delete vpc, rds, and monitor node. So if user doesn't want to hold the env, please set the `ALWAYS_DESTROY_VPC_RDS_MONITOR` to be `'true'`. - Destroy a default cluster diff --git a/readme/quick_start.md b/readme/quick_start.md index 3c44d55..4f88b6a 100644 --- a/readme/quick_start.md +++ b/readme/quick_start.md @@ -75,5 +75,5 @@ $ python deploy.py --type destroy > Note: > > 1. If you want to check about a quick start for multiple clusters, please > referer to a [quick start for multiple > clusters](./quick_start_for_multiple_clusters.md). -> 2. **Current destroy operation will remain some stack which contains `RDS` and so on**. So if user want to destroy clearly, please modify the `ALWAYS_DESTROY_ALL` in `kylin_configs.yml` to be `true` and re-execute `destroy` command. +> 2. **Current destroy operation will remain some stack which contains `RDS` and so on**. So if user want to destroy clearly, please modify the `ALWAYS_DESTROY_VPC_RDS_MONITOR` in `kylin_configs.yml` to be `true` and re-execute `destroy` command. diff --git a/utils.py b/utils.py index cbfff62..1f8b0cb 100644 --- a/utils.py +++ b/utils.py @@ -43,6 +43,7 @@ class Utils: FILES_SIZE_IN_BYTES = { 'jdk-8u301-linux-x64.tar.gz': 145520298, 'apache-kylin-4.0.0-bin-spark3.tar.gz': 198037626, + 'apache-kylin-4.0.2-bin-spark3.tar.gz': 198051064, 'apache-hive-2.3.9-bin.tar.gz': 286170958, 'hadoop-3.2.0.tar.gz': 345625475, 'node_exporter-1.3.1.linux-amd64.tar.gz': 9033415, @@ -51,6 +52,9 @@ class Utils: 'zookeeper-3.4.13.tar.gz': 37191810, 'commons-configuration-1.3.jar': 232915, 'mysql-connector-java-5.1.40.jar': 990924, + 'mysql-connector-java-8.0.24.jar': 2428323, + 'mdx-kylin-4.0.2-beta.tar.gz': 81935515, + 'spark-3.1.1-bin-hadoop3.2-aws.tgz': 531069078, } @staticmethod