This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin4_on_cloud in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin4_on_cloud by this push: new 85e9f5a Fix 0314 (#1833) 85e9f5a is described below commit 85e9f5a7970465ba15ad578b8ac693770f45c34b Author: Tengting Xu <34978943+muk...@users.noreply.github.com> AuthorDate: Mon Mar 14 11:47:30 2022 +0800 Fix 0314 (#1833) * # switch on enable MDX * # smaller default resources for query and job engine * # update document when using glue --- backup/properties/default/kylin.properties | 8 ++++---- .../properties/templates/kylin.properties.template | 8 ++++---- backup/scripts/prepare-ec2-env-for-kylin4.sh | 23 ++++++++++++++++++---- .../ec2-cluster-kylin4-template.yaml | 9 ++++++++- cloudformation_templates/ec2-cluster-kylin4.yaml | 10 +++++++++- kylin_configs.yaml | 5 +++++ readme/prerequisites.md | 2 +- 7 files changed, 50 insertions(+), 15 deletions(-) diff --git a/backup/properties/default/kylin.properties b/backup/properties/default/kylin.properties index ef5ef94..aecaa8f 100644 --- a/backup/properties/default/kylin.properties +++ b/backup/properties/default/kylin.properties @@ -27,9 +27,9 @@ kylin.engine.spark-conf.spark.history.fs.logDirectory=s3a:/{{ S3_BUCKET_PATH }}/ kylin.engine.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077 kylin.cube.cubeplanner.enabled=false -kylin.engine.spark-conf.spark.executor.cores=3 -kylin.engine.spark-conf.spark.executor.instances=20 -kylin.engine.spark-conf.spark.executor.memory=12GB +kylin.engine.spark-conf.spark.executor.cores=2 +kylin.engine.spark-conf.spark.executor.instances=4 +kylin.engine.spark-conf.spark.executor.memory=7GB kylin.engine.spark-conf.spark.executor.memoryOverhead=1GB ### support prometheus @@ -47,7 +47,7 @@ kylin.query.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077 kylin.query.spark-conf.spark.driver.cores=1 kylin.query.spark-conf.spark.driver.memory=8GB kylin.query.spark-conf.spark.driver.memoryOverhead=1G -kylin.query.spark-conf.spark.executor.instances=30 +kylin.query.spark-conf.spark.executor.instances=2 kylin.query.spark-conf.spark.executor.cores=2 kylin.query.spark-conf.spark.executor.memory=7G kylin.query.spark-conf.spark.executor.memoryOverhead=1G diff --git a/backup/properties/templates/kylin.properties.template b/backup/properties/templates/kylin.properties.template index ef5ef94..aecaa8f 100644 --- a/backup/properties/templates/kylin.properties.template +++ b/backup/properties/templates/kylin.properties.template @@ -27,9 +27,9 @@ kylin.engine.spark-conf.spark.history.fs.logDirectory=s3a:/{{ S3_BUCKET_PATH }}/ kylin.engine.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077 kylin.cube.cubeplanner.enabled=false -kylin.engine.spark-conf.spark.executor.cores=3 -kylin.engine.spark-conf.spark.executor.instances=20 -kylin.engine.spark-conf.spark.executor.memory=12GB +kylin.engine.spark-conf.spark.executor.cores=2 +kylin.engine.spark-conf.spark.executor.instances=4 +kylin.engine.spark-conf.spark.executor.memory=7GB kylin.engine.spark-conf.spark.executor.memoryOverhead=1GB ### support prometheus @@ -47,7 +47,7 @@ kylin.query.spark-conf.spark.master=spark://{{ SPARK_MASTER }}:7077 kylin.query.spark-conf.spark.driver.cores=1 kylin.query.spark-conf.spark.driver.memory=8GB kylin.query.spark-conf.spark.driver.memoryOverhead=1G -kylin.query.spark-conf.spark.executor.instances=30 +kylin.query.spark-conf.spark.executor.instances=2 kylin.query.spark-conf.spark.executor.cores=2 kylin.query.spark-conf.spark.executor.memory=7G kylin.query.spark-conf.spark.executor.memoryOverhead=1G diff --git a/backup/scripts/prepare-ec2-env-for-kylin4.sh b/backup/scripts/prepare-ec2-env-for-kylin4.sh index cdad91d..bfdfdfa 100644 --- a/backup/scripts/prepare-ec2-env-for-kylin4.sh +++ b/backup/scripts/prepare-ec2-env-for-kylin4.sh @@ -114,6 +114,8 @@ while [[ $# != 0 ]]; do MDX_VERSION=$2 elif [[ $1 == "--support-glue" ]]; then SUPPORT_GLUE=$2 + elif [[ $1 == "--enable-mdx" ]]; then + ENABLE_MDX=$2 else help fi @@ -153,6 +155,10 @@ if [[ -z "$MDX_DATABASE" ]]; then MDX_DATABASE=kylin_mdx fi +if [[ -z "$ENABLE_MDX" ]]; then + ENABLE_MDX=false +fi + LOCAL_CACHE_DIR=/home/ec2-user/ssd ### File name @@ -426,7 +432,7 @@ function init_hive() { fi if [[ $SUPPORT_GLUE == "true" ]]; then - cat <<EOF >${HIVE_HOME}/conf/hive-site.xml + cat <<EOF >${HIVE_HOME}/conf/hive-site.xml <?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> @@ -661,7 +667,7 @@ function init_kylin() { aws s3 cp ${PATH_TO_BUCKET}/properties/${CLUSTER_NUM}/kylin.properties ${KYLIN_HOME}/conf/kylin.properties --region ${CURRENT_REGION} if [[ ${LOCAL_CACHE_SOFT_AFFINITY} == "true" ]]; then - cat <<EOF >> ${KYLIN_HOME}/conf/kylin.properties + cat <<EOF >>${KYLIN_HOME}/conf/kylin.properties kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=\${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=\${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}} -Dalluxio.user.client.cache.dir=${LOCAL_CACHE_DIR}/alluxio-cache-{{APP_ID}}-{{EXECUTOR_ID}} kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current -Dalluxio.user.client.cache.dir=${LOCAL_CACHE_DIR}/alluxio-cache-driver @@ -702,6 +708,9 @@ EOF } function prepare_mdx() { + if [[ ${ENABLE_MDX} == "false" ]]; then + return + fi logging info "Preparing MDX ..." if [[ -f ${HOME_DIR}/.prepared_mdx ]]; then @@ -736,6 +745,9 @@ function prepare_mdx() { } function init_mdx() { + if [[ ${ENABLE_MDX} == "false" ]]; then + return + fi if [[ -f ${HOME_DIR}/.inited_mdx ]]; then logging warn "MDX already inited ..." return @@ -745,7 +757,7 @@ function init_mdx() { aws s3 cp ${PATH_TO_BUCKET}/jars/mysql-connector-java-8.0.24.jar $MDX_HOME/semantic-mdx/lib/ --region ${CURRENT_REGION} fi - if [[ ! -f $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then + if [[ ! -f $MDX_HOME/semantic-mdx/lib/kylin-jdbc-4.0.0-SNAPSHOT.jar ]]; then logging info "Copy jdbc driver from $KYLIN_HOME to $MDX_HOME/semantic-mdx/lib/ ..." cp -f $KYLIN_HOME/lib/kylin-jdbc-*.jar $MDX_HOME/semantic-mdx/lib/ fi @@ -812,7 +824,7 @@ function start_kylin() { function sample_for_kylin() { if [[ $SUPPORT_GLUE == "true" ]]; then - return + return fi if [[ ${IS_SCALED} == "false" ]]; then @@ -832,6 +844,9 @@ function restart_kylin() { } function start_mdx() { + if [[ ${ENABLE_MDX} == "false" ]]; then + return + fi ${MDX_HOME}/bin/mdx.sh start } diff --git a/cloudformation_templates/ec2-cluster-kylin4-template.yaml b/cloudformation_templates/ec2-cluster-kylin4-template.yaml index 57cced5..5b14a18 100644 --- a/cloudformation_templates/ec2-cluster-kylin4-template.yaml +++ b/cloudformation_templates/ec2-cluster-kylin4-template.yaml @@ -153,6 +153,9 @@ Parameters: SupportGlue: Type: String Default: false + EnableMDX: + Type: String + Default: false Mappings: AWSRegionArch2AMI: @@ -260,7 +263,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion} - bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] + bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] echo " Kylin4 is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName @@ -279,6 +282,7 @@ Resources: PrivateHadoopVersion: !Ref HadoopVersion PrivateMdxVersion: !Ref MdxVersion PrivateSupportGlue: !Ref SupportGlue + PrivateEnableMDX: !Ref EnableMDX Outputs: IdOfInstance: @@ -305,3 +309,6 @@ Outputs: SupportGlue: Description: is supported glue ? Value: !Ref SupportGlue + EnableMDX: + Description: mdx is enabled? + Value: !Ref EnableMDX diff --git a/cloudformation_templates/ec2-cluster-kylin4.yaml b/cloudformation_templates/ec2-cluster-kylin4.yaml index a4eb58b..e02fadf 100644 --- a/cloudformation_templates/ec2-cluster-kylin4.yaml +++ b/cloudformation_templates/ec2-cluster-kylin4.yaml @@ -154,6 +154,10 @@ Parameters: Type: String Default: false + EnableMDX: + Type: String + Default: false + Mappings: AWSRegionArch2AMI: cn-north-1: @@ -260,7 +264,7 @@ Resources: #!/bin/bash -xe cd /home/ec2-user aws s3 cp ${PrivateBucketFullPath}/scripts/${PrivateKylin4ScriptFileName} . --region ${PrivateRegion} - bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] + bash ${PrivateKylin4ScriptFileName} --bucket-url ${PrivateBucketPath} --region ${PrivateRegion} --db-host ${PrivateDbHost} --db-password ${PrivateDbPass} --db-user ${PrivateDbUser} --db-port ${PrivateDbPort} --local-soft ${PrivateLocalCacheSoftAffinity} --cluster-num ${PrivateClusterNum} --is-scaled ${PrivateIsScaled} --hadoop-version ${PrivateHadoopVersion} --spark-version ${PrivateSparkVersion} --hive-version ${PrivateHiveVersion} --kylin-version ${PrivateKylinVersion} -- [...] echo " Kylin4 is ready ..." - PrivateBucketFullPath: !Ref BucketFullPath PrivateKylin4ScriptFileName: !Ref Kylin4ScriptFileName @@ -279,6 +283,7 @@ Resources: PrivateHadoopVersion: !Ref HadoopVersion PrivateMdxVersion: !Ref MdxVersion PrivateSupportGlue: !Ref SupportGlue + PrivateEnableMDX: !Ref EnableMDX Outputs: IdOfInstance: @@ -305,3 +310,6 @@ Outputs: SupportGlue: Description: is supported glue ? Value: !Ref SupportGlue + EnableMDX: + Description: mdx is enabled? + Value: !Ref EnableMDX diff --git a/kylin_configs.yaml b/kylin_configs.yaml index 658bcec..d924459 100644 --- a/kylin_configs.yaml +++ b/kylin_configs.yaml @@ -56,6 +56,9 @@ CIDR_IP: ${Cidr Ip} # If you set `All` mode for Kylin, there will be a error when you query any sql. SUPPORT_GLUE: &SUPPORT_GLUE 'false' +# Enable using MDX +ENABLE_MDX: &ENABLE_MDX 'false' + # ============ AWS Configs End ============ # ============ Related Version of Services ============ @@ -246,6 +249,7 @@ EC2_KYLIN4_PARAMS: HiveVersion: *HIVE_VERSION MdxVersion: *MDX_VERSION SupportGlue: *SUPPORT_GLUE + EnableMDX: *ENABLE_MDX AssociatedPublicIp: *associated_public_ip @@ -301,6 +305,7 @@ EC2_KYLIN4_SCALE_PARAMS: HiveVersion: *HIVE_VERSION MdxVersion: *MDX_VERSION SupportGlue: *SUPPORT_GLUE + EnableMDX: *ENABLE_MDX AssociatedPublicIp: *associated_public_ip diff --git a/readme/prerequisites.md b/readme/prerequisites.md index 361077d..2a6497d 100644 --- a/readme/prerequisites.md +++ b/readme/prerequisites.md @@ -14,7 +14,7 @@ git clone https://github.com/apache/kylin.git && cd kylin && git checkout kylin4 > Note: > -> `IAM` role must have the access which contains `AmazonEC2RoleforSSM`, `AmazonSSMFullAccess,` and `AmazonSSMManagedInstanceCore`. +> `IAM` role must have the access which contains `AmazonEC2RoleforSSM`, `AmazonSSMFullAccess,` and `AmazonSSMManagedInstanceCore`, and `AWSGlueConsoleFullAccess` (this role is needed if you want to use glue). > > This `IAM` Role will be used to initialize every ec2 instances which is > for creating a kylin4 cluster on AWS. And it will configure in `Initialize > Env of Local Machine` part.