This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin-on-parquet-v2 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit bc94f4978bc3009a5ad7853c2c6f86c8f2f1c39d
Author: XiaoxiangYu <x...@apache.org>
AuthorDate: Tue Sep 8 18:19:49 2020 +0800

    KYLIN-4660 Refine kylin-default.properties
---
 build/bin/download-spark.sh                        |  2 +-
 .../src/main/resources/kylin-defaults.properties   | 31 +++++++++++++++++-----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/build/bin/download-spark.sh b/build/bin/download-spark.sh
index 906a23a..03107c4 100755
--- a/build/bin/download-spark.sh
+++ b/build/bin/download-spark.sh
@@ -57,7 +57,7 @@ unalias md5cmd
 
 echo "Start to decompress package"
 tar -zxvf spark-${spark_version}-bin-hadoop2.7.tgz || { exit 1; }
-mv spark-${spark_version}-bin-hadoop2.7.tgz spark
+mv spark-${spark_version}-bin-hadoop2.7 spark
 
 # Remove unused components in Spark
 rm -rf spark/lib/spark-examples-*
diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties
index 4eb2f17..2dfc5f6 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -17,7 +17,8 @@
 
 ### METADATA | ENV ###
 
-# The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended
+# The metadata store has two implementations(RDBMS/HBase), while RDBMS is recommended in Kylin 4.X
+# Please refer to https://cwiki.apache.org/confluence/display/KYLIN/How+to+use+HBase+metastore+in+Kylin+4.0 if you prefer HBase
 kylin.metadata.url=kylin_metadata@jdbc,url=jdbc:mysql://localhost:3306/kylin,username=XXXX,password=XXXXXX,maxActive=10,maxIdle=10
 
 # metadata cache sync retry times
@@ -91,6 +92,15 @@ kylin.web.default-time-filter=1
 # When user deploy kylin on AWS EMR and Glue is used as external metadata, use gluecatalog instead
 kylin.source.hive.metadata-type=hcatalog
 
+# Hive client, valid value [cli, beeline]
+kylin.source.hive.client=cli
+
+# Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
+kylin.source.hive.beeline-shell=beeline
+
+# Hive database name for putting the intermediate flat tables
+kylin.source.hive.database-for-flat-table=default
+
 ### STORAGE ###
 
 # The storage for final cube file in hbase
@@ -225,7 +235,7 @@ kylin.security.saml.context-server-name=hostname
 kylin.security.saml.context-server-port=443
 kylin.security.saml.context-path=/kylin
 
-### SPARK BUILD/MERGE ENGINE CONFIGS ###
+### SPARK BUILD ENGINE CONFIGS ###
 
 # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
 # This must contain site xmls of core, yarn, hive, and hbase in one folder
@@ -257,20 +267,27 @@ kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
 #kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
 #kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
 
-### SPARK QUERY ENGINE CONFIGS ###
+### SPARK QUERY ENGINE CONFIGS (a.k.a. Sparder Context) ###
+# Enlarge cores and memory to improve query performance in production env, please check https://cwiki.apache.org/confluence/display/KYLIN/User+Manual+4.X
+
 kylin.query.spark-conf.spark.master=yarn
 #kylin.query.spark-conf.spark.submit.deployMode=client
 kylin.query.spark-conf.spark.driver.cores=1
 kylin.query.spark-conf.spark.driver.memory=4G
 kylin.query.spark-conf.spark.driver.memoryOverhead=1G
-kylin.query.spark-conf.spark.executor.cores=5
-kylin.query.spark-conf.spark.executor.instances=4
-kylin.query.spark-conf.spark.executor.memory=20G
-kylin.query.spark-conf.spark.executor.memoryOverhead=2G
+kylin.query.spark-conf.spark.executor.cores=1
+kylin.query.spark-conf.spark.executor.instances=1
+kylin.query.spark-conf.spark.executor.memory=4G
+kylin.query.spark-conf.spark.executor.memoryOverhead=1G
 kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializer
 #kylin.query.spark-conf.spark.sql.shuffle.partitions=40
 #kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
 
+# uncomment for HDP
+#kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
+#kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
+
 ### QUERY PUSH DOWN ###
 
 #kylin.query.pushdown.runner-class-name=org.apache.kylin.query.pushdown.PushDownRunnerSparkImpl