This is an automated email from the ASF dual-hosted git repository. zhangzc pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push:
     new e0de524  KYLIN-5019 Avoid building global dictionary from all data of fact table every time (#1720)
e0de524 is described below

commit e0de52432f3d9e253ac3a581f9f8e389e67afc86
Author: zhengshengjun <74281684+zhengsheng...@users.noreply.github.com>
AuthorDate: Wed Aug 11 09:53:06 2021 +0800

    KYLIN-5019 Avoid building global dictionary from all data of fact table every time (#1720)
---
 .../org/apache/kylin/engine/spark/builder/CreateFlatTable.scala | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index 3bfd519..d266b9a 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -47,6 +47,7 @@ class CreateFlatTable(val seg: SegmentInfo,
     var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq, ss, seg.project)

     logInfo(s"Create flattable need join lookup tables $needJoin, need encode cols $needEncode")
+    rootFactDataset = applyPartitionCondition(seg, rootFactDataset)

     (needJoin, needEncode) match {
       case (true, true) =>
@@ -148,13 +149,18 @@ object CreateFlatTable extends Logging {

   private def applyFilterCondition(desc: SegmentInfo, ds: Dataset[Row]): Dataset[Row] = {
     var afterFilter = ds
-
     if (StringUtils.isNotBlank(desc.filterCondition)) {
       val afterConvertCondition = desc.filterCondition
       logInfo(s"Filter condition is $afterConvertCondition")
       afterFilter = afterFilter.where(afterConvertCondition)
     }
+    afterFilter
+  }
+
+  private def applyPartitionCondition(desc: SegmentInfo, ds: Dataset[Row]): Dataset[Row] = {
+    var afterFilter = ds
     if (StringUtils.isNotBlank(desc.partitionExp)) {
+      logInfo(s"Partition Filter condition is ${desc.partitionExp}")
       afterFilter = afterFilter.where(desc.partitionExp)
     }
     afterFilter