This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit dd46387c394a94391525d18c85cbf9bdabfc6b23 Author: Pengfei Zhan <dethr...@gmail.com> AuthorDate: Mon Apr 3 16:18:36 2023 +0800 KYLIN-5632 move index matchers to ChooserContext --- .../kylin/metadata/cube/cuboid/ChooserContext.java | 51 +++++++++- .../kylin/metadata/cube/cuboid/IndexMatcher.java | 35 ++++--- .../metadata/cube/cuboid/NLayoutCandidate.java | 11 +-- .../metadata/cube/cuboid/NQueryLayoutChooser.java | 108 ++++++++------------- .../metadata/cube/cuboid/TableIndexMatcher.java | 17 +--- .../cube/model/NDataflowCapabilityChecker.java | 1 - .../metadata/realization/CapabilityResult.java | 9 ++ 7 files changed, 126 insertions(+), 106 deletions(-) diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/ChooserContext.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/ChooserContext.java index 5b09b5917e..454cc3bc2f 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/ChooserContext.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/ChooserContext.java @@ -25,19 +25,25 @@ import java.util.stream.Collectors; import org.apache.commons.collections.CollectionUtils; import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.guava30.shaded.common.collect.ImmutableMultimap; +import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; +import org.apache.kylin.metadata.cube.model.NDataflow; +import org.apache.kylin.metadata.model.AntiFlatChecker; +import org.apache.kylin.metadata.model.ColExcludedChecker; import org.apache.kylin.metadata.model.NDataModel; import org.apache.kylin.metadata.model.NDataModelManager; import org.apache.kylin.metadata.model.NTableMetadataManager; import org.apache.kylin.metadata.model.TableExtDesc; import org.apache.kylin.metadata.model.TblColRef; - -import org.apache.kylin.guava30.shaded.common.collect.ImmutableMultimap; -import org.apache.kylin.guava30.shaded.common.collect.Lists; -import org.apache.kylin.guava30.shaded.common.collect.Maps; +import org.apache.kylin.metadata.project.NProjectManager; +import org.apache.kylin.metadata.realization.SQLDigest; import lombok.Getter; import lombok.val; +import lombok.extern.slf4j.Slf4j; +@Slf4j @Getter public class ChooserContext { @@ -49,8 +55,14 @@ public class ChooserContext { final Map<String, List<Integer>> foreignKeyColumnIds = Maps.newHashMap(); final Map<Integer, TableExtDesc.ColumnStats> columnStatMap = Maps.newHashMap(); + final KylinConfig kylinConfig; + + AggIndexMatcher aggIndexMatcher; + TableIndexMatcher tableIndexMatcher; + public ChooserContext(NDataModel model) { this.model = model; + this.kylinConfig = NProjectManager.getProjectConfig(model.getProject()); ImmutableMultimap.Builder<Integer, Integer> fk2PkBuilder = ImmutableMultimap.builder(); @@ -63,7 +75,38 @@ public class ChooserContext { } this.fk2Pk = fk2PkBuilder.build(); + } + + public ChooserContext(SQLDigest sqlDigest, NDataflow dataflow) { + this(dataflow.getModel()); + prepareIndexMatchers(sqlDigest, dataflow); + } + + private void prepareIndexMatchers(SQLDigest sqlDigest, NDataflow dataflow) { + String project = dataflow.getProject(); + ColExcludedChecker excludedChecker = new ColExcludedChecker(kylinConfig, project, model); + if (log.isDebugEnabled()) { + log.debug("When matching layouts, all deduced excluded columns are: {}", + excludedChecker.getExcludedColNames()); + } + AntiFlatChecker antiFlatChecker = new AntiFlatChecker(model.getJoinTables(), model); + if (log.isDebugEnabled()) { + log.debug("When matching layouts, all deduced anti-flatten lookup tables are: {}", + antiFlatChecker.getAntiFlattenLookups()); + } + + aggIndexMatcher = new AggIndexMatcher(sqlDigest, this, dataflow, excludedChecker, antiFlatChecker); + tableIndexMatcher = new TableIndexMatcher(sqlDigest, this, dataflow, excludedChecker, antiFlatChecker); + } + /** + * Bail out if both AggIndex and TableIndex are invalid. This may be caused by: + * 1. cc col is not present in the model; + * 2. dynamic params ? present in query like select sum(col/?) from ..., + * see org.apache.kylin.query.DynamicQueryTest.testDynamicParamOnAgg. + */ + public boolean isIndexMatchersInvalid() { + return !getAggIndexMatcher().isValid() && !getTableIndexMatcher().isValid(); } public TableExtDesc.ColumnStats getColumnStats(TblColRef ref) { diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/IndexMatcher.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/IndexMatcher.java index 0b600250e1..eea05e1a8e 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/IndexMatcher.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/IndexMatcher.java @@ -27,6 +27,13 @@ import java.util.stream.Stream; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.kylin.guava30.shaded.common.base.Preconditions; +import org.apache.kylin.guava30.shaded.common.collect.ImmutableCollection; +import org.apache.kylin.guava30.shaded.common.collect.ImmutableMultimap; +import org.apache.kylin.guava30.shaded.common.collect.Iterables; +import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; +import org.apache.kylin.guava30.shaded.common.collect.Sets; import org.apache.kylin.metadata.cube.model.IndexEntity; import org.apache.kylin.metadata.cube.model.LayoutEntity; import org.apache.kylin.metadata.cube.model.NDataflow; @@ -41,15 +48,6 @@ import org.apache.kylin.metadata.project.NProjectManager; import org.apache.kylin.metadata.realization.CapabilityResult; import org.apache.kylin.metadata.realization.SQLDigest; -import org.apache.kylin.guava30.shaded.common.base.Preconditions; -import org.apache.kylin.guava30.shaded.common.collect.ImmutableCollection; -import org.apache.kylin.guava30.shaded.common.collect.ImmutableMultimap; -import org.apache.kylin.guava30.shaded.common.collect.Iterables; -import org.apache.kylin.guava30.shaded.common.collect.Lists; -import org.apache.kylin.guava30.shaded.common.collect.Maps; -import org.apache.kylin.guava30.shaded.common.collect.Sets; - -import lombok.AllArgsConstructor; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -235,7 +233,6 @@ public abstract class IndexMatcher { } @Getter - @AllArgsConstructor @RequiredArgsConstructor public static class MatchResult { @@ -243,13 +240,27 @@ public abstract class IndexMatcher { Map<Integer, DeriveInfo> needDerive = Maps.newHashMap(); - CapabilityResult.IncapableCause cases; + CapabilityResult.IncapableCause incapableCause; + + private int penalty = 0; + + private double influenceFactor = 1.0; public List<CapabilityResult.CapabilityInfluence> influences = Lists.newArrayList(); - public MatchResult(boolean isMatched, Map<Integer, DeriveInfo> needDerive) { + public MatchResult(boolean isMatched, int penalty, Map<Integer, DeriveInfo> needDerive) { + this.isMatched = isMatched; + this.needDerive = needDerive; + this.penalty = penalty; + this.influenceFactor += penalty; + } + + public MatchResult(boolean isMatched, Map<Integer, DeriveInfo> needDerive, + CapabilityResult.IncapableCause reason, List<CapabilityResult.CapabilityInfluence> influences) { this.isMatched = isMatched; this.needDerive = needDerive; + this.incapableCause = reason; + this.influences = influences; } } } diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NLayoutCandidate.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NLayoutCandidate.java index c28bf01444..2ab1afccc0 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NLayoutCandidate.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NLayoutCandidate.java @@ -25,14 +25,13 @@ import java.util.Set; import javax.annotation.Nonnull; +import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; +import org.apache.kylin.metadata.cube.model.LayoutEntity; import org.apache.kylin.metadata.model.DeriveInfo; import org.apache.kylin.metadata.model.JoinDesc; import org.apache.kylin.metadata.realization.CapabilityResult; import org.apache.kylin.metadata.realization.IRealizationCandidate; -import org.apache.kylin.metadata.cube.model.LayoutEntity; - -import org.apache.kylin.guava30.shaded.common.collect.Lists; -import org.apache.kylin.guava30.shaded.common.collect.Maps; import lombok.Getter; import lombok.Setter; @@ -42,10 +41,6 @@ public class NLayoutCandidate implements IRealizationCandidate { @Setter private double cost; - @Getter - @Setter - private int layoutUnmatchedColsSize; - @Setter @Getter private CapabilityResult capabilityResult; diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NQueryLayoutChooser.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NQueryLayoutChooser.java index 6460675d53..31572941e5 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NQueryLayoutChooser.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/NQueryLayoutChooser.java @@ -18,7 +18,6 @@ package org.apache.kylin.metadata.cube.cuboid; -import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; @@ -31,28 +30,23 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.MapUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.SegmentOnlineMode; -import org.apache.kylin.common.exception.KylinTimeoutException; -import org.apache.kylin.metadata.cube.model.IndexEntity; +import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet; +import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; +import org.apache.kylin.guava30.shaded.common.collect.Ordering; +import org.apache.kylin.guava30.shaded.common.collect.Sets; import org.apache.kylin.metadata.cube.model.IndexPlan; import org.apache.kylin.metadata.cube.model.LayoutEntity; import org.apache.kylin.metadata.cube.model.NDataLayout; import org.apache.kylin.metadata.cube.model.NDataSegment; import org.apache.kylin.metadata.cube.model.NDataflow; -import org.apache.kylin.metadata.model.AntiFlatChecker; -import org.apache.kylin.metadata.model.ColExcludedChecker; import org.apache.kylin.metadata.model.DeriveInfo; -import org.apache.kylin.metadata.model.NDataModel; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.metadata.project.NProjectManager; import org.apache.kylin.metadata.realization.CapabilityResult; import org.apache.kylin.metadata.realization.SQLDigest; +import org.apache.kylin.query.util.QueryInterruptChecker; -import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet; -import org.apache.kylin.guava30.shaded.common.collect.Lists; -import org.apache.kylin.guava30.shaded.common.collect.Maps; -import org.apache.kylin.guava30.shaded.common.collect.Ordering; - -import org.apache.kylin.guava30.shaded.common.collect.Sets; import lombok.val; import lombok.var; import lombok.extern.slf4j.Slf4j; @@ -91,85 +85,61 @@ public class NQueryLayoutChooser { return NLayoutCandidate.EMPTY; } - String project = dataflow.getProject(); - NDataModel model = dataflow.getModel(); - KylinConfig projectConfig = NProjectManager.getProjectConfig(project); - ChooserContext chooserContext = new ChooserContext(model); - ColExcludedChecker excludedChecker = new ColExcludedChecker(projectConfig, project, model); - if (log.isDebugEnabled()) { - log.debug("When matching layouts, all deduced excluded columns are: {}", - excludedChecker.getExcludedColNames()); - } - AntiFlatChecker antiFlatChecker = new AntiFlatChecker(model.getJoinTables(), model); - if (log.isDebugEnabled()) { - log.debug("When matching layouts, all deduced anti-flatten lookup tables are: {}", - antiFlatChecker.getAntiFlattenLookups()); - } - - AggIndexMatcher aggIndexMatcher = new AggIndexMatcher(sqlDigest, chooserContext, dataflow, excludedChecker, - antiFlatChecker); - TableIndexMatcher tableIndexMatcher = new TableIndexMatcher(sqlDigest, chooserContext, dataflow, - excludedChecker, antiFlatChecker); - - // bail out if both agg index are invalid - // matcher may be caused by - // 1. cc col is not present in the model - // 2. dynamic params ? present in query like select sum(col/?) from ..., - // see org.apache.kylin.query.DynamicQueryTest.testDynamicParamOnAgg - if (!aggIndexMatcher.isValid() && !tableIndexMatcher.isValid()) { + ChooserContext chooserContext = new ChooserContext(sqlDigest, dataflow); + if (chooserContext.isIndexMatchersInvalid()) { return null; } - IndexPlan indexPlan = dataflow.getIndexPlan(); - List<NLayoutCandidate> candidates = new ArrayList<>(); - Collection<NDataLayout> commonLayouts = getLayoutsFromSegments(prunedSegments, dataflow, chSegmentToLayoutsMap); + Collection<NDataLayout> commonLayouts = getCommonLayouts(prunedSegments, dataflow, chSegmentToLayoutsMap); log.info("Matching dataflow with seg num: {} layout num: {}", prunedSegments.size(), commonLayouts.size()); - for (NDataLayout dataLayout : commonLayouts) { - log.trace("Matching layout {}", dataLayout); - IndexEntity indexEntity = indexPlan.getIndexEntity(dataLayout.getIndexId()); - log.trace("Matching indexEntity {}", indexEntity); + Map<Long, List<NDataLayout>> commonLayoutsMap = commonLayouts.stream() + .collect(Collectors.toMap(NDataLayout::getLayoutId, Lists::newArrayList)); + List<NLayoutCandidate> candidates = collectAllLayoutCandidates(dataflow, chooserContext, commonLayoutsMap); - LayoutEntity layout = indexPlan.getLayoutEntity(dataLayout.getLayoutId()); - NLayoutCandidate candidate = new NLayoutCandidate(layout); - IndexMatcher.MatchResult matchResult = tableIndexMatcher.match(layout); - double influenceFactor = 1.0; + QueryInterruptChecker.checkThreadInterrupted("Interrupted exception occurs.", + "Current step involves gathering all the layouts that " + + "can potentially provide a response to this query."); + + if (candidates.isEmpty()) { + return null; + } + log.info("Matched candidates num : {}", candidates.size()); + sortCandidates(candidates, chooserContext, sqlDigest); + return candidates.get(0); + } + + public static List<NLayoutCandidate> collectAllLayoutCandidates(NDataflow dataflow, ChooserContext chooserContext, + Map<Long, List<NDataLayout>> commonLayoutsMap) { + List<NLayoutCandidate> candidates = Lists.newArrayList(); + for (Map.Entry<Long, List<NDataLayout>> entry : commonLayoutsMap.entrySet()) { + LayoutEntity layout = dataflow.getIndexPlan().getLayoutEntity(entry.getKey()); + log.trace("Matching index: id = {}", entry.getKey()); + IndexMatcher.MatchResult matchResult = chooserContext.getTableIndexMatcher().match(layout); if (!matchResult.isMatched()) { - matchResult = aggIndexMatcher.match(layout); - } else if (projectConfig.useTableIndexAnswerSelectStarEnabled()) { - influenceFactor += tableIndexMatcher.getLayoutUnmatchedColsSize(); - candidate.setLayoutUnmatchedColsSize(tableIndexMatcher.getLayoutUnmatchedColsSize()); + matchResult = chooserContext.getAggIndexMatcher().match(layout); } + if (!matchResult.isMatched()) { log.trace("Matching failed"); continue; } - CapabilityResult tempResult = new CapabilityResult(); - tempResult.influences = matchResult.getInfluences(); - candidate.setCost(dataLayout.getRows() * (tempResult.influences.size() + influenceFactor)); + NLayoutCandidate candidate = new NLayoutCandidate(layout); + CapabilityResult tempResult = new CapabilityResult(matchResult); if (!matchResult.getNeedDerive().isEmpty()) { candidate.setDerivedToHostMap(matchResult.getNeedDerive()); candidate.setDerivedTableSnapshots(candidate.getDerivedToHostMap().keySet().stream() .map(i -> chooserContext.convertToRef(i).getTable()).collect(Collectors.toSet())); } + long allRows = entry.getValue().stream().mapToLong(NDataLayout::getRows).sum(); + candidate.setCost(allRows * (tempResult.influences.size() + matchResult.getInfluenceFactor())); candidate.setCapabilityResult(tempResult); candidates.add(candidate); } - - if (Thread.interrupted()) { - throw new KylinTimeoutException("The query exceeds the set time limit of " - + KylinConfig.getInstanceFromEnv().getQueryTimeoutSeconds() + "s. Current step: Layout chooser. "); - } - - log.info("Matched candidates num : {}", candidates.size()); - if (candidates.isEmpty()) { - return null; - } - sortCandidates(candidates, chooserContext, sqlDigest); - return candidates.get(0); + return candidates; } - private static Collection<NDataLayout> getLayoutsFromSegments(List<NDataSegment> segments, NDataflow dataflow, + private static Collection<NDataLayout> getCommonLayouts(List<NDataSegment> segments, NDataflow dataflow, Map<String, Set<Long>> chSegmentToLayoutsMap) { KylinConfig projectConfig = NProjectManager.getProjectConfig(dataflow.getProject()); if (!projectConfig.isHeterogeneousSegmentEnabled()) { diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/TableIndexMatcher.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/TableIndexMatcher.java index 9604d2ada2..444ba8d04e 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/TableIndexMatcher.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/cuboid/TableIndexMatcher.java @@ -23,6 +23,8 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; import org.apache.kylin.metadata.cube.model.IndexEntity; import org.apache.kylin.metadata.cube.model.LayoutEntity; import org.apache.kylin.metadata.cube.model.NDataflow; @@ -33,20 +35,14 @@ import org.apache.kylin.metadata.project.NProjectManager; import org.apache.kylin.metadata.realization.CapabilityResult; import org.apache.kylin.metadata.realization.SQLDigest; -import org.apache.kylin.guava30.shaded.common.collect.Lists; -import org.apache.kylin.guava30.shaded.common.collect.Maps; - import lombok.extern.slf4j.Slf4j; @Slf4j public class TableIndexMatcher extends IndexMatcher { - private int layoutUnmatchedColsSize; - public TableIndexMatcher(SQLDigest sqlDigest, ChooserContext chooserContext, NDataflow dataflow, ColExcludedChecker excludedChecker, AntiFlatChecker antiFlatChecker) { super(sqlDigest, chooserContext, dataflow, excludedChecker, antiFlatChecker); - this.layoutUnmatchedColsSize = 0; this.valid = fastValidCheckBeforeMatch(); } @@ -66,8 +62,9 @@ public class TableIndexMatcher extends IndexMatcher { log.trace("Matching table index"); final Map<Integer, DeriveInfo> needDerive = Maps.newHashMap(); Set<Integer> unmatchedCols = initUnmatchedColumnIds(layout); + int penaltyFactor = 0; if (NProjectManager.getProjectConfig(project).useTableIndexAnswerSelectStarEnabled()) { - layoutUnmatchedColsSize = unmatchedCols.size(); + penaltyFactor = unmatchedCols.size(); unmatchedCols.removeAll(dataflow.getAllColumnsIndex()); } goThruDerivedDims(layout.getIndex(), needDerive, unmatchedCols); @@ -85,7 +82,7 @@ public class TableIndexMatcher extends IndexMatcher { CapabilityResult.IncapableCause.create(CapabilityResult.IncapableType.TABLE_INDEX_MISSING_COLS), Lists.newArrayList()); } - return new MatchResult(true, needDerive); + return new MatchResult(true, penaltyFactor, needDerive); } @Override @@ -94,8 +91,4 @@ public class TableIndexMatcher extends IndexMatcher { && !nonSupportFunTableIndex(sqlDigest.aggregations); return !index.isTableIndex() || (!sqlDigest.isRawQuery && !isUseTableIndex); } - - public int getLayoutUnmatchedColsSize() { - return layoutUnmatchedColsSize; - } } diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/model/NDataflowCapabilityChecker.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/model/NDataflowCapabilityChecker.java index 6ddf17d276..c99cf27412 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/model/NDataflowCapabilityChecker.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/cube/model/NDataflowCapabilityChecker.java @@ -84,7 +84,6 @@ public class NDataflowCapabilityChecker { if (candidateAndInfluence != null) { chosenCandidate = candidateAndInfluence; result.influences.addAll(candidateAndInfluence.getCapabilityResult().influences); - result.setLayoutUnmatchedColsSize(candidateAndInfluence.getLayoutUnmatchedColsSize()); logger.info("Matched layout {} snapshot in dataflow {} ", chosenCandidate, dataflow); } } diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/realization/CapabilityResult.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/realization/CapabilityResult.java index 95ba127c50..42c1648507 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/realization/CapabilityResult.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/realization/CapabilityResult.java @@ -22,15 +22,24 @@ import java.util.Collection; import java.util.List; import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.metadata.cube.cuboid.IndexMatcher; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TblColRef; import lombok.Getter; +import lombok.NoArgsConstructor; import lombok.Setter; +@NoArgsConstructor public class CapabilityResult { + public CapabilityResult(IndexMatcher.MatchResult matchResult) { + this.layoutUnmatchedColsSize = matchResult.getPenalty(); + this.influences = matchResult.getInfluences(); + this.capable = matchResult.isMatched(); + } + /** * Is capable or not */