This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
commit c9d7f5ec2ff8834f0f1b24610485bb819741206f Author: Zhong <nju_y...@apache.org> AuthorDate: Wed Aug 15 16:33:19 2018 +0800 KYLIN-3490 introduce DictionaryEnumerator to answer single encoded column related queries which will not hit cuboid --- .../org/apache/kylin/common/KylinConfigBase.java | 4 + .../java/org/apache/kylin/cube/model/CubeDesc.java | 81 +++++++----- .../org/apache/kylin/query/ITKylinQueryTest.java | 12 ++ .../query/sql_dict_enumerator/query01.sql | 21 +++ .../query/sql_dict_enumerator/query02.sql | 20 +++ .../query/sql_dict_enumerator/query03.sql | 20 +++ .../query/enumerator/DictionaryEnumerator.java | 142 +++++++++++++++++++++ .../apache/kylin/query/enumerator/OLAPQuery.java | 5 +- .../apache/kylin/query/relnode/OLAPTableScan.java | 3 + .../org/apache/kylin/query/schema/OLAPTable.java | 4 + 10 files changed, 276 insertions(+), 36 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index dbf22b5..f154eee 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -1227,6 +1227,10 @@ abstract public class KylinConfigBase implements Serializable { // QUERY // ============================================================================ + public boolean isDictionaryEnumeratorEnabled() { + return Boolean.valueOf(getOptional("kylin.query.enable-dict-enumerator", "false")); + } + public Boolean isEnumerableRulesEnabled() { return Boolean.parseBoolean(getOptional("kylin.query.calcite.enumerable-rules-enabled", "false")); } diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index 5b4a134..95c8b40 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -18,16 +18,28 @@ package org.apache.kylin.cube.model; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.google.common.base.Joiner; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +import java.lang.reflect.Method; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; + import org.apache.commons.codec.binary.Base64; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.ArrayUtils; @@ -65,27 +77,16 @@ import org.apache.kylin.metadata.realization.RealizationType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.lang.reflect.Method; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.TreeSet; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.base.Preconditions.checkState; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; /** */ @@ -1298,18 +1299,28 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { } /** - * Get columns that have dictionary + * Get dimensions that have dictionary */ - public Set<TblColRef> getAllColumnsHaveDictionary() { - Set<TblColRef> result = Sets.newLinkedHashSet(); + public Set<TblColRef> getAllDimsHaveDictionary() { + Set<TblColRef> result = Sets.newHashSet(); - // dictionaries in dimensions for (RowKeyColDesc rowKeyColDesc : rowkey.getRowKeyColumns()) { TblColRef colRef = rowKeyColDesc.getColRef(); if (rowkey.isUseDictionary(colRef)) { result.add(colRef); } } + return result; + } + + /** + * Get columns that have dictionary + */ + public Set<TblColRef> getAllColumnsHaveDictionary() { + Set<TblColRef> result = Sets.newLinkedHashSet(); + + // dictionaries in dimensions + result.addAll(getAllDimsHaveDictionary()); // dictionaries in measures for (MeasureDesc measure : measures) { diff --git a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java index e6afbe0..e01334f 100644 --- a/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java +++ b/kylin-it/src/test/java/org/apache/kylin/query/ITKylinQueryTest.java @@ -428,6 +428,18 @@ public class ITKylinQueryTest extends KylinTestBase { } @Test + public void testDictionaryEnumerator() throws Exception { + boolean ifDictEnumeratorEnabled = config.isDictionaryEnumeratorEnabled(); + if (!ifDictEnumeratorEnabled) { + config.setProperty("kylin.query.enable-dict-enumerator", "true"); + } + batchExecuteQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_dict_enumerator"); + if (!ifDictEnumeratorEnabled) { + config.setProperty("kylin.query.enable-dict-enumerator", "false"); + } + } + + @Test public void testValues() throws Exception { execAndCompQuery(getQueryFolderPrefix() + "src/test/resources/query/sql_values", null, true); } diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql new file mode 100644 index 0000000..963e3b3 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query01.sql @@ -0,0 +1,21 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select SLR_SEGMENT_CD +from TEST_KYLIN_FACT +group by SLR_SEGMENT_CD \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql new file mode 100644 index 0000000..2617e91 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query02.sql @@ -0,0 +1,20 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select DISTINCT SLR_SEGMENT_CD +from TEST_KYLIN_FACT \ No newline at end of file diff --git a/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql new file mode 100644 index 0000000..cb07666 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_dict_enumerator/query03.sql @@ -0,0 +1,20 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select MAX(SLR_SEGMENT_CD) +from TEST_KYLIN_FACT \ No newline at end of file diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java new file mode 100644 index 0000000..6af65ee --- /dev/null +++ b/query/src/main/java/org/apache/kylin/query/enumerator/DictionaryEnumerator.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.query.enumerator; + +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.calcite.linq4j.Enumerator; +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.cube.CubeInstance; +import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.metadata.model.SegmentStatusEnum; +import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.metadata.realization.IRealization; +import org.apache.kylin.metadata.tuple.Tuple; +import org.apache.kylin.query.relnode.OLAPContext; +import org.apache.kylin.storage.hybrid.HybridInstance; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public class DictionaryEnumerator implements Enumerator<Object[]> { + + private final static Logger logger = LoggerFactory.getLogger(DictionaryEnumerator.class); + + private List<Dictionary<String>> dictList; + private final Object[] current; + private final TblColRef dictCol; + private final int dictColIdx; + private Iterator<String> currentDict; + private Iterator<Dictionary<String>> iterator; + + public DictionaryEnumerator(OLAPContext olapContext) { + Preconditions.checkArgument(olapContext.allColumns.size() == 1, "The query should only relate to one column"); + + dictCol = olapContext.allColumns.iterator().next(); + Preconditions.checkArgument(ifColumnHaveDictionary(dictCol, olapContext.realization, false), + "The column " + dictCol + " should be encoded as dictionary for " + olapContext.realization); + + dictList = getAllDictionaries(dictCol, olapContext.realization); + current = new Object[olapContext.returnTupleInfo.size()]; + dictColIdx = olapContext.returnTupleInfo.getColumnIndex(dictCol); + + reset(); + logger.info("Will use DictionaryEnumerator to answer query which is only related to column " + dictCol); + } + + public static boolean ifDictionaryEnumeratorEligible(OLAPContext olapContext) { + if (olapContext.allColumns.size() != 1) { + return false; + } + + TblColRef dictCol = olapContext.allColumns.iterator().next(); + if (!ifColumnHaveDictionary(dictCol, olapContext.realization, true)) { + return false; + } + return true; + } + + private static boolean ifColumnHaveDictionary(TblColRef col, IRealization realization, boolean enableCheck) { + if (realization instanceof CubeInstance) { + final CubeInstance cube = (CubeInstance) realization; + boolean ifEnabled = !enableCheck || cube.getConfig().isDictionaryEnumeratorEnabled(); + return ifEnabled && cube.getDescriptor().getAllDimsHaveDictionary().contains(col); + } else if (realization instanceof HybridInstance) { + final HybridInstance hybridInstance = (HybridInstance) realization; + for (IRealization entry : hybridInstance.getRealizations()) { + if (!ifColumnHaveDictionary(col, entry, enableCheck)) { + return false; + } + } + return true; + } + return false; + } + + public static List<Dictionary<String>> getAllDictionaries(TblColRef col, IRealization realization) { + Set<Dictionary<String>> result = Sets.newHashSet(); + if (realization instanceof CubeInstance) { + final CubeInstance cube = (CubeInstance) realization; + for (CubeSegment segment : cube.getSegments(SegmentStatusEnum.READY)) { + result.add(segment.getDictionary(col)); + } + } else if (realization instanceof HybridInstance) { + final HybridInstance hybridInstance = (HybridInstance) realization; + for (IRealization entry : hybridInstance.getRealizations()) { + result.addAll(getAllDictionaries(col, entry)); + } + } else { + throw new IllegalStateException("All leaf realizations should be CubeInstance"); + } + return Lists.newArrayList(result); + } + + @Override + public boolean moveNext() { + while (currentDict == null || !currentDict.hasNext()) { + if (!iterator.hasNext()) { + return false; + } + final Dictionary<String> dict = iterator.next(); + currentDict = dict.enumeratorValues().iterator(); + } + + current[dictColIdx] = Tuple.convertOptiqCellValue(currentDict.next(), dictCol.getDatatype()); + return true; + } + + @Override + public Object[] current() { + return current; + } + + @Override + public void reset() { + iterator = dictList.iterator(); + } + + @Override + public void close() { + } +} \ No newline at end of file diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java index 84ac5cf..c094ff5 100644 --- a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java +++ b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPQuery.java @@ -37,7 +37,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl public enum EnumeratorTypeEnum { OLAP, //finish query with Cube or II, or a combination of both LOOKUP_TABLE, //using a snapshot of lookup table - HIVE //using hive + HIVE, //using hive + COL_DICT // using a column's dictionary } private final DataContext optiqContext; @@ -65,6 +66,8 @@ public class OLAPQuery extends AbstractEnumerable<Object[]> implements Enumerabl : new OLAPEnumerator(olapContext, optiqContext); case LOOKUP_TABLE: return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new LookupTableEnumerator(olapContext); + case COL_DICT: + return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new DictionaryEnumerator(olapContext); case HIVE: return BackdoorToggles.getPrepareOnly() ? new EmptyEnumerator() : new HiveEnumerator(olapContext); default: diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java index c23f1c5..ac6241f 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPTableScan.java @@ -72,6 +72,7 @@ import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.query.enumerator.DictionaryEnumerator; import org.apache.kylin.query.optrule.AggregateMultipleExpandRule; import org.apache.kylin.query.optrule.AggregateProjectReduceRule; import org.apache.kylin.query.optrule.OLAPAggregateRule; @@ -419,6 +420,8 @@ public class OLAPTableScan extends TableScan implements OLAPRel, EnumerableRel { // if the table to scan is not the fact table of cube, then it's a lookup table if (context.realization.getModel().isLookupTable(tableName)) { return "executeLookupTableQuery"; + } else if (DictionaryEnumerator.ifDictionaryEnumeratorEligible(context)) { + return "executeColumnDictionaryQuery"; } else { return "executeOLAPQuery"; } diff --git a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java index 216c6d4..60a856d 100644 --- a/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java +++ b/query/src/main/java/org/apache/kylin/query/schema/OLAPTable.java @@ -280,6 +280,10 @@ public class OLAPTable extends AbstractQueryableTable implements TranslatableTab return new OLAPQuery(optiqContext, EnumeratorTypeEnum.LOOKUP_TABLE, ctxSeq); } + public Enumerable<Object[]> executeColumnDictionaryQuery(DataContext optiqContext, int ctxSeq) { + return new OLAPQuery(optiqContext, EnumeratorTypeEnum.COL_DICT, ctxSeq); + } + public Enumerable<Object[]> executeHiveQuery(DataContext optiqContext, int ctxSeq) { return new OLAPQuery(optiqContext, EnumeratorTypeEnum.HIVE, ctxSeq); }