Repository: kylin Updated Branches: refs/heads/master be30f4bef -> 094510cf3
KYLIN-1543 Add FilterResultCache of last record Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/094510cf Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/094510cf Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/094510cf Branch: refs/heads/master Commit: 094510cf3aded8021e73de631e614ac735f1497e Parents: 6e2afbb Author: Li Yang <liy...@apache.org> Authored: Tue Mar 29 18:32:40 2016 +0800 Committer: Li Yang <liy...@apache.org> Committed: Tue Mar 29 18:35:03 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/gridtable/GTFilterScanner.java | 95 ++++++++++++++++++- .../kylin/gridtable/DictGridTableTest.java | 96 ++++++++++++++++++-- 2 files changed, 179 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java index 8017ca0..69e18ed 100644 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTFilterScanner.java @@ -19,10 +19,15 @@ package org.apache.kylin.gridtable; import java.io.IOException; +import java.util.BitSet; +import java.util.HashSet; import java.util.Iterator; import java.util.NoSuchElementException; +import java.util.Set; import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.metadata.filter.IFilterCodeSystem; import org.apache.kylin.metadata.filter.TupleFilter; import org.apache.kylin.metadata.model.TblColRef; @@ -32,6 +37,7 @@ public class GTFilterScanner implements IGTScanner { final private IGTScanner inputScanner; final private TupleFilter filter; + final private IFilterCodeSystem<ByteArray> filterCodeSystem; final private IEvaluatableTuple oneTuple; // avoid instance creation private GTRecord next = null; @@ -39,6 +45,7 @@ public class GTFilterScanner implements IGTScanner { public GTFilterScanner(IGTScanner inputScanner, GTScanRequest req) throws IOException { this.inputScanner = inputScanner; this.filter = req.getFilterPushDown(); + this.filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator()); this.oneTuple = new IEvaluatableTuple() { @Override public Object getValue(TblColRef col) { @@ -70,17 +77,16 @@ public class GTFilterScanner implements IGTScanner { return new Iterator<GTRecord>() { private Iterator<GTRecord> inputIterator = inputScanner.iterator(); - private IFilterCodeSystem<ByteArray> filterCodeSystem = GTUtil.wrap(getInfo().codeSystem.getComparator()); + private FilterResultCache resultCache = new FilterResultCache(getInfo(), filter); @Override public boolean hasNext() { if (next != null) return true; - while (inputIterator.hasNext()) { next = inputIterator.next(); - if (filter != null && filter.evaluate(oneTuple, filterCodeSystem) == false) { + if (!evaluate()) { continue; } return true; @@ -89,6 +95,20 @@ public class GTFilterScanner implements IGTScanner { return false; } + private boolean evaluate() { + if (filter == null) + return true; + + // 'next' and 'oneTuple' are referring to the same record + boolean[] cachedResult = resultCache.checkCache(next); + if (cachedResult != null) + return cachedResult[0]; + + boolean result = filter.evaluate(oneTuple, filterCodeSystem); + resultCache.setLastResult(result); + return result; + } + @Override public GTRecord next() { // fetch next record @@ -110,4 +130,73 @@ public class GTFilterScanner implements IGTScanner { }; } + + // cache the last one input and result, can reuse because rowkey are ordered, and same input could come in small group + static class FilterResultCache { + static final int CHECKPOINT = 10000; + static final double HIT_RATE_THRESHOLD = 0.5; + static boolean ENABLED = false; // for debug + + boolean enabled = ENABLED; + ImmutableBitSet colsInFilter; + int count; + int hit; + byte[] lastValues; + boolean[] lastResult; + + public FilterResultCache(GTInfo info, TupleFilter filter) { + colsInFilter = collectColumnsInFilter(filter); + lastValues = new byte[info.getMaxColumnLength(colsInFilter)]; + lastResult = new boolean[1]; + } + + public boolean[] checkCache(GTRecord record) { + if (!enabled) + return null; + + count++; + + // disable the cache if the hit rate is bad + if (count == CHECKPOINT) { + if ((double) hit / (double) count < HIT_RATE_THRESHOLD) { + enabled = false; + } + } + + boolean match = count > 1; + int p = 0; + for (int i = 0; i < colsInFilter.trueBitCount(); i++) { + int c = colsInFilter.trueBitAt(i); + ByteArray col = record.get(c); + if (match) { + match = BytesUtil.compareBytes(col.array(), col.offset(), lastValues, p, col.length()) == 0; + } + if (!match) { + System.arraycopy(col.array(), col.offset(), lastValues, p, col.length()); + } + p += col.length(); + } + + if (match) { + hit++; + return lastResult; + } else { + return null; + } + } + + public void setLastResult(boolean evalResult) { + lastResult[0] = evalResult; + } + + private ImmutableBitSet collectColumnsInFilter(TupleFilter filter) { + Set<TblColRef> columnsInFilter = new HashSet<TblColRef>(); + TupleFilter.collectColumns(filter, columnsInFilter); + BitSet result = new BitSet(); + for (TblColRef col : columnsInFilter) + result.set(col.getColumnDesc().getZeroBasedIndex()); + return new ImmutableBitSet(result); + } + + } } http://git-wip-us.apache.org/repos/asf/kylin/blob/094510cf/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java index 517299f..b1b5ee9 100644 --- a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java +++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java @@ -37,6 +37,7 @@ import org.apache.kylin.dict.TrieDictionaryBuilder; import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.dimension.DictionaryDimEnc; import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.gridtable.GTFilterScanner.FilterResultCache; import org.apache.kylin.gridtable.GTInfo.Builder; import org.apache.kylin.gridtable.memstore.GTSimpleMemStore; import org.apache.kylin.metadata.datatype.DataType; @@ -232,15 +233,15 @@ public class DictGridTableTest { @Test public void verifyFirstRow() throws IOException { - doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]",// - "[1421193600000, 30, Luke, 10, 10.5]",// - "[1421280000000, 20, Dong, 10, 10.5]",// - "[1421280000000, 20, Jason, 10, 10.5]",// - "[1421280000000, 30, Xu, 10, 10.5]",// - "[1421366400000, 20, Mahone, 10, 10.5]",// - "[1421366400000, 20, Qianhao, 10, 10.5]",// - "[1421366400000, 30, George, 10, 10.5]",// - "[1421366400000, 30, Shaofeng, 10, 10.5]",// + doScanAndVerify(table, new GTScanRequest(table.getInfo(), null, null, null), "[1421193600000, 30, Yang, 10, 10.5]", // + "[1421193600000, 30, Luke, 10, 10.5]", // + "[1421280000000, 20, Dong, 10, 10.5]", // + "[1421280000000, 20, Jason, 10, 10.5]", // + "[1421280000000, 30, Xu, 10, 10.5]", // + "[1421366400000, 20, Mahone, 10, 10.5]", // + "[1421366400000, 20, Qianhao, 10, 10.5]", // + "[1421366400000, 30, George, 10, 10.5]", // + "[1421366400000, 30, Shaofeng, 10, 10.5]", // "[1421452800000, 10, Kejia, 10, 10.5]"); } @@ -289,6 +290,39 @@ public class DictGridTableTest { } @Test + public void testFilterScannerPerf() throws IOException { + GridTable table = newTestPerfTable(); + GTInfo info = table.getInfo(); + + CompareTupleFilter fComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); + CompareTupleFilter fComp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10")); + LogicalTupleFilter filter = and(fComp1, fComp2); + + FilterResultCache.ENABLED = false; + testFilterScannerPerfInner(table, info, filter); + FilterResultCache.ENABLED = true; + testFilterScannerPerfInner(table, info, filter); + FilterResultCache.ENABLED = false; + testFilterScannerPerfInner(table, info, filter); + FilterResultCache.ENABLED = true; + testFilterScannerPerfInner(table, info, filter); + } + + @SuppressWarnings("unused") + private void testFilterScannerPerfInner(GridTable table, GTInfo info, LogicalTupleFilter filter) throws IOException { + long start = System.currentTimeMillis(); + GTScanRequest req = new GTScanRequest(info, null, null, filter); + IGTScanner scanner = table.scan(req); + int i = 0; + for (GTRecord r : scanner) { + i++; + } + scanner.close(); + long end = System.currentTimeMillis(); + System.out.println((end - start) + "ms with filter cache enabled=" + FilterResultCache.ENABLED + ", " + i + " rows"); + } + + @Test public void verifyConvertFilterConstants1() { GTInfo info = table.getInfo(); @@ -448,6 +482,50 @@ public class DictGridTableTest { return table; } + static GridTable newTestPerfTable() throws IOException { + GTInfo info = newInfo(); + GTSimpleMemStore store = new GTSimpleMemStore(info); + GridTable table = new GridTable(info, store); + + GTRecord r = new GTRecord(table.getInfo()); + GTBuilder builder = table.rebuild(); + + for (int i = 0; i < 100000; i++) { + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-14", "30", "Yang", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-14", "30", "Luke", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-15", "20", "Dong", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-15", "20", "Jason", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-15", "30", "Xu", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-16", "20", "Mahone", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-16", "20", "Qianhao", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-16", "30", "George", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new LongMutable(10), new BigDecimal("10.5"))); + + for (int j = 0; j < 10; j++) + builder.write(r.setValues("2015-01-17", "10", "Kejia", new LongMutable(10), new BigDecimal("10.5"))); + } + builder.close(); + + return table; + } + static GTInfo newInfo() { Builder builder = GTInfo.builder(); builder.setCodeSystem(newDictCodeSystem());