Repository: kylin Updated Branches: refs/heads/1.x-staging 109779a80 -> 11d313b2b
KYLIN-1186 Support precise Count Distinct using bitmap Signed-off-by: Yang Li <liy...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/11d313b2 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/11d313b2 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/11d313b2 Branch: refs/heads/1.x-staging Commit: 11d313b2bbc27e92a8898c6cee9b0e5bf037e7a2 Parents: 109779a Author: sunyerui <sunye...@gmail.com> Authored: Tue Jan 5 11:40:38 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sun Jan 10 09:52:31 2016 +0800 ---------------------------------------------------------------------- .../metadata/measure/MeasureCodecTest.java | 9 +- .../test_kylin_cube_without_slr_desc.json | 16 +- ...t_kylin_cube_without_slr_left_join_desc.json | 18 +- metadata/pom.xml | 8 + .../kylin/measure/MeasureTypeFactory.java | 2 + .../kylin/measure/bitmap/BitmapAggregator.java | 57 ++++++ .../kylin/measure/bitmap/BitmapCounter.java | 174 +++++++++++++++++++ .../bitmap/BitmapDistinctCountAggFunc.java | 52 ++++++ .../kylin/measure/bitmap/BitmapMeasureType.java | 109 ++++++++++++ .../kylin/measure/bitmap/BitmapSerializer.java | 81 +++++++++ .../measure/bitmap/BitmapAggregatorTest.java | 57 ++++++ .../kylin/measure/bitmap/BitmapCounterTest.java | 73 ++++++++ .../measure/bitmap/BitmapSerializerTest.java | 55 ++++++ .../apache/kylin/query/test/IIQueryTest.java | 7 + .../apache/kylin/query/test/KylinQueryTest.java | 5 + .../query/sql_distinct_precisely/query00.sql | 24 +++ .../query/sql_distinct_precisely/query01.sql | 25 +++ .../query/sql_distinct_precisely/query02.sql | 26 +++ .../query/sql_distinct_precisely/query03.sql | 26 +++ .../query/sql_distinct_precisely/query04.sql | 27 +++ .../query/sql_distinct_precisely/query05.sql | 25 +++ .../query/sql_distinct_precisely/query06.sql | 26 +++ .../query/sql_distinct_precisely/query07.sql | 24 +++ webapp/app/js/model/cubeConfig.js | 3 +- 24 files changed, 923 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java ---------------------------------------------------------------------- diff --git a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java index 0870914..7eebd72 100644 --- a/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java +++ b/cube/src/test/java/org/apache/kylin/metadata/measure/MeasureCodecTest.java @@ -29,6 +29,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.kylin.common.hll.HyperLogLogPlusCounter; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.measure.MeasureCodec; +import org.apache.kylin.measure.bitmap.BitmapCounter; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.junit.Test; @@ -41,7 +42,7 @@ public class MeasureCodecTest { @Test public void basicTest() { - MeasureDesc descs[] = new MeasureDesc[] { measure("double"), measure("long"), measure("decimal"), measure("HLLC16"), measure("HLLC16") }; + MeasureDesc descs[] = new MeasureDesc[] { measure("double"), measure("long"), measure("decimal"), measure("HLLC16"), measure("HLLC16"), measure("bitmap") }; MeasureCodec codec = new MeasureCodec(descs); DoubleWritable d = new DoubleWritable(1.0); @@ -53,7 +54,11 @@ public class MeasureCodecTest { HyperLogLogPlusCounter hllc2 = new HyperLogLogPlusCounter(16); hllc.add("1234567"); hllc.add("abcdefg"); - Object values[] = new Object[] { d, l, b, hllc, hllc2 }; + BitmapCounter bitmap = new BitmapCounter(); + bitmap.add(123); + bitmap.add(45678); + bitmap.add(Long.MAX_VALUE - 10); + Object values[] = new Object[] { d, l, b, hllc, hllc2, bitmap }; ByteBuffer buf = ByteBuffer.allocate(RowConstants.ROWVALUE_BUFFER_SIZE); http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json index bf29268..f31e358 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_desc.json @@ -166,6 +166,19 @@ "returntype": "hllc(10)" }, "dependent_measure_ref": null + }, + { + "id": 8, + "name": "LEAF_CATEG_ID_BITMAP", + "function": { + "expression": "COUNT_DISTINCT", + "parameter": { + "type": "column", + "value": "LEAF_CATEG_ID" + }, + "returntype": "bitmap" + }, + "dependent_measure_ref": null } ], "rowkey": { @@ -262,7 +275,8 @@ "qualifier": "m", "measure_refs": [ "seller_cnt_hll", - "seller_format_cnt" + "seller_format_cnt", + "leaf_categ_id_bitmap" ] } ] http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json index 2f314f0..ef27b03 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json @@ -166,6 +166,19 @@ "returntype": "hllc(10)" }, "dependent_measure_ref": null + }, + { + "id": 8, + "name": "LEAF_CATEG_ID_BITMAP", + "function": { + "expression": "COUNT_DISTINCT", + "parameter": { + "type": "column", + "value": "LEAF_CATEG_ID" + }, + "returntype": "bitmap" + }, + "dependent_measure_ref": null } ], "rowkey": { @@ -262,7 +275,8 @@ "qualifier": "m", "measure_refs": [ "seller_cnt_hll", - "seller_format_cnt" + "seller_format_cnt", + "leaf_categ_id_bitmap" ] } ] @@ -270,4 +284,4 @@ ] }, "notify_list": null -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/pom.xml ---------------------------------------------------------------------- diff --git a/metadata/pom.xml b/metadata/pom.xml index c7c849f..1d4a1a0 100644 --- a/metadata/pom.xml +++ b/metadata/pom.xml @@ -68,6 +68,14 @@ <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> </dependency> + <dependency> + <groupId>org.apache.calcite</groupId> + <artifactId>calcite-core</artifactId> + </dependency> + <dependency> + <groupId>org.roaringbitmap</groupId> + <artifactId>RoaringBitmap</artifactId> + </dependency> <!-- Env & Test --> http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java b/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java index 2eaf366..33d94fb 100644 --- a/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java +++ b/metadata/src/main/java/org/apache/kylin/measure/MeasureTypeFactory.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; import org.apache.kylin.measure.basic.BasicMeasureType; +import org.apache.kylin.measure.bitmap.BitmapMeasureType; import org.apache.kylin.measure.hllc.HLLCMeasureType; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.datatype.DataTypeSerializer; @@ -46,6 +47,7 @@ abstract public class MeasureTypeFactory<T> { // two built-in advanced measure types factoryInsts.add(new HLLCMeasureType.Factory()); + factoryInsts.add(new BitmapMeasureType.Factory()); /* * Maybe do classpath search for more custom measure types? http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java new file mode 100644 index 0000000..be72090 --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapAggregator.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.measure.MeasureAggregator; + +/** + * Created by sunyerui on 15/12/2. + */ +public class BitmapAggregator extends MeasureAggregator<BitmapCounter> { + + private BitmapCounter sum = null; + + @Override + public void reset() { + sum = null; + } + + @Override + public void aggregate(BitmapCounter value) { + if (sum == null) { + sum = new BitmapCounter(value); + } else { + sum.merge(value); + } + } + + @Override + public BitmapCounter getState() { + return sum; + } + + @Override + public int getMemBytesEstimate() { + if (sum == null) { + return Integer.MIN_VALUE; + } else { + return sum.getMemBytes(); + } + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java new file mode 100644 index 0000000..e153fc4 --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapCounter.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.hadoop.io.DataInputByteBuffer; +import org.apache.kylin.common.util.BytesUtil; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by sunyerui on 15/12/1. + */ +public class BitmapCounter implements Comparable<BitmapCounter> { + + private MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + + public BitmapCounter() { + } + + public BitmapCounter(BitmapCounter another) { + merge(another); + } + + public void clear() { + bitmap.clear(); + } + + public void add(int value) { + bitmap.add(value); + } + + public void add(byte[] value) { + if (value == null || value.length == 0) { + return; + } + try { + int l = Integer.parseInt(new String(value)); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(byte[] value, int offset, int length) { + if (value == null || length == 0) { + return; + } + try { + int l = Integer.parseInt(new String(value, offset, length)); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(String value) { + if (value == null || value.isEmpty()) { + return; + } + try { + int l = Integer.parseInt(value); + add(l); + } catch (NumberFormatException e) { + throw e; + } + } + + public void add(long value) { + // TODO we need support long later + add((int) value); + } + + public void merge(BitmapCounter another) { + this.bitmap.or(another.bitmap); + } + + public long getCount() { + return this.bitmap.getCardinality(); + } + + public int getMemBytes() { + return this.bitmap.getSizeInBytes(); + } + + public void writeRegisters(ByteBuffer out) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + bitmap.runOptimize(); + bitmap.serialize(dos); + dos.close(); + ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray()); + out.put(bb); + } + + public void readRegisters(ByteBuffer in) throws IOException { + DataInputByteBuffer input = new DataInputByteBuffer(); + input.reset(new ByteBuffer[] { in }); + bitmap.deserialize(input); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + bitmap.hashCode(); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + BitmapCounter other = (BitmapCounter) obj; + return bitmap.equals(other.bitmap); + } + + @Override + public int compareTo(BitmapCounter o) { + if (o == null) + return 1; + + long e1 = this.getCount(); + long e2 = o.getCount(); + + if (e1 == e2) + return 0; + else if (e1 > e2) + return 1; + else + return -1; + } + + public int peekLength(ByteBuffer in) { + int mark = in.position(); + int len; + + DataInputByteBuffer input = new DataInputByteBuffer(); + input.reset(new ByteBuffer[] { in }); + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(input); + } catch (IOException e) { + } + + len = in.position() - mark; + in.position(mark); + return len; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java new file mode 100644 index 0000000..d039b6d --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapDistinctCountAggFunc.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.measure.bitmap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Created by sunyerui on 15/12/22. + */ +public class BitmapDistinctCountAggFunc { + + private static final Logger logger = LoggerFactory.getLogger(BitmapDistinctCountAggFunc.class); + + public static BitmapCounter init() { + return null; + } + + public static BitmapCounter add(BitmapCounter counter, Object v) { + BitmapCounter c = (BitmapCounter) v; + if (counter == null) { + return new BitmapCounter(c); + } else { + counter.merge(c); + return counter; + } + } + + public static BitmapCounter merge(BitmapCounter counter0, Object counter1) { + return add(counter0, counter1); + } + + public static long result(BitmapCounter counter) { + return counter == null ? 0L : counter.getCount(); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java new file mode 100644 index 0000000..df8e765 --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java @@ -0,0 +1,109 @@ +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.measure.MeasureAggregator; +import org.apache.kylin.measure.MeasureIngester; +import org.apache.kylin.measure.MeasureType; +import org.apache.kylin.measure.MeasureTypeFactory; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.TblColRef; + +import java.util.List; +import java.util.Map; + +/** + * Created by sunyerui on 15/12/10. + */ +public class BitmapMeasureType extends MeasureType<BitmapCounter> { + public static final String FUNC_COUNT_DISTINCT = "COUNT_DISTINCT"; + public static final String DATATYPE_BITMAP = "bitmap"; + + public static class Factory extends MeasureTypeFactory<BitmapCounter> { + + @Override + public MeasureType<BitmapCounter> createMeasureType(String funcName, DataType dataType) { + return new BitmapMeasureType(funcName, dataType); + } + + @Override + public String getAggrFunctionName() { + return FUNC_COUNT_DISTINCT; + } + + @Override + public String getAggrDataTypeName() { + return DATATYPE_BITMAP; + } + + @Override + public Class<? extends DataTypeSerializer<BitmapCounter>> getAggrDataTypeSerializer() { + return BitmapSerializer.class; + } + } + + public DataType dataType; + + public BitmapMeasureType(String funcName, DataType dataType) { + this.dataType = dataType; + } + + @Override + public void validate(FunctionDesc functionDesc) throws IllegalArgumentException { + if (FUNC_COUNT_DISTINCT.equals(functionDesc.getExpression()) == false) + throw new IllegalArgumentException("BitmapMeasureType func is not " + FUNC_COUNT_DISTINCT + " but " + functionDesc.getExpression()); + + if (DATATYPE_BITMAP.equals(functionDesc.getReturnDataType().getName()) == false) + throw new IllegalArgumentException("BitmapMeasureType datatype is not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName()); + + List<TblColRef> colRefs = functionDesc.getParameter().getColRefs(); + if (colRefs.size() != 1) { + throw new IllegalArgumentException("BitmapMeasureType col parameters count is not 1 but " + colRefs.size()); + } + + TblColRef colRef = colRefs.get(0); + DataType type = colRef.getType(); + if (!type.isIntegerFamily()) { + throw new IllegalArgumentException("BitmapMeasureType col type is not IntegerFamily but " + type.getName() + " of column " + colRef.getCanonicalName()); + } + } + + @Override + public boolean isMemoryHungry() { + return true; + } + + @Override + public MeasureIngester<BitmapCounter> newIngester() { + return new MeasureIngester<BitmapCounter>() { + BitmapCounter current = new BitmapCounter(); + + @Override + public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) { + BitmapCounter bitmap = current; + bitmap.clear(); + for (String v : values) + bitmap.add(v); + return bitmap; + } + }; + } + + @Override + public MeasureAggregator<BitmapCounter> newAggregator() { + return new BitmapAggregator(); + } + + @Override + public boolean needRewrite() { + return true; + } + + @Override + public Class<?> getRewriteCalciteAggrFunctionClass() { + return BitmapDistinctCountAggFunc.class; + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java ---------------------------------------------------------------------- diff --git a/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java new file mode 100644 index 0000000..657bd77 --- /dev/null +++ b/metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapSerializer.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Created by sunyerui on 15/12/1. + */ +public class BitmapSerializer extends DataTypeSerializer<BitmapCounter> { + + private ThreadLocal<BitmapCounter> current = new ThreadLocal<>(); + + public BitmapSerializer(DataType type) { + } + + @Override + public void serialize(BitmapCounter value, ByteBuffer out) { + try { + value.writeRegisters(out); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private BitmapCounter current() { + BitmapCounter counter = current.get(); + if (counter == null) { + counter = new BitmapCounter(); + current.set(counter); + } + return counter; + } + + @Override + public BitmapCounter deserialize(ByteBuffer in) { + BitmapCounter counter = current(); + try { + counter.readRegisters(in); + } catch (IOException e) { + throw new RuntimeException(e); + } + return counter; + } + + @Override + public int peekLength(ByteBuffer in) { + return current().peekLength(in); + } + + @Override + public int maxLength() { + // the bitmap is non-fixed length, and we just assume 32MB here, maybe change it later + return 32 * 1024 * 1024; + } + + @Override + public int getStorageBytesEstimate() { + return current().getMemBytes(); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java ---------------------------------------------------------------------- diff --git a/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java new file mode 100644 index 0000000..20ca05a --- /dev/null +++ b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapAggregatorTest.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Created by sunyerui on 15/12/31. + */ +public class BitmapAggregatorTest { + + @Test + public void testAggregator() { + BitmapCounter counter = new BitmapCounter(); + counter.add(1); + counter.add(3333); + counter.add("123".getBytes()); + counter.add((long) 123); + assertEquals(3, counter.getCount()); + + BitmapCounter counter2 = new BitmapCounter(); + counter2.add("23456"); + counter2.add(12273456); + counter2.add("4258"); + counter2.add(123); + assertEquals(4, counter2.getCount()); + + BitmapAggregator aggregator = new BitmapAggregator(); + assertNull(aggregator.getState()); + assertEquals(Integer.MIN_VALUE, aggregator.getMemBytesEstimate()); + + aggregator.aggregate(counter); + aggregator.aggregate(counter2); + assertEquals(6, aggregator.getState().getCount()); + aggregator.reset(); + assertNull(aggregator.getState()); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java ---------------------------------------------------------------------- diff --git a/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java new file mode 100644 index 0000000..4cb874c --- /dev/null +++ b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapCounterTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; + +/** + * Created by sunyerui on 15/12/31. + */ +public class BitmapCounterTest { + + @Test + public void testAddAndMergeValues() { + BitmapCounter counter = new BitmapCounter(); + counter.add(1); + counter.add(3333); + counter.add("123".getBytes()); + counter.add((long) 123); + assertEquals(3, counter.getCount()); + + BitmapCounter counter2 = new BitmapCounter(); + counter2.add("23456"); + counter2.add(12273456); + counter2.add("4258"); + counter2.add(123); + assertEquals(4, counter2.getCount()); + + counter.merge(counter2); + assertEquals(6, counter.getCount()); + System.out.print("counter size: " + counter.getMemBytes() + ", counter2 size: " + counter2.getMemBytes()); + } + + @Test + public void testSerDeCounter() throws IOException { + BitmapCounter counter = new BitmapCounter(); + for (int i = 1; i < 1000; i++) { + counter.add(i); + } + ByteBuffer buffer = ByteBuffer.allocate(10 * 1024 * 1024); + counter.writeRegisters(buffer); + int len = buffer.position(); + + buffer.position(0); + assertEquals(len, counter.peekLength(buffer)); + assertEquals(0, buffer.position()); + + BitmapCounter counter2 = new BitmapCounter(); + counter2.readRegisters(buffer); + assertEquals(999, counter2.getCount()); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java ---------------------------------------------------------------------- diff --git a/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java new file mode 100644 index 0000000..e39419b --- /dev/null +++ b/metadata/src/test/java/org/apache/kylin/measure/bitmap/BitmapSerializerTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.measure.bitmap; + +import org.junit.Test; +import org.apache.kylin.metadata.datatype.DataType; +import java.nio.ByteBuffer; +import static org.junit.Assert.*; + +/** + * Created by sunyerui on 15/12/31. + */ +public class BitmapSerializerTest { + + @Test + public void testSerDeCounter() { + BitmapCounter counter = new BitmapCounter(); + counter.add(1); + counter.add(3333); + counter.add("123".getBytes()); + counter.add((long) 123); + assertEquals(3, counter.getCount()); + + ByteBuffer buffer = ByteBuffer.allocate(10 * 1024 * 1024); + BitmapSerializer serializer = new BitmapSerializer(DataType.ANY); + serializer.serialize(counter, buffer); + int len = buffer.position(); + + buffer.position(0); + BitmapSerializer deSerializer = new BitmapSerializer(DataType.ANY); + BitmapCounter counter2 = deSerializer.deserialize(buffer); + assertEquals(3, counter2.getCount()); + + buffer.position(0); + assertEquals(len, deSerializer.peekLength(buffer)); + assertEquals(32 * 1024 * 1024, deSerializer.maxLength()); + System.out.println("counter size " + deSerializer.getStorageBytesEstimate()); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/java/org/apache/kylin/query/test/IIQueryTest.java ---------------------------------------------------------------------- diff --git a/query/src/test/java/org/apache/kylin/query/test/IIQueryTest.java b/query/src/test/java/org/apache/kylin/query/test/IIQueryTest.java index e8c03ae..db2c1dd 100644 --- a/query/src/test/java/org/apache/kylin/query/test/IIQueryTest.java +++ b/query/src/test/java/org/apache/kylin/query/test/IIQueryTest.java @@ -24,6 +24,7 @@ import org.apache.kylin.metadata.realization.RealizationType; import org.apache.kylin.query.routing.Candidate; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import com.google.common.collect.Maps; @@ -58,4 +59,10 @@ public class IIQueryTest extends KylinQueryTest { execAndCompQuery("src/test/resources/query/sql_ii", null, true); } + @Override + @Test + @Ignore ("Skip Precisely Distinct Count Queries for II") + public void testPreciselyDistinctCountQuery() { + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java ---------------------------------------------------------------------- diff --git a/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java b/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java index f9e575b..2c6b155 100644 --- a/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java +++ b/query/src/test/java/org/apache/kylin/query/test/KylinQueryTest.java @@ -200,6 +200,11 @@ public class KylinQueryTest extends KylinTestBase { } @Test + public void testPreciselyDistinctCountQuery() throws Exception { + execAndCompQuery("src/test/resources/query/sql_distinct_precisely", null, true); + } + + @Test public void testTableauQuery() throws Exception { batchExecuteQuery("src/test/resources/query/sql_tableau"); } http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query00.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query00.sql b/query/src/test/resources/query/sql_distinct_precisely/query00.sql new file mode 100644 index 0000000..68052e0 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query00.sql @@ -0,0 +1,24 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, cal_dt, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name, cal_dt http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query01.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query01.sql b/query/src/test/resources/query/sql_distinct_precisely/query01.sql new file mode 100644 index 0000000..5289e31 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query01.sql @@ -0,0 +1,25 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query02.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query02.sql b/query/src/test/resources/query/sql_distinct_precisely/query02.sql new file mode 100644 index 0000000..06f6675 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query02.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name + having count(distinct seller_id) > 50 http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query03.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query03.sql b/query/src/test/resources/query/sql_distinct_precisely/query03.sql new file mode 100644 index 0000000..2cb0fdb --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query03.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV + , count(1) as TRANS_CNT, count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + inner JOIN edw.test_cal_dt as test_cal_dt + ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt + where test_kylin_fact.lstg_format_name='FP-GTC' + and test_cal_dt.week_beg_dt between DATE '2013-05-01' and DATE '2013-08-01' + group by test_cal_dt.week_beg_dt http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query04.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query04.sql b/query/src/test/resources/query/sql_distinct_precisely/query04.sql new file mode 100644 index 0000000..5fda81e --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query04.sql @@ -0,0 +1,27 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV + , count(1) as TRANS_CNT, count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + inner JOIN edw.test_cal_dt as test_cal_dt + ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt + where test_kylin_fact.lstg_format_name='FP-GTC' + and test_cal_dt.week_beg_dt between DATE '2013-05-01' and DATE '2013-08-01' + group by test_cal_dt.week_beg_dt + having count(distinct seller_id) > 2 http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query05.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query05.sql b/query/src/test/resources/query/sql_distinct_precisely/query05.sql new file mode 100644 index 0000000..b305111 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query05.sql @@ -0,0 +1,25 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name + order by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query06.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query06.sql b/query/src/test/resources/query/sql_distinct_precisely/query06.sql new file mode 100644 index 0000000..053a4ba --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query06.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + where lstg_format_name='FP-GTC' + group by lstg_format_name + order by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/query/src/test/resources/query/sql_distinct_precisely/query07.sql ---------------------------------------------------------------------- diff --git a/query/src/test/resources/query/sql_distinct_precisely/query07.sql b/query/src/test/resources/query/sql_distinct_precisely/query07.sql new file mode 100644 index 0000000..fb59fd6 --- /dev/null +++ b/query/src/test/resources/query/sql_distinct_precisely/query07.sql @@ -0,0 +1,24 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +select lstg_format_name, + sum(price) as GMV, + count(1) as TRANS_CNT, + count(distinct leaf_categ_id) as LEAF_CATEG_CNT + from test_kylin_fact + group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/11d313b2/webapp/app/js/model/cubeConfig.js ---------------------------------------------------------------------- diff --git a/webapp/app/js/model/cubeConfig.js b/webapp/app/js/model/cubeConfig.js index 8662680..e1f25b8 100644 --- a/webapp/app/js/model/cubeConfig.js +++ b/webapp/app/js/model/cubeConfig.js @@ -47,7 +47,8 @@ KylinApp.constant('cubeConfig', { {name: 'Error Rate < 4.88%', value: 'hllc12'}, {name: 'Error Rate < 2.44%', value: 'hllc14'}, {name: 'Error Rate < 1.72%', value: 'hllc15'}, - {name: 'Error Rate < 1.22%', value: 'hllc16'} + {name: 'Error Rate < 1.22%', value: 'hllc16'}, + {name: 'Precisely (Only for Integer Family column)', value: 'bitmap'} ], dftSelections: { measureExpression: 'SUM',