half way
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/650aea4e Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/650aea4e Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/650aea4e Branch: refs/heads/liyang-dimenc Commit: 650aea4e5fc33d59a022c6a4d89f1dc049295133 Parents: 4dd1b34 Author: Li, Yang <liy...@apache.org> Authored: Fri Feb 26 14:43:59 2016 +0800 Committer: Li, Yang <liy...@apache.org> Committed: Fri Feb 26 14:43:59 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/util/Dictionary.java | 232 ------------------- .../java/org/apache/kylin/cube/CubeManager.java | 2 +- .../java/org/apache/kylin/cube/CubeSegment.java | 2 +- .../kylin/cube/gridtable/CubeCodeSystem.java | 141 ++--------- .../kylin/cube/gridtable/CubeGridTable.java | 2 +- .../kylin/cube/gridtable/FixLenSerializer.java | 110 --------- .../cube/gridtable/TrimmedCubeCodeSystem.java | 148 +++++++----- .../inmemcubing/AbstractInMemCubeBuilder.java | 2 +- .../cube/inmemcubing/DoggedCubeBuilder.java | 2 +- .../cube/inmemcubing/InMemCubeBuilder.java | 2 +- .../InMemCubeBuilderInputConverter.java | 3 +- .../kylin/cube/kv/AbstractRowKeyEncoder.java | 4 +- .../org/apache/kylin/cube/kv/RowConstants.java | 2 - .../apache/kylin/cube/kv/RowKeyColumnIO.java | 22 +- .../org/apache/kylin/cube/util/CubingUtils.java | 2 +- .../kylin/gridtable/DefaultGTComparator.java | 2 +- .../kylin/cube/DictionaryManagerTest.java | 2 +- .../DoggedCubeBuilderStressTest.java | 2 +- .../cube/inmemcubing/DoggedCubeBuilderTest.java | 2 +- .../cube/inmemcubing/InMemCubeBuilderTest.java | 2 +- .../kylin/gridtable/DictGridTableTest.java | 2 +- .../apache/kylin/dict/DateStrDictionary.java | 2 +- .../org/apache/kylin/dict/DictCodeSystem.java | 4 +- .../apache/kylin/dict/DictionaryGenerator.java | 2 +- .../org/apache/kylin/dict/DictionaryInfo.java | 2 +- .../kylin/dict/DictionaryInfoSerializer.java | 2 +- 
.../apache/kylin/dict/DictionaryManager.java | 2 +- .../apache/kylin/dict/DictionarySerializer.java | 2 +- .../org/apache/kylin/dict/IDictionaryAware.java | 2 +- .../dict/MultipleDictionaryValueEnumerator.java | 2 +- .../apache/kylin/dict/TimeStrDictionary.java | 2 +- .../org/apache/kylin/dict/TrieDictionary.java | 2 +- .../dict/TupleFilterDictionaryTranslater.java | 3 +- .../apache/kylin/dict/lookup/SnapshotTable.java | 2 +- .../apache/kylin/dict/NumberDictionaryTest.java | 2 +- .../org/apache/kylin/dimension/Dictionary.java | 231 ++++++++++++++++++ .../kylin/dimension/DictionaryDimEnc.java | 133 +++++++++++ .../kylin/dimension/DimensionEncoding.java | 62 +++++ .../apache/kylin/dimension/FixedLenDimEnc.java | 136 +++++++++++ .../apache/kylin/measure/MeasureIngester.java | 2 +- .../org/apache/kylin/measure/MeasureType.java | 2 +- .../kylin/measure/basic/BigDecimalIngester.java | 2 +- .../kylin/measure/basic/DoubleIngester.java | 2 +- .../kylin/measure/basic/LongIngester.java | 2 +- .../kylin/measure/bitmap/BitmapMeasureType.java | 2 +- .../ExtendedColumnMeasureType.java | 2 +- .../kylin/measure/hllc/HLLCMeasureType.java | 2 +- .../kylin/measure/topn/TopNMeasureType.java | 2 +- .../storage/translate/ColumnValueRange.java | 2 +- .../storage/translate/ColumnValueRangeTest.java | 2 +- .../engine/mr/steps/BaseCuboidMapperBase.java | 2 +- .../engine/mr/steps/InMemCuboidMapper.java | 2 +- .../engine/mr/steps/MergeCuboidMapper.java | 2 +- .../engine/mr/steps/MergeCuboidMapperTest.java | 2 +- .../apache/kylin/engine/spark/SparkCubing.java | 2 +- .../streaming/OneOffStreamingBuilder.java | 2 +- .../engine/streaming/StreamingBatchBuilder.java | 2 +- .../streaming/cube/StreamingCubeBuilder.java | 2 +- .../apache/kylin/invertedindex/IISegment.java | 2 +- .../index/CompressedValueContainer.java | 2 +- .../invertedindex/index/RawTableRecord.java | 4 +- .../apache/kylin/invertedindex/index/Slice.java | 2 +- .../kylin/invertedindex/index/SliceBuilder.java | 2 +- 
.../kylin/invertedindex/index/TableRecord.java | 2 +- .../invertedindex/index/TableRecordInfo.java | 2 +- .../invertedindex/model/IIKeyValueCodec.java | 2 +- .../invertedindex/util/IIDictionaryBuilder.java | 2 +- .../kylin/invertedindex/IIInstanceTest.java | 2 +- .../invertedindex/InvertedIndexLocalTest.java | 2 +- .../hbase/common/coprocessor/AggrKey.java | 4 +- .../common/coprocessor/FilterDecorator.java | 5 +- .../storage/hbase/cube/v1/CubeStorageQuery.java | 2 +- .../hbase/cube/v1/CubeTupleConverter.java | 2 +- .../v1/coprocessor/observer/ObserverTuple.java | 2 +- .../hbase/cube/v2/CubeTupleConverter.java | 2 +- .../endpoint/BitMapFilterEvaluator.java | 2 +- .../endpoint/ClearTextDictionary.java | 2 +- .../ii/coprocessor/endpoint/IIEndpoint.java | 21 +- .../coprocessor/endpoint/LocalDictionary.java | 2 +- .../endpoint/BitMapFilterEvaluatorTest.java | 2 +- 80 files changed, 776 insertions(+), 613 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java b/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java deleted file mode 100644 index 6d3fa62..0000000 --- a/core-common/src/main/java/org/apache/kylin/common/util/Dictionary.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.common.util; - -import java.io.PrintStream; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; - -import org.apache.kylin.common.persistence.Writable; - -/** - * A bi-way dictionary that maps from dimension/column values to IDs and vice - * versa. By storing IDs instead of real values, the size of cube is - * significantly reduced. - * - * - IDs are smallest integers possible for the cardinality of a column, for the - * purpose of minimal storage space - IDs preserve ordering of values, such that - * range query can be applied to IDs directly - * - * A dictionary once built, is immutable. This allows optimal memory footprint - * by e.g. flatten the Trie structure into a byte array, replacing node pointers - * with array offsets. - * - * @author yangli9 - */ -@SuppressWarnings("serial") -abstract public class Dictionary<T> implements Writable, Serializable { - - public static final byte NULL = (byte) 0xff; - - // ID with all bit-1 (0xff e.g.) 
reserved for NULL value - public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffffff }; - - abstract public int getMinId(); - - abstract public int getMaxId(); - - public int getSize() { - return getMaxId() - getMinId() + 1; - } - - /** - * @return the size of an ID in bytes, determined by the cardinality of column - */ - abstract public int getSizeOfId(); - - /** - * @return the (maximum) size of value in bytes, determined by the longest value - */ - abstract public int getSizeOfValue(); - - /** - * @return true if each entry of this dict is contained by the dict in param - */ - abstract public boolean contains(Dictionary<?> another); - - /** - * Convenient form of <code>getIdFromValue(value, 0)</code> - */ - final public int getIdFromValue(T value) throws IllegalArgumentException { - return getIdFromValue(value, 0); - } - - /** - * Returns the ID integer of given value. In case of not found - * <p> - * - if roundingFlag=0, throw IllegalArgumentException; <br> - * - if roundingFlag<0, the closest smaller ID integer if exist; <br> - * - if roundingFlag>0, the closest bigger ID integer if exist. 
<br> - * <p> - * The implementation often has cache, thus faster than the byte[] version getIdFromValueBytes() - * - * @throws IllegalArgumentException - * if value is not found in dictionary and rounding is off; - * or if rounding cannot find a smaller or bigger ID - */ - final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException { - if (isNullObjectForm(value)) - return nullId(); - else - return getIdFromValueImpl(value, roundingFlag); - } - - final public boolean containsValue(T value) throws IllegalArgumentException { - if (isNullObjectForm(value)) { - return true; - } else { - try { - //if no key found, it will throw exception - getIdFromValueImpl(value, 0); - } catch (IllegalArgumentException e) { - return false; - } - return true; - } - } - - protected boolean isNullObjectForm(T value) { - return value == null; - } - - abstract protected int getIdFromValueImpl(T value, int roundingFlag); - - /** - * @return the value corresponds to the given ID - * @throws IllegalArgumentException - * if ID is not found in dictionary - */ - final public T getValueFromId(int id) throws IllegalArgumentException { - if (isNullId(id)) - return null; - else - return getValueFromIdImpl(id); - } - - abstract protected T getValueFromIdImpl(int id); - - /** - * Convenient form of - * <code>getIdFromValueBytes(value, offset, len, 0)</code> - */ - final public int getIdFromValueBytes(byte[] value, int offset, int len) throws IllegalArgumentException { - return getIdFromValueBytes(value, offset, len, 0); - } - - /** - * A lower level API, return ID integer from raw value bytes. In case of not found - * <p> - * - if roundingFlag=0, throw IllegalArgumentException; <br> - * - if roundingFlag<0, the closest smaller ID integer if exist; <br> - * - if roundingFlag>0, the closest bigger ID integer if exist. <br> - * <p> - * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value). 
- * - * @throws IllegalArgumentException - * if value is not found in dictionary and rounding is off; - * or if rounding cannot find a smaller or bigger ID - */ - final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException { - if (isNullByteForm(value, offset, len)) - return nullId(); - else { - int id = getIdFromValueBytesImpl(value, offset, len, roundingFlag); - if (id < 0) - throw new IllegalArgumentException("Value not exists!"); - return id; - } - } - - protected boolean isNullByteForm(byte[] value, int offset, int len) { - return value == null; - } - - abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag); - - final public byte[] getValueBytesFromId(int id) { - if (isNullId(id)) - return BytesUtil.EMPTY_BYTE_ARRAY; - else - return getValueBytesFromIdImpl(id); - } - - abstract protected byte[] getValueBytesFromIdImpl(int id); - - /** - * A lower level API, get byte values from ID, return the number of bytes - * written. Bypassing the cache layer, this could be significantly slower - * than getIdFromValue(T value). 
- * - * @return size of value bytes, 0 if empty string, -1 if null - * - * @throws IllegalArgumentException - * if ID is not found in dictionary - */ - final public int getValueBytesFromId(int id, byte[] returnValue, int offset) throws IllegalArgumentException { - if (isNullId(id)) - return -1; - else - return getValueBytesFromIdImpl(id, returnValue, offset); - } - - abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset); - - abstract public void dump(PrintStream out); - - public int nullId() { - return NULL_ID[getSizeOfId()]; - } - - public boolean isNullId(int id) { - int nullId = NULL_ID[getSizeOfId()]; - return (nullId & id) == nullId; - } - - /** utility that converts a dictionary ID to string, preserving order */ - public static String dictIdToString(byte[] idBytes, int offset, int length) { - try { - return new String(idBytes, offset, length, "ISO-8859-1"); - } catch (UnsupportedEncodingException e) { - // never happen - return null; - } - } - - /** the reverse of dictIdToString(), returns integer ID */ - public static int stringToDictId(String str) { - try { - byte[] bytes = str.getBytes("ISO-8859-1"); - return BytesUtil.readUnsigned(bytes, 0, bytes.length); - } catch (UnsupportedEncodingException e) { - // never happen - return 0; - } - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java index 84dd30a..2a4dea8 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java @@ -32,7 +32,6 @@ import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.Serializer; import org.apache.kylin.common.restclient.Broadcaster; import 
org.apache.kylin.common.restclient.CaseInsensitiveStringCache; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.DimensionDesc; @@ -42,6 +41,7 @@ import org.apache.kylin.dict.DistinctColumnValuesProvider; import org.apache.kylin.dict.lookup.LookupStringTable; import org.apache.kylin.dict.lookup.SnapshotManager; import org.apache.kylin.dict.lookup.SnapshotTable; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.SegmentStatusEnum; import org.apache.kylin.metadata.model.TableDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java index 67dce73..9effb4e 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java @@ -25,12 +25,12 @@ import java.util.TimeZone; import java.util.concurrent.ConcurrentHashMap; import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ShardingHash; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; import org.apache.kylin.dict.IDictionaryAware; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.IJoinedFlatTableDesc; import org.apache.kylin.metadata.model.SegmentStatusEnum; import org.apache.kylin.metadata.model.TblColRef; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java 
---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java index f83f920..105838c 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeCodeSystem.java @@ -4,9 +4,10 @@ import java.nio.ByteBuffer; import java.util.Collections; import java.util.Map; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DictionaryDimEnc.DictionarySerializer; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.DefaultGTComparator; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.IGTCodeSystem; @@ -14,8 +15,6 @@ import org.apache.kylin.gridtable.IGTComparator; import org.apache.kylin.measure.MeasureAggregator; import org.apache.kylin.metadata.datatype.DataTypeSerializer; -import com.google.common.collect.Maps; - /** * defines how column values will be encoded to/ decoded from GTRecord * @@ -25,63 +24,44 @@ import com.google.common.collect.Maps; @SuppressWarnings({ "rawtypes", "unchecked" }) public class CubeCodeSystem implements IGTCodeSystem { - // ============================================================================ - private GTInfo info; - private Map<Integer, Dictionary> dictionaryMap; // column index ==> dictionary of column - private Map<Integer, Integer> fixLenMap; // column index ==> fixed length of column - private Map<Integer, Integer> dependentMetricsMap; + private DimensionEncoding[] dimEncs; private DataTypeSerializer[] serializers; private IGTComparator comparator; + private Map<Integer, Integer> dependentMetricsMap; - public 
CubeCodeSystem(Map<Integer, Dictionary> dictionaryMap) { - this(dictionaryMap, Collections.<Integer, Integer> emptyMap(), Collections.<Integer, Integer> emptyMap()); + public CubeCodeSystem(DimensionEncoding[] dimEncs) { + this(dimEncs, Collections.<Integer, Integer> emptyMap()); } - public CubeCodeSystem(Map<Integer, Dictionary> dictionaryMap, Map<Integer, Integer> fixLenMap, Map<Integer, Integer> dependentMetricsMap) { - this.dictionaryMap = dictionaryMap; - this.fixLenMap = fixLenMap; + public CubeCodeSystem(DimensionEncoding[] dimEncs, Map<Integer, Integer> dependentMetricsMap) { + this.dimEncs = dimEncs; + this.comparator = new DefaultGTComparator(); this.dependentMetricsMap = dependentMetricsMap; } public TrimmedCubeCodeSystem trimForCoprocessor() { - Map<Integer,Integer> dictSizes = Maps.newHashMap(); - Map<Integer,Integer> fixedLengthSizes = Maps.newHashMap(); - - for (int i = 0; i < serializers.length; i++) { - if (serializers[i] instanceof DictionarySerializer) { - dictSizes.put(i,serializers[i].maxLength()); - } else if(serializers[i] instanceof FixLenSerializer) { - fixedLengthSizes.put(i,serializers[i].maxLength()); - } - } - - return new TrimmedCubeCodeSystem(dependentMetricsMap,dictSizes,fixedLengthSizes); + return new TrimmedCubeCodeSystem(dimEncs, dependentMetricsMap); } @Override public void init(GTInfo info) { this.info = info; - serializers = new DataTypeSerializer[info.getColumnCount()]; - for (int i = 0; i < info.getColumnCount(); i++) { - // dimension with dictionary - if (dictionaryMap.get(i) != null) { - serializers[i] = new DictionarySerializer(dictionaryMap.get(i)); - } - // dimension of fixed length - else if (fixLenMap.get(i) != null) { - serializers[i] = new FixLenSerializer(fixLenMap.get(i)); + this.serializers = new DataTypeSerializer[info.getColumnCount()]; + for (int i = 0; i < serializers.length; i++) { + DimensionEncoding dimEnc = i < dimEncs.length ? 
dimEncs[i] : null; + + // for dimensions + if (dimEnc != null) { + serializers[i] = dimEnc.asDataTypeSerializer(); } - // metrics + // for measures else { serializers[i] = DataTypeSerializer.create(info.getColumnType(i)); } } - - //when changing this, also take care of TrimmedCubeCodeSystem.init - this.comparator = new DefaultGTComparator(); } @Override @@ -108,10 +88,14 @@ public class CubeCodeSystem implements IGTCodeSystem { public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) { DataTypeSerializer serializer = serializers[col]; if (serializer instanceof DictionarySerializer) { - ((DictionarySerializer) serializer).serializeWithRounding(value, roundingFlag, buf); + DictionaryDimEnc dictEnc = ((DictionaryDimEnc) dimEncs[col]); + if (dictEnc.getRoundingFlag() != roundingFlag) { + serializer = dictEnc.copy(roundingFlag).asDataTypeSerializer(); + } + serializer.serialize(value, buf); } else { if (value instanceof String) { - // for dimensions mostly, measures are converted by MeasureIngestor before reaching this point + // for dimensions; measures are converted by MeasureIngestor before reaching this point value = serializer.valueOf((String) value); } serializer.serialize(value, buf); @@ -151,79 +135,4 @@ public class CubeCodeSystem implements IGTCodeSystem { return result; } - static class TrimmedDictionarySerializer extends DataTypeSerializer { - - final int fieldSize; - - public TrimmedDictionarySerializer(int fieldSize) { - this.fieldSize = fieldSize; - } - - @Override - public int peekLength(ByteBuffer in) { - return fieldSize; - } - - @Override - public int maxLength() { - return fieldSize; - } - - @Override - public int getStorageBytesEstimate() { - return fieldSize; - } - - @Override - public void serialize(Object value, ByteBuffer out) { - throw new UnsupportedOperationException(); - } - - @Override - public Object deserialize(ByteBuffer in) { - throw new UnsupportedOperationException(); - } - } - - static class 
DictionarySerializer extends DataTypeSerializer { - private Dictionary dictionary; - - DictionarySerializer(Dictionary dictionary) { - this.dictionary = dictionary; - } - - public void serializeWithRounding(Object value, int roundingFlag, ByteBuffer buf) { - int id = dictionary.getIdFromValue(value, roundingFlag); - BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf); - } - - @Override - public void serialize(Object value, ByteBuffer buf) { - int id = dictionary.getIdFromValue(value); - BytesUtil.writeUnsigned(id, dictionary.getSizeOfId(), buf); - } - - @Override - public Object deserialize(ByteBuffer in) { - int id = BytesUtil.readUnsigned(in, dictionary.getSizeOfId()); - return dictionary.getValueFromId(id); - } - - @Override - public int peekLength(ByteBuffer in) { - return dictionary.getSizeOfId(); - } - - @Override - public int maxLength() { - return dictionary.getSizeOfId(); - } - - @Override - public int getStorageBytesEstimate() { - return dictionary.getSizeOfId(); - } - - } - } http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java index 05fc8a5..bc65ac7 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/CubeGridTable.java @@ -3,11 +3,11 @@ package org.apache.kylin.cube.gridtable; import java.util.List; import java.util.Map; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTInfo; import 
org.apache.kylin.metadata.model.TblColRef; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java deleted file mode 100644 index 24c4a19..0000000 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/FixLenSerializer.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.cube.gridtable; - -import java.nio.ByteBuffer; -import java.util.Arrays; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.cube.kv.RowConstants; -import org.apache.kylin.metadata.datatype.DataTypeSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class FixLenSerializer extends DataTypeSerializer { - - private static Logger logger = LoggerFactory.getLogger(FixLenSerializer.class); - - // be thread-safe and avoid repeated obj creation - private ThreadLocal<byte[]> current = new ThreadLocal<byte[]>(); - - private int fixLen; - transient int avoidVerbose = 0; - - FixLenSerializer(int fixLen) { - this.fixLen = fixLen; - } - - private byte[] currentBuf() { - byte[] buf = current.get(); - if (buf == null) { - buf = new byte[fixLen]; - current.set(buf); - } - return buf; - } - - @Override - public void serialize(Object value, ByteBuffer out) { - byte[] buf = currentBuf(); - if (value == null) { - Arrays.fill(buf, Dictionary.NULL); - out.put(buf); - } else { - byte[] bytes = Bytes.toBytes(value.toString()); - if (bytes.length > fixLen) { - if (avoidVerbose++ % 10000 == 0) { - logger.warn("Expect at most " + fixLen + " bytes, but got " + bytes.length + ", will truncate, value string: " + value.toString() + " times:" + avoidVerbose); - } - } - out.put(bytes, 0, Math.min(bytes.length, fixLen)); - for (int i = bytes.length; i < fixLen; i++) { - out.put(RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - } - } - } - - @Override - public Object deserialize(ByteBuffer in) { - byte[] buf = currentBuf(); - in.get(buf); - - int tail = fixLen; - while (tail > 0 && (buf[tail - 1] == RowConstants.ROWKEY_PLACE_HOLDER_BYTE || buf[tail - 1] == Dictionary.NULL)) { - tail--; - } - - if (tail == 0) { - return buf[0] == Dictionary.NULL ? 
null : ""; - } - - return Bytes.toString(buf, 0, tail); - } - - @Override - public int peekLength(ByteBuffer in) { - return fixLen; - } - - @Override - public int maxLength() { - return fixLen; - } - - @Override - public int getStorageBytesEstimate() { - return fixLen; - } - - @Override - public Object valueOf(String str) { - return str; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java index 6048ba0..e5169d2 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/gridtable/TrimmedCubeCodeSystem.java @@ -26,6 +26,7 @@ import java.util.Map; import org.apache.kylin.common.util.BytesSerializer; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.common.util.ImmutableBitSet; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.DefaultGTComparator; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.IGTCodeSystem; @@ -35,44 +36,44 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer; import com.google.common.collect.Maps; +/** + * A limited code system where dimension value ser/des is disabled. + * Used inside coprocessor only. Because dictionary is not available. 
+ */ @SuppressWarnings({ "rawtypes", "unchecked" }) public class TrimmedCubeCodeSystem implements IGTCodeSystem { - private Map<Integer, Integer> dependentMetricsMap; - private Map<Integer, Integer> dictSizes; - private Map<Integer, Integer> fixedLengthSize; + private GTInfo info; - private transient GTInfo info; - private transient DataTypeSerializer[] serializers; - private transient IGTComparator comparator; + private DimensionEncoding[] dimEncs; + private DataTypeSerializer[] serializers; + private IGTComparator comparator; + private Map<Integer, Integer> dependentMetricsMap; - public TrimmedCubeCodeSystem(Map<Integer, Integer> dependentMetricsMap, Map<Integer, Integer> dictSizes, Map<Integer, Integer> fixedLengthSize) { + public TrimmedCubeCodeSystem(DimensionEncoding[] dimEncs, Map<Integer, Integer> dependentMetricsMap) { + this.dimEncs = dimEncs; + this.comparator = new DefaultGTComparator(); this.dependentMetricsMap = dependentMetricsMap; - this.dictSizes = dictSizes; - this.fixedLengthSize = fixedLengthSize; } @Override public void init(GTInfo info) { this.info = info; - serializers = new DataTypeSerializer[info.getColumnCount()]; - for (int i = 0; i < info.getColumnCount(); i++) { - // dimension with dictionary - if (dictSizes.get(i) != null) { - serializers[i] = new CubeCodeSystem.TrimmedDictionarySerializer(dictSizes.get(i)); - } - // dimension of fixed length - else if (fixedLengthSize.get(i) != null) { - serializers[i] = new FixLenSerializer(fixedLengthSize.get(i)); + this.serializers = new DataTypeSerializer[info.getColumnCount()]; + for (int i = 0; i < serializers.length; i++) { + DimensionEncoding dimEnc = i < dimEncs.length ? 
dimEncs[i] : null; + + // for dimensions + if (dimEnc != null) { + // use trimmed serializer cause no dictionary in coprocessor + serializers[i] = new TrimmedDimensionSerializer(dimEnc.getLengthOfEncoding()); } - // metrics + // for measures else { serializers[i] = DataTypeSerializer.create(info.getColumnType(i)); } } - - this.comparator = new DefaultGTComparator(); } @Override @@ -98,11 +99,6 @@ public class TrimmedCubeCodeSystem implements IGTCodeSystem { @Override public void encodeColumnValue(int col, Object value, int roundingFlag, ByteBuffer buf) { DataTypeSerializer serializer = serializers[col]; - - // if (((value instanceof String) && !(serializer instanceof StringSerializer || serializer instanceof CubeCodeSystem.FixLenSerializer))) { - // value = serializer.valueOf((String) value); - // } - serializer.serialize(value, buf); } @@ -149,49 +145,95 @@ public class TrimmedCubeCodeSystem implements IGTCodeSystem { BytesUtil.writeVInt(x.getValue(), out); } - BytesUtil.writeVInt(value.dictSizes.size(), out); - for (Map.Entry<Integer, Integer> x : value.dictSizes.entrySet()) { - BytesUtil.writeVInt(x.getKey(), out); - BytesUtil.writeVInt(x.getValue(), out); - } - - BytesUtil.writeVInt(value.fixedLengthSize.size(), out); - for (Map.Entry<Integer, Integer> x : value.fixedLengthSize.entrySet()) { - BytesUtil.writeVInt(x.getKey(), out); - BytesUtil.writeVInt(x.getValue(), out); + BytesUtil.writeVInt(value.dimEncs.length, out); + for (int i = 0; i < value.dimEncs.length; i++) { + DimensionEncoding enc = value.dimEncs[i]; + BytesUtil.writeVInt(enc == null ? 
0 : enc.getLengthOfEncoding(), out); } } @Override public TrimmedCubeCodeSystem deserialize(ByteBuffer in) { Map<Integer, Integer> dependentMetricsMap = Maps.newHashMap(); - Map<Integer, Integer> dictSizes = Maps.newHashMap(); - Map<Integer, Integer> fixedLengthSize = Maps.newHashMap(); - - int size = 0; - size = BytesUtil.readVInt(in); + int size = BytesUtil.readVInt(in); for (int i = 0; i < size; ++i) { int key = BytesUtil.readVInt(in); int value = BytesUtil.readVInt(in); dependentMetricsMap.put(key, value); } - size = BytesUtil.readVInt(in); - for (int i = 0; i < size; ++i) { - int key = BytesUtil.readVInt(in); - int value = BytesUtil.readVInt(in); - dictSizes.put(key, value); + DimensionEncoding[] dimEncs = new DimensionEncoding[BytesUtil.readVInt(in)]; + for (int i = 0; i < dimEncs.length; i++) { + int fixedLen = BytesUtil.readVInt(in); + if (fixedLen > 0) + dimEncs[i] = new TrimmedDimEnc(fixedLen); } - size = BytesUtil.readVInt(in); - for (int i = 0; i < size; ++i) { - int key = BytesUtil.readVInt(in); - int value = BytesUtil.readVInt(in); - fixedLengthSize.put(key, value); - } - return new TrimmedCubeCodeSystem(dependentMetricsMap, dictSizes, fixedLengthSize); + return new TrimmedCubeCodeSystem(dimEncs, dependentMetricsMap); } }; + static class TrimmedDimEnc extends DimensionEncoding { + final int fixedLen; + + TrimmedDimEnc(int fixedLen) { + this.fixedLen = fixedLen; + } + + @Override + public int getLengthOfEncoding() { + return fixedLen; + } + + @Override + public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + throw new UnsupportedOperationException(); + } + + @Override + public String decode(byte[] bytes, int offset, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public DataTypeSerializer<Object> asDataTypeSerializer() { + throw new UnsupportedOperationException(); + } + } + + static class TrimmedDimensionSerializer extends DataTypeSerializer<Object> { + + final int fixedLen; + + public 
TrimmedDimensionSerializer(int fixedLen) { + this.fixedLen = fixedLen; + } + + @Override + public int peekLength(ByteBuffer in) { + return fixedLen; + } + + @Override + public int maxLength() { + return fixedLen; + } + + @Override + public int getStorageBytesEstimate() { + return fixedLen; + } + + @Override + public void serialize(Object value, ByteBuffer out) { + throw new UnsupportedOperationException(); + } + + @Override + public Object deserialize(ByteBuffer in) { + throw new UnsupportedOperationException(); + } + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java index 335a769..c567c9e 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/AbstractInMemCubeBuilder.java @@ -21,8 +21,8 @@ import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.GridTable; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java index af6ef82..c210bf9 100644 --- 
a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilder.java @@ -30,10 +30,10 @@ import java.util.concurrent.ConcurrentNavigableMap; import java.util.concurrent.TimeUnit; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.MemoryBudgetController; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.IGTScanner; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java index c270d3f..ee5a757 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilder.java @@ -30,7 +30,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.kylin.measure.topn.Counter; import org.apache.kylin.measure.topn.TopNCounter; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.MemoryBudgetController; import org.apache.kylin.common.util.MemoryBudgetController.MemoryWaterLevel; @@ -39,6 +38,7 @@ import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.cuboid.CuboidScheduler; import org.apache.kylin.cube.gridtable.CubeGridTable; import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dimension.Dictionary; import 
org.apache.kylin.gridtable.GTAggregateScanner; import org.apache.kylin.gridtable.GTBuilder; import org.apache.kylin.gridtable.GTInfo; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java index a1eb6da..4c62bc6 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderInputConverter.java @@ -22,10 +22,11 @@ import org.apache.kylin.common.util.Bytes; import java.util.List; import java.util.Map; -import org.apache.kylin.common.util.Dictionary; import com.google.common.collect.Lists; + import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTInfo; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.measure.MeasureIngester; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java index 62432f7..37b33aa 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/AbstractRowKeyEncoder.java @@ -21,10 +21,10 @@ package org.apache.kylin.cube.kv; import java.util.Map; import org.apache.kylin.common.util.ByteArray; -import 
org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; @@ -38,7 +38,7 @@ import org.slf4j.LoggerFactory; public abstract class AbstractRowKeyEncoder { protected static final Logger logger = LoggerFactory.getLogger(AbstractRowKeyEncoder.class); - public static final byte DEFAULT_BLANK_BYTE = Dictionary.NULL; + public static final byte DEFAULT_BLANK_BYTE = DimensionEncoding.NULL; protected byte blankByte = DEFAULT_BLANK_BYTE; protected final CubeSegment cubeSeg; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java index 3510915..987fb55 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowConstants.java @@ -22,8 +22,6 @@ public class RowConstants { public static final int ROWKEY_COL_DEFAULT_LENGTH = 256; - // row key fixed length place holder - public static final byte ROWKEY_PLACE_HOLDER_BYTE = 9; // row key lower bound public static final byte ROWKEY_LOWER_BYTE = 0; // row key upper bound http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java index 1d57cf9..bd0537d 100644 --- 
a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyColumnIO.java @@ -22,8 +22,8 @@ import java.util.Arrays; import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.dict.IDictionaryAware; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,26 +57,6 @@ public class RowKeyColumnIO { return (Dictionary<String>) IDictionaryAwareness.getDictionary(col); } - public void writeColumnWithoutDictionary(byte[] src, int srcOffset, int srcLength, byte[] dst, int dstOffset, int dstLength) { - if (srcLength >= dstLength) { - System.arraycopy(src, srcOffset, dst, dstOffset, dstLength); - } else { - System.arraycopy(src, srcOffset, dst, dstOffset, srcLength); - Arrays.fill(dst, dstOffset + srcLength, dstOffset + dstLength, RowConstants.ROWKEY_PLACE_HOLDER_BYTE); - } - } - - public void writeColumnWithDictionary(Dictionary<String> dictionary, byte[] src, int srcOffset, int srcLength, byte[] dst, int dstOffset, int dstLength, int roundingFlag, int defaultValue) { - // dict value - try { - int id = dictionary.getIdFromValueBytes(src, srcOffset, srcLength, roundingFlag); - BytesUtil.writeUnsigned(id, dst, dstOffset, dictionary.getSizeOfId()); - } catch (IllegalArgumentException ex) { - Arrays.fill(dst, dstOffset, dstOffset + dstLength, (byte) defaultValue); - logger.error("Can't translate value " + Bytes.toString(src, srcOffset, srcLength) + " to dictionary ID, roundingFlag " + roundingFlag + ". 
Using default value " + String.format("\\x%02X", defaultValue)); - } - } - public void writeColumn(TblColRef column, byte[] value, int valueLen, byte defaultValue, byte[] output, int outputOffset) { writeColumn(column, value, valueLen, 0, defaultValue, output, outputOffset); } http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java index bcb2caf..7392e4c 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java @@ -43,7 +43,6 @@ import javax.annotation.Nullable; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; @@ -54,6 +53,7 @@ import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.source.ReadableTable; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java b/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java index 714571f..89403ec 100644 --- 
a/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/DefaultGTComparator.java @@ -1,7 +1,7 @@ package org.apache.kylin.gridtable; import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; public class DefaultGTComparator implements IGTComparator { @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java index e4dd8f5..4a9c2d3 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java @@ -23,12 +23,12 @@ import static org.junit.Assert.assertTrue; import java.util.HashSet; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.TblColRef; import org.junit.After; import org.junit.Before; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java index c25bad7..8d8366e 100644 --- 
a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderStressTest.java @@ -26,10 +26,10 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.junit.AfterClass; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java index 832584c..80e3df1 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/DoggedCubeBuilderTest.java @@ -33,10 +33,10 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.TblColRef; import org.junit.AfterClass; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java 
---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java index b593e48..af3de74 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/inmemcubing/InMemCubeBuilderTest.java @@ -33,7 +33,6 @@ import java.util.concurrent.Future; import org.apache.commons.io.FileUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; @@ -42,6 +41,7 @@ import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; import org.apache.kylin.dict.DictionaryGenerator; import org.apache.kylin.dict.IterableDictionaryValueEnumerator; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java index df69c17..1c95a19 100644 --- a/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java +++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DictGridTableTest.java @@ -29,13 +29,13 @@ import java.util.Map; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.BytesSerializer; -import 
org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.ImmutableBitSet; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.gridtable.CubeCodeSystem; import org.apache.kylin.dict.NumberDictionaryBuilder; import org.apache.kylin.dict.StringBytesConverter; import org.apache.kylin.dict.TrieDictionaryBuilder; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.gridtable.GTInfo.Builder; import org.apache.kylin.gridtable.memstore.GTSimpleMemStore; import org.apache.kylin.metadata.datatype.DataType; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java index 62b06aa..b666229 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java @@ -30,7 +30,7 @@ import java.io.UnsupportedEncodingException; import java.util.Date; import org.apache.commons.lang.StringUtils; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** * A dictionary for date string (date only, no time). 
http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java index b0326c1..fef1e0e 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictCodeSystem.java @@ -3,7 +3,7 @@ package org.apache.kylin.dict; import java.nio.ByteBuffer; import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.metadata.filter.IFilterCodeSystem; /** @@ -26,7 +26,7 @@ public class DictCodeSystem implements IFilterCodeSystem<String> { String v = value; for (int i = 0, n = v.length(); i < n; i++) { - if ((byte) v.charAt(i) != Dictionary.NULL) + if ((byte) v.charAt(i) != DimensionEncoding.NULL) return false; } return true; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java index 4b01e60..df6781a 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java @@ -27,8 +27,8 @@ import java.util.List; import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.dimension.Dictionary; 
import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.source.ReadableTable; import org.slf4j.Logger; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java index 8e41abf..3202892 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java @@ -20,7 +20,7 @@ package org.apache.kylin.dict; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.source.ReadableTable.TableSignature; import com.fasterxml.jackson.annotation.JsonAutoDetect; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java index 69b29fe..47844be 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java @@ -24,8 +24,8 @@ import java.io.IOException; import org.apache.kylin.common.persistence.Serializer; import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.dimension.Dictionary; /** * @author yangli9 
http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index d49e43d..ce04b55 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.DataModelDesc; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java index 6b47868..830339f 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionarySerializer.java @@ -10,7 +10,7 @@ import java.io.OutputStream; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** */ http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java ---------------------------------------------------------------------- 
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java index 4586163..cadc02b 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryAware.java @@ -18,7 +18,7 @@ package org.apache.kylin.dict; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.TblColRef; /** http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java index df7b1c6..96448a4 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java @@ -21,7 +21,7 @@ package org.apache.kylin.dict; import com.google.common.collect.Lists; import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import java.io.IOException; import java.util.List; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java index 65c6c05..568e23d 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java +++ 
b/core-dictionary/src/main/java/org/apache/kylin/dict/TimeStrDictionary.java @@ -7,7 +7,7 @@ import java.io.PrintStream; import java.io.UnsupportedEncodingException; import org.apache.kylin.common.util.DateFormat; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; /** */ http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java index 552aa92..8aa4eb6 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java @@ -23,7 +23,7 @@ import com.google.common.base.Preconditions; import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.slf4j.Logger; import org.slf4j.LoggerFactory; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java index 9ef360d..b07e165 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TupleFilterDictionaryTranslater.java @@ -19,7 +19,8 @@ package org.apache.kylin.dict; import com.google.common.primitives.Primitives; -import org.apache.kylin.common.util.Dictionary; + 
+import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.filter.ColumnTupleFilter; import org.apache.kylin.metadata.filter.CompareTupleFilter; import org.apache.kylin.metadata.filter.ConstantTupleFilter; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java index 6297906..9337829 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java @@ -30,10 +30,10 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; -import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.dict.StringBytesConverter; import org.apache.kylin.dict.TrieDictionary; import org.apache.kylin.dict.TrieDictionaryBuilder; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.source.ReadableTable; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java index 8020729..8060fab 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java @@ -29,7 +29,7 @@ import java.util.Random; import java.util.Set; 
import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.dimension.Dictionary; import org.apache.kylin.metadata.datatype.DataType; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-metadata/src/main/java/org/apache/kylin/dimension/Dictionary.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/Dictionary.java b/core-metadata/src/main/java/org/apache/kylin/dimension/Dictionary.java new file mode 100644 index 0000000..9be233b --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/Dictionary.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.dimension; + +import java.io.PrintStream; +import java.io.Serializable; +import java.io.UnsupportedEncodingException; + +import org.apache.kylin.common.persistence.Writable; +import org.apache.kylin.common.util.BytesUtil; + +/** + * A bi-way dictionary that maps from dimension/column values to IDs and vice + * versa. By storing IDs instead of real values, the size of cube is + * significantly reduced. 
+ * + * - IDs are the smallest integers possible for the cardinality of a column, for the + * purpose of minimal storage space - IDs preserve the ordering of values, such that + * range queries can be applied to IDs directly + * + * A dictionary, once built, is immutable. This allows an optimal memory footprint + * by e.g. flattening the Trie structure into a byte array, replacing node pointers + * with array offsets. + * + * @author yangli9 + */ +@SuppressWarnings("serial") +abstract public class Dictionary<T> implements Writable, Serializable { + + // the ID with all bits set to 1 (e.g. 0xff for a 1-byte ID) is reserved for the NULL value; NULL_ID is indexed by the ID size in bytes + public static final int NULL_ID[] = new int[] { 0, 0xff, 0xffff, 0xffffff, 0xffffffff }; + + abstract public int getMinId(); + + abstract public int getMaxId(); + + public int getSize() { + return getMaxId() - getMinId() + 1; + } + + /** + * @return the size of an ID in bytes, determined by the cardinality of column + */ + abstract public int getSizeOfId(); + + /** + * @return the (maximum) size of value in bytes, determined by the longest value + */ + abstract public int getSizeOfValue(); + + /** + * @return true if each entry of this dict is contained by the dict in param + */ + abstract public boolean contains(Dictionary<?> another); + + /** + * Convenient form of <code>getIdFromValue(value, 0)</code> + */ + final public int getIdFromValue(T value) throws IllegalArgumentException { + return getIdFromValue(value, 0); + } + + /** + * Returns the ID integer of the given value, or nullId() if the value is in null form (see isNullObjectForm). If the value is not found: + * <p> + * - if roundingFlag=0, throw IllegalArgumentException; <br> + * - if roundingFlag<0, return the closest smaller ID integer if it exists; <br> + * - if roundingFlag>0, return the closest bigger ID integer if it exists. 
<br> + * <p> + * The implementation often has a cache and is thus faster than the byte[] version getIdFromValueBytes() + * + * @throws IllegalArgumentException + * if value is not found in dictionary and rounding is off; + * or if rounding cannot find a smaller or bigger ID + */ + final public int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException { + if (isNullObjectForm(value)) + return nullId(); + else + return getIdFromValueImpl(value, roundingFlag); + } + + final public boolean containsValue(T value) throws IllegalArgumentException { + if (isNullObjectForm(value)) { + return true; + } else { + try { + // roundingFlag is 0 here, so a missing value surfaces as IllegalArgumentException, handled below + getIdFromValueImpl(value, 0); + } catch (IllegalArgumentException e) { + return false; + } + return true; + } + } + + protected boolean isNullObjectForm(T value) { + return value == null; + } + + abstract protected int getIdFromValueImpl(T value, int roundingFlag); + + /** + * @return the value corresponding to the given ID, or null if the ID is the NULL ID + * @throws IllegalArgumentException + * if ID is not found in dictionary + */ + final public T getValueFromId(int id) throws IllegalArgumentException { + if (isNullId(id)) + return null; + else + return getValueFromIdImpl(id); + } + + abstract protected T getValueFromIdImpl(int id); + + /** + * Convenient form of + * <code>getIdFromValueBytes(value, offset, len, 0)</code> + */ + final public int getIdFromValueBytes(byte[] value, int offset, int len) throws IllegalArgumentException { + return getIdFromValueBytes(value, offset, len, 0); + } + + /** + * A lower level API, returns the ID integer from raw value bytes, or nullId() if the bytes are in null form (see isNullByteForm). 
If the value is not found: + * <p> + * - if roundingFlag=0, throw IllegalArgumentException; <br> + * - if roundingFlag<0, return the closest smaller ID integer if it exists; <br> + * - if roundingFlag>0, return the closest bigger ID integer if it exists. <br> + * <p> + * Bypassing the cache layer, this could be significantly slower than getIdFromValue(T value). 
+ * + * @throws IllegalArgumentException + * if value is not found in dictionary and rounding is off; + * or if rounding cannot find a smaller or bigger ID + */ + final public int getIdFromValueBytes(byte[] value, int offset, int len, int roundingFlag) throws IllegalArgumentException { + if (isNullByteForm(value, offset, len)) + return nullId(); + else { + int id = getIdFromValueBytesImpl(value, offset, len, roundingFlag); + if (id < 0) // a negative ID returned by the implementation is also treated as not found + throw new IllegalArgumentException("Value not exists!"); + return id; + } + } + + protected boolean isNullByteForm(byte[] value, int offset, int len) { + return value == null; + } + + abstract protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag); + + final public byte[] getValueBytesFromId(int id) { + if (isNullId(id)) + return BytesUtil.EMPTY_BYTE_ARRAY; // the NULL ID yields BytesUtil.EMPTY_BYTE_ARRAY rather than a null reference + else + return getValueBytesFromIdImpl(id); + } + + abstract protected byte[] getValueBytesFromIdImpl(int id); + + /** + * A lower level API, get byte values from ID, return the number of bytes + * written. Bypassing the cache layer, this could be significantly slower + * than getValueFromId(int id). 
+ * + * @return size of value bytes, 0 if empty string, -1 if null + * + * @throws IllegalArgumentException + * if ID is not found in dictionary + */ + final public int getValueBytesFromId(int id, byte[] returnValue, int offset) throws IllegalArgumentException { + if (isNullId(id)) + return -1; + else + return getValueBytesFromIdImpl(id, returnValue, offset); + } + + abstract protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset); + + abstract public void dump(PrintStream out); + + public int nullId() { + return NULL_ID[getSizeOfId()]; + } + + public boolean isNullId(int id) { + int nullId = NULL_ID[getSizeOfId()]; + return (nullId & id) == nullId; // true when every bit of the NULL pattern is set in id; NOTE(review): for an ID size of 0 the pattern is 0 and any id matches, confirm that is intended + } + + /** utility that converts the bytes of a dictionary ID to a string (ISO-8859-1, one char per byte), preserving order */ + public static String dictIdToString(byte[] idBytes, int offset, int length) { + try { + return new String(idBytes, offset, length, "ISO-8859-1"); + } catch (UnsupportedEncodingException e) { + // cannot happen: ISO-8859-1 is a charset every Java platform is required to support + return null; + } + } + + /** the reverse of dictIdToString(), returns integer ID */ + public static int stringToDictId(String str) { + try { + byte[] bytes = str.getBytes("ISO-8859-1"); + return BytesUtil.readUnsigned(bytes, 0, bytes.length); + } catch (UnsupportedEncodingException e) { + // cannot happen: ISO-8859-1 is a charset every Java platform is required to support + return 0; + } + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java new file mode 100644 index 0000000..f4bd350 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.dimension; + +import java.nio.ByteBuffer; + +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DictionaryDimEnc extends DimensionEncoding { + + private static final Logger logger = LoggerFactory.getLogger(DictionaryDimEnc.class); + + // could use a lazy loading trick here, to prevent loading all dictionaries of a segment at once + private final Dictionary<String> dict; + private final int fixedLen; // dict.getSizeOfId(), i.e. the number of bytes every encoded value occupies + + // used in encode() when a value does not exist in the dictionary: roundingFlag is handed to the dictionary lookup, defaultByte fills the output if the lookup still fails + private final int roundingFlag; + private final byte defaultByte; + + public DictionaryDimEnc(Dictionary<String> dict) { + this(dict, 0, NULL); // roundingFlag 0 and the inherited NULL byte as default + } + + public DictionaryDimEnc(Dictionary<String> dict, int roundingFlag, byte defaultByte) { + this.dict = dict; + this.fixedLen = dict.getSizeOfId(); + this.roundingFlag = roundingFlag; + this.defaultByte = defaultByte; + } + + public int getRoundingFlag() { + return roundingFlag; + } + + public DictionaryDimEnc copy(int roundingFlag) { + if (this.roundingFlag == roundingFlag) + return this; // nothing differs, so this instance is returned instead of a new one + else + return new DictionaryDimEnc(dict, roundingFlag, defaultByte); + } 
+ + public DictionaryDimEnc copy(int roundingFlag, byte defaultByte) { + if (this.roundingFlag == roundingFlag && this.defaultByte == defaultByte) + return this; + else + return new DictionaryDimEnc(dict, roundingFlag, defaultByte); + } + + @Override + public int getLengthOfEncoding() { + return fixedLen; + } + + @Override + public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + try { + int id = dict.getIdFromValueBytes(value, 0, valueLen, roundingFlag); + BytesUtil.writeUnsigned(id, output, outputOffset, fixedLen); + } catch (IllegalArgumentException ex) { // the value has no dictionary ID: fill all fixedLen output bytes with defaultByte and log the failure + for (int i = outputOffset; i < outputOffset + fixedLen; i++) { + output[i] = defaultByte; + } + logger.error("Can't translate value " + Bytes.toString(value, 0, valueLen) + " to dictionary ID, roundingFlag " + roundingFlag + ". Using default value " + String.format("\\x%02X", defaultByte)); + } + } + + @Override + public String decode(byte[] bytes, int offset, int len) { + int id = BytesUtil.readUnsigned(bytes, offset, len); + try { + String value = dict.getValueFromId(id); + return value; + } catch (IllegalArgumentException e) { + logger.error("Can't get dictionary value from " + dict + " (id = " + id + ")"); + return ""; // an ID the dictionary rejects decodes to the empty string + } + } + + @Override + public DataTypeSerializer<Object> asDataTypeSerializer() { + return new DictionarySerializer(); + } + + public class DictionarySerializer extends DataTypeSerializer<Object> { + @Override + public void serialize(Object value, ByteBuffer buf) { + int id = dict.getIdFromValue(value == null ? 
null : value.toString(), roundingFlag); + BytesUtil.writeUnsigned(id, dict.getSizeOfId(), buf); + } + + @Override + public Object deserialize(ByteBuffer in) { + int id = BytesUtil.readUnsigned(in, dict.getSizeOfId()); + return dict.getValueFromId(id); + } + + @Override + public int peekLength(ByteBuffer in) { + return dict.getSizeOfId(); + } + + @Override + public int maxLength() { + return dict.getSizeOfId(); + } + + @Override + public int getStorageBytesEstimate() { + return dict.getSizeOfId(); + } + }; +} http://git-wip-us.apache.org/repos/asf/kylin/blob/650aea4e/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java new file mode 100644 index 0000000..8254213 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DimensionEncoding.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.dimension; + +import org.apache.kylin.metadata.datatype.DataTypeSerializer; + +/** + * Dimension encoding maps a dimension (String) to bytes of fixed length. + * + * It is similar to Dictionary in that 1) the bytes are of fixed length; 2) the mapping is bi-way; + * 3) the mapping preserves order, but it also differs from Dictionary as the target + * bytes can be very long while a dictionary ID is 4 bytes at most. This means it is + * hard to enumerate all values of an encoding, thus TupleFilterDictionaryTranslater + * cannot work on DimensionEncoding. + */ +public abstract class DimensionEncoding { + + // it's convention that all 0xff means NULL + public static final byte NULL = (byte) 0xff; + + public boolean isNull(byte[] bytes, int offset, int length) { + // NULL means every one of the length bytes starting at offset is 0xFF; an empty range is never NULL + if (length == 0) { + return false; + } + for (int i = 0; i < length; i++) { // scan only the requested range, bounded by length and not by the array size + if (bytes[i + offset] != NULL) { + return false; + } + } + return true; + } + + /** return the fixed length of encoded bytes */ + abstract public int getLengthOfEncoding(); + + /** encode the first valueLen bytes of value (a string in byte form) into output, starting at outputOffset */ + abstract public void encode(byte[] value, int valueLen, byte[] output, int outputOffset); + + /** decode the len bytes found at offset back to the value string */ + abstract public String decode(byte[] bytes, int offset, int len); + + /** return a DataTypeSerializer that does the same encoding/decoding on ByteBuffer */ + abstract public DataTypeSerializer<Object> asDataTypeSerializer(); + +}