KYLIN-2199 refined KYLIN-2191 due to KYLIN-2198
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ac4e9ec5 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ac4e9ec5 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ac4e9ec5 Branch: refs/heads/yang21-hbase1.x Commit: ac4e9ec563b6e14e387c5803d441c8dbcbc06f87 Parents: e1acc41 Author: Hongbin Ma <mahong...@apache.org> Authored: Wed Nov 16 14:48:08 2016 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Thu Nov 17 11:14:24 2016 +0800 ---------------------------------------------------------------------- .../gridtable/DimEncodingPreserveOrderTest.java | 4 +- .../apache/kylin/dimension/IntegerDimEnc.java | 9 +- .../apache/kylin/dimension/IntegerDimEncV2.java | 228 +++++++++++++++++++ .../apache/kylin/dimension/IntDimEncTest.java | 2 +- .../kylin/dimension/IntegerDimEncTest.java | 18 +- 5 files changed, 248 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java index 87d37be..d572e56 100644 --- a/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java +++ b/core-cube/src/test/java/org/apache/kylin/gridtable/DimEncodingPreserveOrderTest.java @@ -27,7 +27,7 @@ import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.Bytes; import org.apache.kylin.dimension.DimensionEncoding; import org.apache.kylin.dimension.FixedLenHexDimEnc; -import org.apache.kylin.dimension.IntegerDimEnc; +import org.apache.kylin.dimension.IntegerDimEncV2; import org.apache.kylin.dimension.OneMoreByteVLongDimEnc; import org.junit.BeforeClass; import org.junit.Test; @@ -99,7 +99,7 @@ public class DimEncodingPreserveOrderTest { @Test public void testVLongDimEncPreserveOrder() { for (int i = 1; i <= successValue.size(); i++) { - IntegerDimEnc enc = new IntegerDimEnc(i); + IntegerDimEncV2 enc = new IntegerDimEncV2(i); List<ByteArray> encodedValues = Lists.newArrayList(); for (long value : successValue.get(i - 1)) { encodedValues.add(encode(enc, value)); http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java index e55a0a8..983af9a 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEnc.java @@ -31,8 +31,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * replacement for IntegerDimEnc, the diff is VLongDimEnc supports negative values + * replacement for IntDimEnc, the diff is IntegerDimEnc supports negative values + * for IntegerDimEnc(N), the supported range is (-2^(8*N-1),2^(8*N-1)) + * + * -2^(8*N-1) is not supported because the slot is reserved for null values. + * -2^(8*N-1) will be encoded with warn, and its output will be null */ +@Deprecated//due to a fatal bug (KYLIN-2191) public class IntegerDimEnc extends DimensionEncoding { private static final long serialVersionUID = 1L; @@ -127,7 +132,7 @@ public class IntegerDimEnc extends DimensionEncoding { //only take useful bytes integer = integer & MASK[fixedLen]; - boolean positive = (integer & ((0x80L) << ((fixedLen - 1) << 3))) == 0; + boolean positive = (integer & ((0x80) << ((fixedLen - 1) << 3))) == 0; if (!positive) { integer |= (~MASK[fixedLen]); } http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java new file mode 100644 index 0000000..1a54664 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/IntegerDimEncV2.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.dimension; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.metadata.datatype.DataTypeSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * replacement for IntDimEnc, the diff is IntegerDimEnc supports negative values + * for IntegerDimEnc(N), the supported range is (-2^(8*N-1),2^(8*N-1)) + * + * -2^(8*N-1) is not supported because the slot is reserved for null values. + * -2^(8*N-1) will be encoded with warn, and its output will be null + */ +public class IntegerDimEncV2 extends DimensionEncoding { + private static final long serialVersionUID = 1L; + + private static Logger logger = LoggerFactory.getLogger(IntegerDimEncV2.class); + + private static final long[] CAP = { 0, 0x7fL, 0x7fffL, 0x7fffffL, 0x7fffffffL, 0x7fffffffffL, 0x7fffffffffffL, 0x7fffffffffffffL, 0x7fffffffffffffffL }; + private static final long[] MASK = { 0, 0xffL, 0xffffL, 0xffffffL, 0xffffffffL, 0xffffffffffL, 0xffffffffffffL, 0xffffffffffffffL, 0xffffffffffffffffL }; + private static final long[] TAIL = { 0, 0x80L, 0x8000L, 0x800000L, 0x80000000L, 0x8000000000L, 0x800000000000L, 0x80000000000000L, 0x8000000000000000L }; + static { + for (int i = 1; i < TAIL.length; ++i) { + long head = ~MASK[i]; + TAIL[i] = head | TAIL[i]; + } + } + + public static final String ENCODING_NAME = "integer"; + + public static class Factory extends DimensionEncodingFactory { + @Override + public String getSupportedEncodingName() { + return ENCODING_NAME; + } + + @Override + protected int getCurrentVersion() { + return 2; + } + + @Override + public DimensionEncoding createDimensionEncoding(String encodingName, String[] args) { + return new IntegerDimEncV2(Integer.parseInt(args[0])); + } + }; + + // ============================================================================ + + private int fixedLen; + + transient private int avoidVerbose = 0; + transient private int avoidVerbose2 = 0; + + //no-arg constructor is required for Externalizable + public IntegerDimEncV2() { + } + + public IntegerDimEncV2(int len) { + if (len <= 0 || len >= CAP.length) + throw new IllegalArgumentException(); + + this.fixedLen = len; + } + + @Override + public int getLengthOfEncoding() { + return fixedLen; + } + + @Override + public void encode(byte[] value, int valueLen, byte[] output, int outputOffset) { + if (value == null) { + Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); + return; + } + + encode(Bytes.toString(value, 0, valueLen), output, outputOffset); + } + + void encode(String valueStr, byte[] output, int outputOffset) { + if (valueStr == null) { + Arrays.fill(output, outputOffset, outputOffset + fixedLen, NULL); + return; + } + + long integer = Long.parseLong(valueStr); + if (integer > CAP[fixedLen] || integer < TAIL[fixedLen]) { + if (avoidVerbose++ % 10000 == 0) { + logger.warn("Expect at most " + fixedLen + " bytes, but got " + valueStr + ", will truncate, hit times:" + avoidVerbose); + } + } + + if (integer == TAIL[fixedLen]) { + if (avoidVerbose2++ % 10000 == 0) { + logger.warn("Value " + valueStr + " does not fit into " + fixedLen + " bytes "); + } + } + + BytesUtil.writeLong(integer + CAP[fixedLen], output, outputOffset, fixedLen);//apply an offset to preserve binary order, overflow is okay + } + + @Override + public String decode(byte[] bytes, int offset, int len) { + if (isNull(bytes, offset, len)) { + return null; + } + + long integer = BytesUtil.readLong(bytes, offset, len) - CAP[fixedLen]; + + //only take useful bytes + integer = integer & MASK[fixedLen]; + boolean positive = (integer & ((0x80L) << ((fixedLen - 1) << 3))) == 0; + if (!positive) { + integer |= (~MASK[fixedLen]); + } + + return String.valueOf(integer); + } + + @Override + public DataTypeSerializer<Object> asDataTypeSerializer() { + return new IntegerSerializer(); + } + + public class IntegerSerializer extends DataTypeSerializer<Object> { + // be thread-safe and avoid repeated obj creation + private ThreadLocal<byte[]> current = new ThreadLocal<byte[]>(); + + private byte[] currentBuf() { + byte[] buf = current.get(); + if (buf == null) { + buf = new byte[fixedLen]; + current.set(buf); + } + return buf; + } + + @Override + public void serialize(Object value, ByteBuffer out) { + byte[] buf = currentBuf(); + String valueStr = value == null ? null : value.toString(); + encode(valueStr, buf, 0); + out.put(buf); + } + + @Override + public Object deserialize(ByteBuffer in) { + byte[] buf = currentBuf(); + in.get(buf); + return decode(buf, 0, buf.length); + } + + @Override + public int peekLength(ByteBuffer in) { + return fixedLen; + } + + @Override + public int maxLength() { + return fixedLen; + } + + @Override + public int getStorageBytesEstimate() { + return fixedLen; + } + + @Override + public Object valueOf(String str) { + return str; + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeShort(fixedLen); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + fixedLen = in.readShort(); + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + + IntegerDimEncV2 that = (IntegerDimEncV2) o; + + return fixedLen == that.fixedLen; + + } + + @Override + public int hashCode() { + return fixedLen; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java index 280a242..d228dd5 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntDimEncTest.java @@ -26,7 +26,7 @@ import org.junit.Assert; import org.junit.Test; /** - * Deprecated. use VLongDimEnc instead + * Deprecated. use integer encoding instead * @deprecated */ public class IntDimEncTest { http://git-wip-us.apache.org/repos/asf/kylin/blob/ac4e9ec5/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java index a9b2511..9924053 100644 --- a/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/dimension/IntegerDimEncTest.java @@ -76,24 +76,24 @@ public class IntegerDimEncTest { @Test public void testConstructor() { try { - new IntegerDimEnc(0); + new IntegerDimEncV2(0); Assert.fail(); } catch (IllegalArgumentException e) { // expect } try { - new IntegerDimEnc(9); + new IntegerDimEncV2(9); Assert.fail(); } catch (IllegalArgumentException e) { // expect } - new IntegerDimEnc(8); + new IntegerDimEncV2(8); } @Test public void testNull() { for (int i = 1; i < 9; i++) { - IntegerDimEnc enc = new IntegerDimEnc(i); + IntegerDimEncV2 enc = new IntegerDimEncV2(i); byte[] buf = new byte[enc.getLengthOfEncoding()]; enc.encode(null, 0, buf, 0); @@ -113,7 +113,7 @@ public class IntegerDimEncTest { @Test public void testEncodeDecode() { for (int i = 1; i <= successValue.size(); i++) { - IntegerDimEnc enc = new IntegerDimEnc(i); + IntegerDimEncV2 enc = new IntegerDimEncV2(i); for (long value : successValue.get(i - 1)) { testEncodeDecode(enc, value); } @@ -129,7 +129,7 @@ public class IntegerDimEncTest { } } - private void testEncodeDecode(IntegerDimEnc enc, long value) { + private void testEncodeDecode(IntegerDimEncV2 enc, long value) { String valueStr = "" + value; byte[] buf = new byte[enc.getLengthOfEncoding()]; byte[] bytes = Bytes.toBytes(valueStr); @@ -141,7 +141,9 @@ public class IntegerDimEncTest { @Test public void testSerDes() { for (int i = 1; i <= successValue.size(); i++) { - IntegerDimEnc enc = new IntegerDimEnc(i); + IntegerDimEncV2 enc = new IntegerDimEncV2(i); + + testSerDes(enc, 127); for (long value : successValue.get(i - 1)) { testSerDes(enc, value); } @@ -156,7 +158,7 @@ public class IntegerDimEncTest { } } - private void testSerDes(IntegerDimEnc enc, long value) { + private void testSerDes(IntegerDimEncV2 enc, long value) { DataTypeSerializer<Object> ser = enc.asDataTypeSerializer(); byte[] buf = new byte[enc.getLengthOfEncoding()]; String valueStr = "" + value;