KYLIN-1453 cuboid sharding based on UHC (ultra high cardinality) column
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/dfab0c14 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/dfab0c14 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/dfab0c14 Branch: refs/heads/master Commit: dfab0c14cf77c0c77687d66858e68500ae3776ab Parents: 1a13952 Author: Hongbin Ma <[email protected]> Authored: Fri Mar 18 11:26:01 2016 +0800 Committer: Hongbin Ma <[email protected]> Committed: Thu Mar 24 14:45:02 2016 +0800 ---------------------------------------------------------------------- .../java/org/apache/kylin/cube/CubeSegment.java | 33 +-- .../org/apache/kylin/cube/kv/RowKeyEncoder.java | 19 +- .../org/apache/kylin/cube/model/CubeDesc.java | 5 + .../apache/kylin/cube/model/RowKeyColDesc.java | 243 ++++++++++--------- .../org/apache/kylin/cube/model/RowKeyDesc.java | 13 + 5 files changed, 181 insertions(+), 132 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java index ead6a41..8dd294a 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java @@ -18,12 +18,14 @@ package org.apache.kylin.cube; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; -import com.fasterxml.jackson.annotation.JsonBackReference; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import java.text.SimpleDateFormat; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import 
java.util.Set; +import java.util.TimeZone; +import java.util.concurrent.ConcurrentHashMap; + import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.util.ShardingHash; import org.apache.kylin.cube.kv.CubeDimEncMap; @@ -37,12 +39,12 @@ import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.metadata.realization.IRealization; import org.apache.kylin.metadata.realization.IRealizationSegment; -import java.text.SimpleDateFormat; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.TimeZone; -import java.util.concurrent.ConcurrentHashMap; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; +import com.fasterxml.jackson.annotation.JsonBackReference; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment { @@ -80,7 +82,6 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment @JsonProperty("blackout_cuboids") private List<Long> blackoutCuboids = Lists.newArrayList(); - @JsonProperty("binary_signature") private String binarySignature; // a hash of cube schema and dictionary ID, used for sanity check @@ -280,7 +281,7 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment public Dictionary<String> getDictionary(TblColRef col) { return CubeManager.getInstance(this.getCubeInstance().getConfig()).getDictionary(this, col); } - + public CubeDimEncMap getDimensionEncodingMap() { return new CubeDimEncMap(this); } @@ -382,6 +383,10 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment return 
getCubeDesc().isEnableSharding(); } + public Set<TblColRef> getUHCColumns() { + return getCubeDesc().getUHCColumns(); + } + public int getRowKeyPreambleSize() { return isEnableSharding() ? RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN : RowConstants.ROWKEY_CUBOIDID_LEN; } http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java index 05afdbf..656ddc1 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.Bytes; @@ -39,13 +40,24 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { private int bodyLength = 0; private RowKeyColumnIO colIO; + protected boolean enableSharding; + private int UHCOffset = -1;//it's a offset to the beginning of body + private int UHCLength = -1; public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) { super(cubeSeg, cuboid); enableSharding = cubeSeg.isEnableSharding(); + Set<TblColRef> UHCColumns = cubeSeg.getUHCColumns(); + if (UHCColumns.size() > 1) { + throw new IllegalStateException("Does not support multiple UHC now"); + } colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap()); for (TblColRef column : cuboid.getColumns()) { + if (UHCColumns.contains(column)) { + UHCOffset = bodyLength; + UHCLength = colIO.getColumnLength(column); + } bodyLength += colIO.getColumnLength(column); } } @@ -60,9 +72,10 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { protected short calculateShard(byte[] key) { if 
(enableSharding) { - int bodyOffset = RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN; + int shardSeedOffset = UHCOffset == -1 ? 0 : UHCOffset; + int shardSeedLength = UHCLength == -1 ? bodyLength : UHCLength; short cuboidShardNum = cubeSeg.getCuboidShardNum(cuboid.getId()); - short shardOffset = ShardingHash.getShard(key, bodyOffset, bodyLength, cuboidShardNum); + short shardOffset = ShardingHash.getShard(key, RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN + shardSeedOffset, shardSeedLength, cuboidShardNum); return ShardingHash.normalize(cubeSeg.getCuboidBaseShard(cuboid.getId()), shardOffset, cubeSeg.getTotalShards()); } else { throw new RuntimeException("If enableSharding false, you should never calculate shard"); @@ -90,7 +103,7 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder { @Override public void encode(ByteArray bodyBytes, ByteArray outputBuf) { Preconditions.checkState(bodyBytes.length() == bodyLength); - Preconditions.checkState(bodyBytes.length() + getHeaderLength() == outputBuf.length(),// + Preconditions.checkState(bodyBytes.length() + getHeaderLength() == outputBuf.length(), // "bodybytes length: " + bodyBytes.length() + " outputBuf length: " + outputBuf.length() + " header length: " + getHeaderLength()); System.arraycopy(bodyBytes.array(), bodyBytes.offset(), outputBuf.array(), getHeaderLength(), bodyLength); http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index cb94447..165ab60 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -166,6 +166,11 @@ public class CubeDesc extends RootPersistentEntity { return storageType == IStorageAware.ID_SHARDED_HBASE; } + 
public Set<TblColRef> getUHCColumns() { + return getRowkey().getUHCColumns(); + } + + /** * Error messages during resolving json metadata */ http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java index 452d689..e72011c 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java @@ -1,115 +1,128 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.cube.model; - -import org.apache.commons.lang.StringUtils; -import org.apache.kylin.common.util.StringUtil; -import org.apache.kylin.dimension.DictionaryDimEnc; -import org.apache.kylin.dimension.DimensionEncodingFactory; -import org.apache.kylin.metadata.model.TblColRef; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; - -/** - * @author yangli9 - * - */ -@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) -public class RowKeyColDesc { - - @JsonProperty("column") - private String column; - @JsonProperty("encoding") - private String encoding; - - // computed - private String encodingName; - private String[] encodingArgs; - private int bitIndex; - private TblColRef colRef; - - public void init() { - Preconditions.checkState(StringUtils.isNotEmpty(this.encoding)); - - String[] parts = this.encoding.split("\\s*[(),:]\\s*"); - if (parts == null || parts.length == 0 || parts[0].isEmpty()) - throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); - - this.encodingName = parts[0]; - this.encodingArgs = parts[parts.length - 1].isEmpty() // - ? 
StringUtil.subArray(parts, 1, parts.length - 1) : StringUtil.subArray(parts, 1, parts.length); - - if (!DimensionEncodingFactory.isVaildEncoding(this.encodingName)) - throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); - } - - public String getEncoding() { - return encoding; - } - - public void setEncoding(String encoding) { - this.encoding = encoding; - } - - public String getColumn() { - return column; - } - - public void setColumn(String column) { - this.column = column; - } - - public String getEncodingName() { - return encodingName; - } - - public String[] getEncodingArgs() { - return encodingArgs; - } - - public boolean isUsingDictionary() { - return DictionaryDimEnc.ENCODING_NAME.equals(encodingName); - } - - public int getBitIndex() { - return bitIndex; - } - - void setBitIndex(int index) { - this.bitIndex = index; - } - - public TblColRef getColRef() { - return colRef; - } - - void setColRef(TblColRef colRef) { - this.colRef = colRef; - } - - @Override - public String toString() { - return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString(); - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.cube.model; + +import org.apache.commons.lang.StringUtils; +import org.apache.kylin.common.util.StringUtil; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncodingFactory; +import org.apache.kylin.metadata.model.TblColRef; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; + +/** + * @author yangli9 + * + */ +@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) +public class RowKeyColDesc { + + @JsonProperty("column") + private String column; + @JsonProperty("encoding") + private String encoding; + @JsonProperty("isUHC") + private boolean isUHC;//is ultra high cardinality column + + + // computed + private String encodingName; + private String[] encodingArgs; + private int bitIndex; + private TblColRef colRef; + + public void init() { + Preconditions.checkState(StringUtils.isNotEmpty(this.encoding)); + + String[] parts = this.encoding.split("\\s*[(),:]\\s*"); + if (parts == null || parts.length == 0 || parts[0].isEmpty()) + throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); + + this.encodingName = parts[0]; + this.encodingArgs = parts[parts.length - 1].isEmpty() // + ? 
StringUtil.subArray(parts, 1, parts.length - 1) : StringUtil.subArray(parts, 1, parts.length); + + if (!DimensionEncodingFactory.isVaildEncoding(this.encodingName)) + throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'"); + } + + public String getEncoding() { + return encoding; + } + + public void setEncoding(String encoding) { + this.encoding = encoding; + } + + public String getColumn() { + return column; + } + + public void setColumn(String column) { + this.column = column; + } + + public boolean isUHC() { + return isUHC; + } + + public void setUHC(boolean UHC) { + isUHC = UHC; + } + + + + public String getEncodingName() { + return encodingName; + } + + public String[] getEncodingArgs() { + return encodingArgs; + } + + public boolean isUsingDictionary() { + return DictionaryDimEnc.ENCODING_NAME.equals(encodingName); + } + + public int getBitIndex() { + return bitIndex; + } + + void setBitIndex(int index) { + this.bitIndex = index; + } + + public TblColRef getColRef() { + return colRef; + } + + void setColRef(TblColRef colRef) { + this.colRef = colRef; + } + + @Override + public String toString() { + return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString(); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java index 6052c08..2df1764 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java @@ -20,7 +20,9 @@ package org.apache.kylin.cube.model; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import 
java.util.Set; import org.apache.kylin.metadata.model.TblColRef; @@ -41,6 +43,7 @@ public class RowKeyDesc { private long fullMask; private CubeDesc cubeDesc; private Map<TblColRef, RowKeyColDesc> columnMap; + private Set<TblColRef> UHCColumns; public RowKeyColDesc[] getRowKeyColumns() { return rowkeyColumns; @@ -65,6 +68,10 @@ public class RowKeyDesc { return getColDesc(col).isUsingDictionary(); } + public Set<TblColRef> getUHCColumns() { + return UHCColumns; + } + public void init(CubeDesc cubeDesc) { setCubeDesc(cubeDesc); @@ -84,6 +91,7 @@ public class RowKeyDesc { private void buildRowKey(Map<String, TblColRef> colNameAbbr) { columnMap = new HashMap<TblColRef, RowKeyColDesc>(); + UHCColumns = new HashSet<>(); for (int i = 0; i < rowkeyColumns.length; i++) { RowKeyColDesc rowKeyColDesc = rowkeyColumns[i]; @@ -97,6 +105,11 @@ public class RowKeyDesc { } columnMap.put(rowKeyColDesc.getColRef(), rowKeyColDesc); + + if (rowKeyColDesc.isUHC()) { + UHCColumns.add(rowKeyColDesc.getColRef()); + + } } this.fullMask = 0L;
