KYLIN-1453 cuboid sharding based on UHC column

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/dfab0c14
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/dfab0c14
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/dfab0c14

Branch: refs/heads/master
Commit: dfab0c14cf77c0c77687d66858e68500ae3776ab
Parents: 1a13952
Author: Hongbin Ma <[email protected]>
Authored: Fri Mar 18 11:26:01 2016 +0800
Committer: Hongbin Ma <[email protected]>
Committed: Thu Mar 24 14:45:02 2016 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/cube/CubeSegment.java |  33 +--
 .../org/apache/kylin/cube/kv/RowKeyEncoder.java |  19 +-
 .../org/apache/kylin/cube/model/CubeDesc.java   |   5 +
 .../apache/kylin/cube/model/RowKeyColDesc.java  | 243 ++++++++++---------
 .../org/apache/kylin/cube/model/RowKeyDesc.java |  13 +
 5 files changed, 181 insertions(+), 132 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
index ead6a41..8dd294a 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
@@ -18,12 +18,14 @@
 
 package org.apache.kylin.cube;
 
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
-import com.fasterxml.jackson.annotation.JsonBackReference;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.concurrent.ConcurrentHashMap;
+
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.util.ShardingHash;
 import org.apache.kylin.cube.kv.CubeDimEncMap;
@@ -37,12 +39,12 @@ import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.realization.IRealization;
 import org.apache.kylin.metadata.realization.IRealizationSegment;
 
-import java.text.SimpleDateFormat;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-import java.util.concurrent.ConcurrentHashMap;
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
+import com.fasterxml.jackson.annotation.JsonBackReference;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE)
 public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment {
@@ -80,7 +82,6 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment
     @JsonProperty("blackout_cuboids")
     private List<Long> blackoutCuboids = Lists.newArrayList();
 
-
     @JsonProperty("binary_signature")
     private String binarySignature; // a hash of cube schema and dictionary ID, used for sanity check
 
@@ -280,7 +281,7 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment
     public Dictionary<String> getDictionary(TblColRef col) {
         return CubeManager.getInstance(this.getCubeInstance().getConfig()).getDictionary(this, col);
     }
-    
+
     public CubeDimEncMap getDimensionEncodingMap() {
         return new CubeDimEncMap(this);
     }
@@ -382,6 +383,10 @@ public class CubeSegment implements Comparable<CubeSegment>, IRealizationSegment
         return getCubeDesc().isEnableSharding();
     }
 
+    public Set<TblColRef> getUHCColumns() {
+        return getCubeDesc().getUHCColumns();
+    }
+
     public int getRowKeyPreambleSize() {
         return isEnableSharding() ? RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN : RowConstants.ROWKEY_CUBOIDID_LEN;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java
index 05afdbf..656ddc1 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/kv/RowKeyEncoder.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.kylin.common.util.ByteArray;
 import org.apache.kylin.common.util.Bytes;
@@ -39,13 +40,24 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder {
 
     private int bodyLength = 0;
     private RowKeyColumnIO colIO;
+
     protected boolean enableSharding;
+    private int UHCOffset = -1;//it's a offset to the beginning of body
+    private int UHCLength = -1;
 
     public RowKeyEncoder(CubeSegment cubeSeg, Cuboid cuboid) {
         super(cubeSeg, cuboid);
         enableSharding = cubeSeg.isEnableSharding();
+        Set<TblColRef> UHCColumns = cubeSeg.getUHCColumns();
+        if (UHCColumns.size() > 1) {
+            throw new IllegalStateException("Does not support multiple UHC now");
+        }
         colIO = new RowKeyColumnIO(cubeSeg.getDimensionEncodingMap());
         for (TblColRef column : cuboid.getColumns()) {
+            if (UHCColumns.contains(column)) {
+                UHCOffset = bodyLength;
+                UHCLength = colIO.getColumnLength(column);
+            }
             bodyLength += colIO.getColumnLength(column);
         }
     }
@@ -60,9 +72,10 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder {
 
     protected short calculateShard(byte[] key) {
         if (enableSharding) {
-            int bodyOffset = RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN;
+            int shardSeedOffset = UHCOffset == -1 ? 0 : UHCOffset;
+            int shardSeedLength = UHCLength == -1 ? bodyLength : UHCLength;
             short cuboidShardNum = cubeSeg.getCuboidShardNum(cuboid.getId());
-            short shardOffset = ShardingHash.getShard(key, bodyOffset, bodyLength, cuboidShardNum);
+            short shardOffset = ShardingHash.getShard(key, RowConstants.ROWKEY_SHARD_AND_CUBOID_LEN + shardSeedOffset, shardSeedLength, cuboidShardNum);
             return ShardingHash.normalize(cubeSeg.getCuboidBaseShard(cuboid.getId()), shardOffset, cubeSeg.getTotalShards());
         } else {
             throw new RuntimeException("If enableSharding false, you should never calculate shard");
@@ -90,7 +103,7 @@ public class RowKeyEncoder extends AbstractRowKeyEncoder {
     @Override
     public void encode(ByteArray bodyBytes, ByteArray outputBuf) {
         Preconditions.checkState(bodyBytes.length() == bodyLength);
-        Preconditions.checkState(bodyBytes.length() + getHeaderLength() == outputBuf.length(),//
+        Preconditions.checkState(bodyBytes.length() + getHeaderLength() == outputBuf.length(), //
                 "bodybytes length: " + bodyBytes.length() + " outputBuf length: " + outputBuf.length() + " header length: " + getHeaderLength());
         System.arraycopy(bodyBytes.array(), bodyBytes.offset(), outputBuf.array(), getHeaderLength(), bodyLength);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index cb94447..165ab60 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -166,6 +166,11 @@ public class CubeDesc extends RootPersistentEntity {
         return storageType == IStorageAware.ID_SHARDED_HBASE;
     }
 
+    public Set<TblColRef> getUHCColumns() {
+        return getRowkey().getUHCColumns();
+    }
+
+
     /**
      * Error messages during resolving json metadata
      */

http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java
index 452d689..e72011c 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java
@@ -1,115 +1,128 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.cube.model;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.kylin.common.util.StringUtil;
-import org.apache.kylin.dimension.DictionaryDimEnc;
-import org.apache.kylin.dimension.DimensionEncodingFactory;
-import org.apache.kylin.metadata.model.TblColRef;
-
-import com.fasterxml.jackson.annotation.JsonAutoDetect;
-import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.base.Objects;
-import com.google.common.base.Preconditions;
-
-/**
- * @author yangli9
- * 
- */
-@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE)
-public class RowKeyColDesc {
-
-    @JsonProperty("column")
-    private String column;
-    @JsonProperty("encoding")
-    private String encoding;
-
-    // computed
-    private String encodingName;
-    private String[] encodingArgs;
-    private int bitIndex;
-    private TblColRef colRef;
-
-    public void init() {
-        Preconditions.checkState(StringUtils.isNotEmpty(this.encoding));
-
-        String[] parts = this.encoding.split("\\s*[(),:]\\s*");
-        if (parts == null || parts.length == 0 || parts[0].isEmpty())
-            throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'");
-
-        this.encodingName = parts[0];
-        this.encodingArgs = parts[parts.length - 1].isEmpty() //
-                ? StringUtil.subArray(parts, 1, parts.length - 1) : StringUtil.subArray(parts, 1, parts.length);
-
-        if (!DimensionEncodingFactory.isVaildEncoding(this.encodingName))
-            throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'");
-    }
-
-    public String getEncoding() {
-        return encoding;
-    }
-
-    public void setEncoding(String encoding) {
-        this.encoding = encoding;
-    }
-
-    public String getColumn() {
-        return column;
-    }
-
-    public void setColumn(String column) {
-        this.column = column;
-    }
-
-    public String getEncodingName() {
-        return encodingName;
-    }
-
-    public String[] getEncodingArgs() {
-        return encodingArgs;
-    }
-
-    public boolean isUsingDictionary() {
-        return DictionaryDimEnc.ENCODING_NAME.equals(encodingName);
-    }
-
-    public int getBitIndex() {
-        return bitIndex;
-    }
-
-    void setBitIndex(int index) {
-        this.bitIndex = index;
-    }
-
-    public TblColRef getColRef() {
-        return colRef;
-    }
-
-    void setColRef(TblColRef colRef) {
-        this.colRef = colRef;
-    }
-
-    @Override
-    public String toString() {
-        return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString();
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.model;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.kylin.common.util.StringUtil;
+import org.apache.kylin.dimension.DictionaryDimEnc;
+import org.apache.kylin.dimension.DimensionEncodingFactory;
+import org.apache.kylin.metadata.model.TblColRef;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Objects;
+import com.google.common.base.Preconditions;
+
+/**
+ * @author yangli9
+ * 
+ */
+@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE)
+public class RowKeyColDesc {
+
+    @JsonProperty("column")
+    private String column;
+    @JsonProperty("encoding")
+    private String encoding;
+    @JsonProperty("isUHC")
+    private boolean isUHC;//is ultra high cardinality column
+
+
+    // computed
+    private String encodingName;
+    private String[] encodingArgs;
+    private int bitIndex;
+    private TblColRef colRef;
+
+    public void init() {
+        Preconditions.checkState(StringUtils.isNotEmpty(this.encoding));
+
+        String[] parts = this.encoding.split("\\s*[(),:]\\s*");
+        if (parts == null || parts.length == 0 || parts[0].isEmpty())
+            throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'");
+
+        this.encodingName = parts[0];
+        this.encodingArgs = parts[parts.length - 1].isEmpty() //
+                ? StringUtil.subArray(parts, 1, parts.length - 1) : StringUtil.subArray(parts, 1, parts.length);
+
+        if (!DimensionEncodingFactory.isVaildEncoding(this.encodingName))
+            throw new IllegalArgumentException("Not supported row key col encoding: '" + this.encoding + "'");
+    }
+
+    public String getEncoding() {
+        return encoding;
+    }
+
+    public void setEncoding(String encoding) {
+        this.encoding = encoding;
+    }
+
+    public String getColumn() {
+        return column;
+    }
+
+    public void setColumn(String column) {
+        this.column = column;
+    }
+
+    public boolean isUHC() {
+        return isUHC;
+    }
+
+    public void setUHC(boolean UHC) {
+        isUHC = UHC;
+    }
+
+
+
+    public String getEncodingName() {
+        return encodingName;
+    }
+
+    public String[] getEncodingArgs() {
+        return encodingArgs;
+    }
+
+    public boolean isUsingDictionary() {
+        return DictionaryDimEnc.ENCODING_NAME.equals(encodingName);
+    }
+
+    public int getBitIndex() {
+        return bitIndex;
+    }
+
+    void setBitIndex(int index) {
+        this.bitIndex = index;
+    }
+
+    public TblColRef getColRef() {
+        return colRef;
+    }
+
+    void setColRef(TblColRef colRef) {
+        this.colRef = colRef;
+    }
+
+    @Override
+    public String toString() {
+        return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString();
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/kylin/blob/dfab0c14/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java
index 6052c08..2df1764 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java
@@ -20,7 +20,9 @@ package org.apache.kylin.cube.model;
 
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.kylin.metadata.model.TblColRef;
 
@@ -41,6 +43,7 @@ public class RowKeyDesc {
     private long fullMask;
     private CubeDesc cubeDesc;
     private Map<TblColRef, RowKeyColDesc> columnMap;
+    private Set<TblColRef> UHCColumns;
 
     public RowKeyColDesc[] getRowKeyColumns() {
         return rowkeyColumns;
@@ -65,6 +68,10 @@ public class RowKeyDesc {
         return getColDesc(col).isUsingDictionary();
     }
 
+    public Set<TblColRef> getUHCColumns() {
+        return UHCColumns;
+    }
+
     public void init(CubeDesc cubeDesc) {
 
         setCubeDesc(cubeDesc);
@@ -84,6 +91,7 @@ public class RowKeyDesc {
 
     private void buildRowKey(Map<String, TblColRef> colNameAbbr) {
         columnMap = new HashMap<TblColRef, RowKeyColDesc>();
+        UHCColumns = new HashSet<>();
 
         for (int i = 0; i < rowkeyColumns.length; i++) {
             RowKeyColDesc rowKeyColDesc = rowkeyColumns[i];
@@ -97,6 +105,11 @@ public class RowKeyDesc {
             }
 
             columnMap.put(rowKeyColDesc.getColRef(), rowKeyColDesc);
+
+            if (rowKeyColDesc.isUHC()) {
+                UHCColumns.add(rowKeyColDesc.getColRef());
+              
+            }
         }
 
         this.fullMask = 0L;

Reply via email to