KYLIN-1954 BuildInFunctionTransformer should be executed per CubeSegmentScanner
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/cc9acbc2 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/cc9acbc2 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/cc9acbc2 Branch: refs/heads/1.5.x-CDH5.7 Commit: cc9acbc2e30b5a2c4a2a0d4b6bb056bc716f1fbb Parents: f48f7fa Author: Hongbin Ma <mahong...@apache.org> Authored: Mon Aug 29 18:28:30 2016 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Mon Aug 29 18:28:37 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/job/dataGen/ColumnConfig.java | 9 ++++ .../kylin/job/dataGen/FactTableGenerator.java | 44 ++++++++++++++++++-- .../org/apache/kylin/job/dataGen/GenConfig.java | 11 +++++ .../storage/gtrecord/CubeSegmentScanner.java | 9 +++- .../localmeta/data/data_gen_config.json | 4 +- 5 files changed, 72 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java index 44ba8f4..5e1c09f 100644 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java +++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java @@ -35,6 +35,8 @@ public class ColumnConfig { private boolean exclusive; @JsonProperty("asRange") private boolean asRange; + @JsonProperty("differentiateByDateBoundary") + private boolean differentiateByDateBoundary; public boolean isAsRange() { return asRange; @@ -68,4 +70,11 @@ public class ColumnConfig { this.valueSet = valueSet; } + public boolean isDifferentiateByDateBoundary() { + return differentiateByDateBoundary; + } + + public void 
setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) { + this.differentiateByDateBoundary = differentiateByDateBoundary; + } } http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java index 368f509..9373a02 100644 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java +++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java @@ -50,6 +50,8 @@ import org.apache.kylin.metadata.model.JoinDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TblColRef; +import com.google.common.collect.Lists; + /** */ public class FactTableGenerator { @@ -70,6 +72,11 @@ public class FactTableGenerator { double conflictRatio; double linkableRatio; + long differentiateBoundary = -1; + List<Integer> differentiateColumns = Lists.newArrayList(); + + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); + // the names of lookup table columns which is in relation with fact // table(appear as fk in fact table) TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); @@ -207,6 +214,25 @@ public class FactTableGenerator { // load config loadConfig(); + int index = 0; + for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) { + ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName()); + + if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) { + if (!cDesc.getType().isStringFamily()) { + throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType()); + } + if 
(genConf.getDifferentiateBoundary() == null) { + throw new IllegalStateException("differentiateBoundary not provided"); + } + if (differentiateBoundary == -1) { + differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime(); + } + differentiateColumns.add(index); + } + index++; + } + TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); for (DimensionDesc dim : desc.getDimensions()) { @@ -359,7 +385,6 @@ public class FactTableGenerator { throw new RuntimeException("Does not support " + type); } - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); Date start = format.parse(range.get(0)); Date end = format.parse(range.get(1)); long diff = end.getTime() - start.getTime(); @@ -533,6 +558,8 @@ public class FactTableGenerator { KylinConfig config = KylinConfig.getInstanceFromEnv(); LinkedList<String> columnValues = new LinkedList<String>(); + long currentRowTime = -1; + for (ColumnDesc cDesc : MetadataManager.getInstance(config).getTableDesc(factTableName).getColumns()) { String colName = cDesc.getName(); @@ -544,8 +571,7 @@ public class FactTableGenerator { columnValues.add(candidates.get(r.nextInt(candidates.size()))); } else if (usedCols.contains(colName)) { - - // if the current column is a metric column in fact table + // if the current column is a metric or dimension column in fact table columnValues.add(createCell(cDesc)); } else { @@ -553,6 +579,18 @@ public class FactTableGenerator { columnValues.add(createDefaultsCell(cDesc.getTypeName())); defaultColumns.add(colName); } + + if (cDesc.getRef().equals(this.cube.getDescriptor().getModel().getPartitionDesc().getPartitionDateColumnRef())) { + currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime(); + } + } + + for (Integer index : differentiateColumns) { + if (currentRowTime >= differentiateBoundary) { + columnValues.set(index, columnValues.get(index) + "_B"); + } else { + columnValues.set(index, columnValues.get(index) + "_A"); 
+ } } return columnValues; http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java index c58cfb6..5204d2a 100644 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java +++ b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java @@ -38,8 +38,19 @@ public class GenConfig { @JsonProperty("columnConfigs") private ArrayList<ColumnConfig> columnConfigs; + @JsonProperty("differentiateBoundary") + private String differentiateBoundary; //data before and after the provided date will be different, so that different segments will have different data + private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>(); + public String getDifferentiateBoundary() { + return differentiateBoundary; + } + + public void setDifferentiateBoundary(String differentiateBoundary) { + this.differentiateBoundary = differentiateBoundary; + } + public ArrayList<ColumnConfig> getColumnConfigs() { return columnConfigs; } http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java ---------------------------------------------------------------------- diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java index 4365ee2..6ed7d3b 100644 --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeSegmentScanner.java @@ -34,7 +34,9 @@ import org.apache.kylin.gridtable.GTScanRequest; import org.apache.kylin.gridtable.IGTScanner; import 
org.apache.kylin.gridtable.ScannerWorker; import org.apache.kylin.metadata.filter.ITupleFilterTransformer; +import org.apache.kylin.metadata.filter.StringCodeSystem; import org.apache.kylin.metadata.filter.TupleFilter; +import org.apache.kylin.metadata.filter.TupleFilterSerializer; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.storage.StorageContext; @@ -52,10 +54,15 @@ public class CubeSegmentScanner implements IGTScanner { final GTScanRequest scanRequest; public CubeSegmentScanner(CubeSegment cubeSeg, Cuboid cuboid, Set<TblColRef> dimensions, Set<TblColRef> groups, // - Collection<FunctionDesc> metrics, TupleFilter filter, StorageContext context, String gtStorage) { + Collection<FunctionDesc> metrics, TupleFilter originalfilter, StorageContext context, String gtStorage) { this.cuboid = cuboid; this.cubeSeg = cubeSeg; + //the filter might be changed later in this CubeSegmentScanner (In ITupleFilterTransformer) + //to avoid issues like in https://issues.apache.org/jira/browse/KYLIN-1954, make sure each CubeSegmentScanner + //is working on its own copy + byte[] serialize = TupleFilterSerializer.serialize(originalfilter, StringCodeSystem.INSTANCE); + TupleFilter filter = TupleFilterSerializer.deserialize(serialize, StringCodeSystem.INSTANCE); // translate FunctionTupleFilter to IN clause ITupleFilterTransformer translator = new BuildInFunctionTransformer(cubeSeg.getDimensionEncodingMap()); filter = translator.transform(filter); http://git-wip-us.apache.org/repos/asf/kylin/blob/cc9acbc2/examples/test_case_data/localmeta/data/data_gen_config.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/data/data_gen_config.json b/examples/test_case_data/localmeta/data/data_gen_config.json index ff3f676..f730058 100644 --- a/examples/test_case_data/localmeta/data/data_gen_config.json +++ 
b/examples/test_case_data/localmeta/data/data_gen_config.json @@ -1,4 +1,5 @@ { + "differentiateBoundary": "2013-01-01", "columnConfigs": [ { "columnName": "lstg_format_name", @@ -9,7 +10,8 @@ "Auction", "Others" ], - "exclusive": true + "exclusive": true, + "differentiateByDateBoundary": true }, { "columnName": "SELLER_ID",