KYLIN-2283 bug fix
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/c272daae Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/c272daae Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/c272daae Branch: refs/heads/KYLIN-2283 Commit: c272daae62e6e7e24d6f62f8590498677fad8ac2 Parents: f119a55 Author: Yang Li <liy...@apache.org> Authored: Sat Dec 17 10:41:35 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sun Dec 18 08:24:21 2016 +0800 ---------------------------------------------------------------------- .../kylin/source/datagen/ColumnGenConfig.java | 2 + .../kylin/source/datagen/ColumnGenerator.java | 58 ++++--- .../source/datagen/ModelDataGenerator.java | 163 ++++++++++++++----- .../kylin/source/datagen/TableGenConfig.java | 16 +- .../org/apache/kylin/source/datagen/Util.java | 4 +- .../kylin/source/datagen/DataGenTest.java | 21 ++- .../table/DEFAULT.TEST_KYLIN_FACT.json | 2 +- 7 files changed, 195 insertions(+), 71 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java index 3d04cf2..62da805 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java @@ -51,6 +51,7 @@ public class ColumnGenConfig { boolean genNull; double genNullPct; boolean order; + boolean unique; public ColumnGenConfig(ColumnDesc col, ModelDataGenerator modelGen) throws IOException { init(col, modelGen); @@ -83,6 +84,7 @@ public class ColumnGenConfig { genNull = Util.parseBoolean(config, "null", guessGenNull(col.getName())); genNullPct = Util.parseDouble(config, "nullpct", 0.01); order = Util.parseBoolean(config, "order", false); + unique = Util.parseBoolean(config, "uniq", modelGen.isPK(col)); } private int guessCardinality(String col) { http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java index fb7ec36..f171237 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java @@ -21,6 +21,7 @@ package org.apache.kylin.source.datagen; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; @@ -68,8 +69,8 @@ public class ColumnGenerator { result = new AddNullFilter(result, conf.genNullPct); } - if (conf.order) { - result = new OrderFilter(result, targetRows); + if (conf.order || conf.unique) { + result = new OrderFilter(result, conf.unique, targetRows); } return result; @@ -82,7 +83,7 @@ public class ColumnGenerator { } } - private static class RandomGen extends Base { + private class RandomGen extends Base { private DataType type; private String format; @@ -206,7 +207,7 @@ public class ColumnGenerator { } - private static class IDGen extends Base { + private class IDGen extends Base { int next; @@ -225,7 +226,7 @@ public class ColumnGenerator { } } - private static class DiscreteGen extends Base { + private class DiscreteGen extends Base { private List<String> values; private Random rand; @@ -254,7 +255,7 @@ public class ColumnGenerator { } } - private static class CardinalityFilter extends Base { + private class CardinalityFilter extends Base { private Iterator<String> input; private int card; @@ -286,7 +287,7 @@ public class ColumnGenerator { } } - private static class AddNullFilter extends Base { + private class AddNullFilter extends Base { private Iterator<String> input; private double nullPct; @@ -309,27 +310,38 @@ public class ColumnGenerator { } } - private static class OrderFilter extends Base { + final private Comparator<String> comp = new Comparator<String>() { + @Override + public int compare(String s1, String s2) { + if (s1 == null) { + return s2 == null ? 0 : -1; + } else if (s2 == null) { + return 1; + } else { + if (targetCol.getType().isNumberFamily()) + return Double.compare(Double.parseDouble(s1), Double.parseDouble(s2)); + else + return s1.compareTo(s2); + } + } + }; + + private class OrderFilter extends Base { private Iterator<String> iter; - public OrderFilter(Iterator<String> input, int targetRows) { - ArrayList<String> cache = new ArrayList<>(targetRows); - for (int i = 0; i < targetRows; i++) { + public OrderFilter(Iterator<String> input, boolean unique, int targetRows) { + Collection<String> cache = unique ? new TreeSet<String>(comp) : new ArrayList<String>(targetRows); + int cap = targetRows * 100; + for (int i = 0; cache.size() < targetRows; i++) { cache.add(input.next()); + if (i >= cap) + throw new IllegalStateException(); + } + + if (cache instanceof List) { + Collections.sort((List<String>) cache, comp); } - Collections.sort(cache, new Comparator<String>() { - @Override - public int compare(String s1, String s2) { - if (s1 == null) { - return s2 == null ? 0 : -1; - } else if (s2 == null) { - return 1; - } else { - return s1.compareTo(s2); - } - } - }); iter = cache.iterator(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java index b85703c..1319528 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java @@ -28,14 +28,18 @@ import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.JoinDesc; import org.apache.kylin.metadata.model.JoinTableDesc; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TblColRef; @@ -48,58 +52,55 @@ public class ModelDataGenerator { final private int targetRows; final private ResourceStore outputStore; final private String outputPath; - + boolean outprint = false; // for debug - + public ModelDataGenerator(DataModelDesc model, int nRows) { this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data"); } - + public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) { this.model = model; this.targetRows = nRows; this.outputStore = outputStore; this.outputPath = outputPath; } - + public void generate() throws IOException { Set<TableDesc> generated = new HashSet<>(); - + Set<TableDesc> allTableDesc = new LinkedHashSet<>(); + JoinTableDesc[] allTables = model.getJoinTables(); for (int i = allTables.length - 1; i >= -1; i--) { TableDesc table = (i == -1) ? model.getRootFactTable().getTableDesc() : allTables[i].getTableRef().getTableDesc(); + allTableDesc.add(table); + if (generated.contains(table)) continue; - + boolean gen = generateTable(table); - + if (gen) generated.add(table); } - - generateDDL(generated); + + generateDDL(allTableDesc); } private boolean generateTable(TableDesc table) throws IOException { - TableGenConfig config = new TableGenConfig(table); + TableGenConfig config = new TableGenConfig(table, this); if (!config.needGen) return false; - + ByteArrayOutputStream bout = new ByteArrayOutputStream(); PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8")); - + generateTableInternal(table, config, pout); - + pout.close(); bout.close(); - - byte[] content = bout.toByteArray(); - if (outprint) { - System.out.println("Generated " + path(table)); - System.out.println(Bytes.toString(content)); - } - - outputStore.putResource(path(table), new ByteArrayInputStream(content), System.currentTimeMillis()); + + saveResource(bout.toByteArray(), path(table)); return true; } @@ -107,33 +108,115 @@ public class ModelDataGenerator { ColumnDesc[] columns = table.getColumns(); ColumnGenerator[] colGens = new ColumnGenerator[columns.length]; Iterator<String>[] colIters = new Iterator[columns.length]; - + // config.rows is either a multiplier (0,1] or an absolute row number int tableRows = (int) ((config.rows > 1) ? config.rows : targetRows * config.rows); tableRows = Math.max(1, tableRows); - + // same seed for all columns, to ensure composite FK columns generate correct pairs long seed = System.currentTimeMillis(); - + for (int i = 0; i < columns.length; i++) { colGens[i] = new ColumnGenerator(columns[i], tableRows, this); colIters[i] = colGens[i].generate(seed); } - + for (int i = 0; i < tableRows; i++) { for (int c = 0; c < columns.length; c++) { if (c > 0) out.print(","); - + String v = colIters[c].next(); Preconditions.checkState(v == null || !v.contains(",")); - + out.print(v == null ? "\\N" : v); // \N is null for hive } out.print("\n"); } } + private void generateDDL(Set<TableDesc> tables) throws IOException { + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8")); + + generateDatabaseDDL(tables, pout); + generateCreateTableDDL(tables, pout); + generateLoadDataDDL(tables, pout); + + pout.close(); + bout.close(); + + saveResource(bout.toByteArray(), path(model)); + } + + private void generateDatabaseDDL(Set<TableDesc> tables, PrintWriter out) { + Set<String> dbs = new HashSet<>(); + for (TableDesc t : tables) { + String db = t.getDatabase(); + if (StringUtils.isBlank(db) == false && "DEFAULT".equals(db) == false) + dbs.add(db); + } + + for (String db : dbs) { + out.print("CREATE DATABASE IF NOT EXISTS " + db + ";\n"); + } + out.print("\n"); + } + + private void generateCreateTableDDL(Set<TableDesc> tables, PrintWriter out) { + for (TableDesc t : tables) { + out.print("DROP TABLE IF EXISTS " + t.getIdentity() + ";\n"); + + out.print("CREATE TABLE " + t.getIdentity() + "(" + "\n"); + + for (int i = 0; i < t.getColumns().length; i++) { + ColumnDesc col = t.getColumns()[i]; + out.print(" "); + if (i > 0) { + out.print(","); + } + out.print(col.getName() + " " + hiveType(col.getType()) + "\n"); + } + + out.print(")" + "\n"); + out.print("ROW FORMAT DELIMITED FIELDS TERMINATED BY ','" + "\n"); + out.print("STORED AS TEXTFILE" + ";\n"); + out.print("\n"); + } + } + + private String hiveType(DataType type) { + String t = type.toString(); + if (t.startsWith("varchar")) + return "string"; + else if (t.startsWith("integer")) + return "int"; + else + return t; + } + + private void generateLoadDataDDL(Set<TableDesc> tables, PrintWriter out) { + for (TableDesc t : tables) { + out.print("LOAD DATA LOCAL INPATH '" + t.getIdentity() + ".csv' OVERWRITE INTO TABLE " + t.getIdentity() + ";\n"); + } + } + + public boolean existsInStore(TableDesc table) throws IOException { + return outputStore.exists(path(table)); + } + + public boolean isPK(ColumnDesc col) { + for (JoinTableDesc joinTable : model.getJoinTables()) { + JoinDesc join = joinTable.getJoin(); + for (TblColRef pk : join.getPrimaryKeyColumns()) { + if (pk.getColumnDesc().equals(col)) + return true; + } + } + return false; + } + public List<String> getPkValuesIfIsFk(ColumnDesc fk) throws IOException { JoinTableDesc[] joinTables = model.getJoinTables(); for (int i = 0; i < joinTables.length; i++) { @@ -141,7 +224,7 @@ public class ModelDataGenerator { ColumnDesc pk = findPk(joinTable, fk); if (pk == null) continue; - + List<String> pkValues = getPkValues(pk); if (pkValues != null) return pkValues; @@ -157,15 +240,14 @@ public class ModelDataGenerator { } return null; } - + private List<String> getPkValues(ColumnDesc pk) throws IOException { - String path = path(pk.getTable()); - if (outputStore.exists(path) == false) + if (existsInStore(pk.getTable()) == false) return null; List<String> r = new ArrayList<>(); - - BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path).inputStream, "UTF-8")); + + BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path(pk.getTable())).inputStream, "UTF-8")); try { String line; while ((line = in.readLine()) != null) { @@ -177,15 +259,22 @@ public class ModelDataGenerator { return r; } - private void generateDDL(Set<TableDesc> generated) { - // TODO Auto-generated method stub - + private void saveResource(byte[] content, String path) throws IOException { + if (outprint) { + System.out.println("Generated " + path); + System.out.println(Bytes.toString(content)); + } + outputStore.putResource(path, new ByteArrayInputStream(content), System.currentTimeMillis()); } private String path(TableDesc table) { return outputPath + "/" + table.getIdentity() + ".csv"; } - + + private String path(DataModelDesc model) { + return outputPath + "/" + "ddl_" + model.getName() + ".sql"; + } + public DataModelDesc getModle() { return model; } http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java index 1c00d3d..be948c1 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java @@ -18,6 +18,7 @@ package org.apache.kylin.source.datagen; +import java.io.IOException; import java.util.Map; import org.apache.kylin.metadata.model.TableDesc; @@ -27,18 +28,21 @@ public class TableGenConfig { boolean needGen; double rows; - public TableGenConfig(TableDesc table) { - init(table.getDataGen()); - } - - private void init(String dataGen) { + public TableGenConfig(TableDesc table, ModelDataGenerator modelGen) throws IOException { + String dataGen = table.getDataGen(); + if (dataGen == null && modelGen.existsInStore(table) == false) { + dataGen = ""; + } + if (dataGen == null) return; needGen = true; Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "rows"); - rows = Util.parseDouble(config, "rows", 1.0); + + // config.rows is either a multiplier (0,1] or an absolute row number + rows = Util.parseDouble(config, "rows", modelGen.getModle().isFactTable(table.getIdentity()) ? 1.0 : 20); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java index f2e8dbf..ca27bbf 100644 --- a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java @@ -21,12 +21,14 @@ package org.apache.kylin.source.datagen; import java.util.LinkedHashMap; import java.util.Map; +import org.apache.commons.lang3.StringUtils; + public class Util { static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) { Map<String, String> r = new LinkedHashMap<>(); - if (equalCommaPairs == null) + if (StringUtils.isBlank(equalCommaPairs)) return r; for (String s : equalCommaPairs.split(",")) { http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java index 70aba04..82455ab 100644 --- a/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/source/datagen/DataGenTest.java @@ -41,12 +41,27 @@ public class DataGenTest extends LocalFileMetadataTestCase { } @Test - public void testBasics() throws IOException { - MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()); - DataModelDesc model = mgr.getDataModelDesc("test_kylin_inner_join_model_desc"); + public void testCIConfigured() throws IOException { + DataModelDesc model = getModel("test_kylin_inner_join_model_desc"); + ModelDataGenerator gen = new ModelDataGenerator(model, 100); + gen.outprint = true; + + gen.generate(); + } + + @Test + public void testSSBNoConfig() throws IOException { + DataModelDesc model = getModel("ssb"); ModelDataGenerator gen = new ModelDataGenerator(model, 100); gen.outprint = true; gen.generate(); } + + private DataModelDesc getModel(String name) { + MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()); + DataModelDesc model = mgr.getDataModelDesc(name); + return model; + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/c272daae/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json index 37d8e56..74eb045 100644 --- a/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json +++ b/examples/test_case_data/localmeta/table/DEFAULT.TEST_KYLIN_FACT.json @@ -11,7 +11,7 @@ "id" : "2", "name" : "CAL_DT", "datatype" : "date", - "data_gen" : "FK,order,null" + "data_gen" : "FK,order" }, { "id" : "3", "name" : "LSTG_FORMAT_NAME",