KYLIN-2283 replace old FactTableGenerator with new ModelDataGenerator
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/f119a559 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/f119a559 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/f119a559 Branch: refs/heads/KYLIN-2283 Commit: f119a5590b3f8e0203a543f767fab0fbe674f717 Parents: a74140e Author: Yang Li <liy...@apache.org> Authored: Sat Dec 17 08:03:10 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sun Dec 18 08:24:21 2016 +0800 ---------------------------------------------------------------------- .../java/org/apache/kylin/job/DataGenTest.java | 56 -- .../java/org/apache/kylin/job/DeployUtil.java | 27 +- .../apache/kylin/job/dataGen/ColumnConfig.java | 80 --- .../kylin/job/dataGen/FactTableGenerator.java | 696 ------------------- .../org/apache/kylin/job/dataGen/GenConfig.java | 92 --- .../localmeta/data/data_gen_config.json | 65 -- 6 files changed, 8 insertions(+), 1008 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java b/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java deleted file mode 100644 index af4f9fb..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.job; - -import static org.junit.Assert.assertTrue; - -import org.apache.kylin.common.util.LocalFileMetadataTestCase; -import org.apache.kylin.job.dataGen.FactTableGenerator; -import org.apache.kylin.metadata.MetadataManager; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -/** - * - */ -public class DataGenTest extends LocalFileMetadataTestCase { - - @Before - public void before() throws Exception { - this.createTestMetadata(); - MetadataManager.clearCache(); - } - - @After - public void after() throws Exception { - this.cleanupTestMetadata(); - } - - @Test - public void testBasics() throws Exception { - String content = FactTableGenerator.generate("test_kylin_cube_with_slr_ready", "10000", "1", null);// default settings - //System.out.println(content); - assertTrue(content.contains("FP-non GTC")); - assertTrue(content.contains("ABIN")); - - //DeployUtil.overrideFactTableData(content, "default.test_kylin_fact"); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java index 23b3670..f65d2a4 100644 --- a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java +++ b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java @@ -33,20 +33,21 @@ import org.apache.commons.lang.StringUtils; import 
org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.ResourceTool; +import org.apache.kylin.common.util.HiveCmdBuilder; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeDescManager; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; -import org.apache.kylin.job.dataGen.FactTableGenerator; import org.apache.kylin.job.streaming.StreamDataLoader; import org.apache.kylin.job.streaming.StreamingTableDataGenerator; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.source.datagen.ModelDataGenerator; import org.apache.kylin.source.hive.HiveClientFactory; -import org.apache.kylin.common.util.HiveCmdBuilder; import org.apache.kylin.source.hive.IHiveClient; import org.apache.kylin.source.kafka.TimedJsonStreamParser; import org.apache.maven.model.Model; @@ -131,16 +132,15 @@ public class DeployUtil { public static void prepareTestDataForNormalCubes(String cubeName) throws Exception { - String factTableName = TABLE_KYLIN_FACT.toUpperCase(); - String content = null; - boolean buildCubeUsingProvidedData = Boolean.parseBoolean(System.getProperty("buildCubeUsingProvidedData")); if (!buildCubeUsingProvidedData) { System.out.println("build cube with random dataset"); + // data is generated according to cube descriptor and saved in resource store - content = FactTableGenerator.generate(cubeName, "10000", "0.6", null); - assert content != null; - overrideFactTableData(content, factTableName); + MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()); + DataModelDesc model = 
mgr.getDataModelDesc("test_kylin_inner_join_model_desc"); + ModelDataGenerator gen = new ModelDataGenerator(model, 1000); + gen.generate(); } else { System.out.println("build normal cubes with provided dataset"); } @@ -168,17 +168,6 @@ public class DeployUtil { appendFactTableData(sb.toString(), cubeInstance.getRootFactTable()); } - public static void overrideFactTableData(String factTableContent, String factTableName) throws IOException { - // Write to resource store - ResourceStore store = ResourceStore.getStore(config()); - - InputStream in = new ByteArrayInputStream(factTableContent.getBytes("UTF-8")); - String factTablePath = "/data/" + factTableName + ".csv"; - store.deleteResource(factTablePath); - store.putResource(factTablePath, in, System.currentTimeMillis()); - in.close(); - } - public static void appendFactTableData(String factTableContent, String factTableName) throws IOException { // Write to resource store ResourceStore store = ResourceStore.getStore(config()); http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java deleted file mode 100644 index 5e1c09f..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.job.dataGen; - -import java.util.ArrayList; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - */ -@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE) -public class ColumnConfig { - @JsonProperty("columnName") - private String columnName; - @JsonProperty("valueSet") - private ArrayList<String> valueSet; - @JsonProperty("exclusive") - private boolean exclusive; - @JsonProperty("asRange") - private boolean asRange; - @JsonProperty("differentiateByDateBoundary") - private boolean differentiateByDateBoundary; - - public boolean isAsRange() { - return asRange; - } - - public void setAsRange(boolean asRange) { - this.asRange = asRange; - } - - public boolean isExclusive() { - return exclusive; - } - - public void setExclusive(boolean exclusive) { - this.exclusive = exclusive; - } - - public String getColumnName() { - return columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public ArrayList<String> getValueSet() { - return valueSet; - } - - public void setValueSet(ArrayList<String> valueSet) { - this.valueSet = valueSet; - } - - public boolean isDifferentiateByDateBoundary() { - return differentiateByDateBoundary; - } - - public void setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) { - 
this.differentiateByDateBoundary = differentiateByDateBoundary; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java deleted file mode 100644 index 011035b..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java +++ /dev/null @@ -1,696 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.job.dataGen; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Array; -import org.apache.kylin.cube.CubeInstance; -import org.apache.kylin.cube.CubeManager; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.DimensionDesc; -import org.apache.kylin.metadata.MetadataManager; -import org.apache.kylin.metadata.datatype.DataType; -import org.apache.kylin.metadata.model.ColumnDesc; -import org.apache.kylin.metadata.model.JoinDesc; -import org.apache.kylin.metadata.model.MeasureDesc; -import org.apache.kylin.metadata.model.TblColRef; - -import com.google.common.collect.Lists; - -/** - */ -public class FactTableGenerator { - CubeInstance cube = null; - CubeDesc desc = null; - ResourceStore store = null; - String factTableName = null; - - GenConfig genConf = null; - - Random r = null; - - String cubeName; - long randomSeed; - int rowCount; - int unlinkableRowCount; - int unlinkableRowCountMax; - double conflictRatio; - double linkableRatio; - - long differentiateBoundary = -1; - List<Integer> differentiateColumns = Lists.newArrayList(); - - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); - - // the names of lookup table columns which is in relation with fact - // table(appear as fk in fact table) - TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // possible values of 
lookupTableKeys, extracted from existing lookup tables. - // The key is in the format of tablename/columnname - TreeMap<String, ArrayList<String>> feasibleValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // lookup table name -> sets of all composite keys - TreeMap<String, HashSet<Array<String>>> lookupTableCompositeKeyValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - private void init(String cubeName, int rowCount, double conflictRaio, double linkableRatio, long randomSeed) { - this.rowCount = rowCount; - this.conflictRatio = conflictRaio; - this.cubeName = cubeName; - this.randomSeed = randomSeed; - this.linkableRatio = linkableRatio; - - this.unlinkableRowCountMax = (int) (this.rowCount * (1 - linkableRatio)); - this.unlinkableRowCount = 0; - - r = new Random(randomSeed); - - KylinConfig config = KylinConfig.getInstanceFromEnv(); - cube = CubeManager.getInstance(config).getCube(cubeName); - desc = cube.getDescriptor(); - factTableName = cube.getRootFactTable(); - store = ResourceStore.getStore(config); - } - - /* - * users can specify the value preference for each column - */ - private void loadConfig() { - try { - InputStream configStream = store.getResource("/data/data_gen_config.json").inputStream; - this.genConf = GenConfig.loadConfig(configStream); - - if (configStream != null) - configStream.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - private void loadLookupTableValues(String lookupTableName, LinkedList<String> columnNames, int distinctRowCount) throws Exception { - KylinConfig config = KylinConfig.getInstanceFromEnv(); - - // only deal with composite keys - if (columnNames.size() > 1 && !lookupTableCompositeKeyValues.containsKey(lookupTableName)) { - lookupTableCompositeKeyValues.put(lookupTableName, new HashSet<Array<String>>()); - } - - InputStream tableStream = null; - BufferedReader tableReader = null; - try { - TreeMap<String, Integer> zeroBasedInice = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for 
(String columnName : columnNames) { - ColumnDesc cDesc = MetadataManager.getInstance(config).getTableDesc(lookupTableName).findColumnByName(columnName); - zeroBasedInice.put(columnName, cDesc.getZeroBasedIndex()); - } - - String path = "/data/" + lookupTableName + ".csv"; - tableStream = store.getResource(path).inputStream; - tableReader = new BufferedReader(new InputStreamReader(tableStream)); - tableReader.mark(0); - int rowCount = 0; - int curRowNum = 0; - String curRow; - - while (tableReader.readLine() != null) - rowCount++; - - HashSet<Integer> rows = new HashSet<Integer>(); - distinctRowCount = (distinctRowCount < rowCount) ? distinctRowCount : rowCount; - while (rows.size() < distinctRowCount) { - rows.add(r.nextInt(rowCount)); - } - - // reopen the stream - tableReader.close(); - tableStream.close(); - tableStream = null; - tableReader = null; - - tableStream = store.getResource(path).inputStream; - tableReader = new BufferedReader(new InputStreamReader(tableStream)); - - while ((curRow = tableReader.readLine()) != null) { - if (rows.contains(curRowNum)) { - String[] tokens = curRow.split(","); - - String[] comboKeys = null; - int index = 0; - if (columnNames.size() > 1) - comboKeys = new String[columnNames.size()]; - - for (String columnName : columnNames) { - int zeroBasedIndex = zeroBasedInice.get(columnName); - if (!feasibleValues.containsKey(lookupTableName + "/" + columnName)) - feasibleValues.put(lookupTableName + "/" + columnName, new ArrayList<String>()); - feasibleValues.get(lookupTableName + "/" + columnName).add(tokens[zeroBasedIndex]); - - if (columnNames.size() > 1) { - comboKeys[index] = tokens[zeroBasedIndex]; - index++; - } - } - - if (columnNames.size() > 1) { - Array<String> wrap = new Array<String>(comboKeys); - if (lookupTableCompositeKeyValues.get(lookupTableName).contains(wrap)) { - throw new Exception("The composite key already exist in the lookup table"); - } - lookupTableCompositeKeyValues.get(lookupTableName).add(wrap); - } - } - 
curRowNum++; - } - - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } finally { - if (tableStream != null) - tableStream.close(); - if (tableReader != null) - tableReader.close(); - } - } - - // prepare the candidate values for each joined column - private void prepare() throws Exception { - // load config - loadConfig(); - - int index = 0; - for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) { - ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName()); - - if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) { - if (!cDesc.getType().isStringFamily()) { - throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType()); - } - if (genConf.getDifferentiateBoundary() == null) { - throw new IllegalStateException("differentiateBoundary not provided"); - } - if (differentiateBoundary == -1) { - differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime(); - } - differentiateColumns.add(index); - } - index++; - } - - TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - - for (DimensionDesc dim : desc.getDimensions()) { - for (TblColRef col : dim.getColumnRefs()) { - if (col.getTable().equals(factTableName)) - factTableColumns.add(col.getName()); - } - - JoinDesc join = dim.getJoin(); - if (join != null) { - String lookupTable = dim.getTableRef().getTableIdentity(); - for (String column : dropAlias(join.getPrimaryKey())) { - if (!lookupTableKeys.containsKey(lookupTable)) { - lookupTableKeys.put(lookupTable, new LinkedList<String>()); - } - - if (!lookupTableKeys.get(lookupTable).contains(column)) - lookupTableKeys.get(lookupTable).add(column); - } - } - } - - int distinctRowCount = (int) (this.rowCount / this.conflictRatio); - distinctRowCount = (distinctRowCount == 0) ? 
1 : distinctRowCount; - // lookup tables - for (String lookupTable : lookupTableKeys.keySet()) { - this.loadLookupTableValues(lookupTable, lookupTableKeys.get(lookupTable), distinctRowCount); - } - } - - private List<DimensionDesc> getSortedDimentsionDescs() { - List<DimensionDesc> dimensions = desc.getDimensions(); - Collections.sort(dimensions, new Comparator<DimensionDesc>() { - @Override - public int compare(DimensionDesc o1, DimensionDesc o2) { - JoinDesc j1 = o2.getJoin(); - JoinDesc j2 = o1.getJoin(); - return Integer.valueOf(j1 != null ? j1.getPrimaryKey().length : 0).compareTo(j2 != null ? j2.getPrimaryKey().length : 0); - } - }); - return dimensions; - } - - /** - * Generate the fact table and return it as text - * - * @return - * @throws Exception - */ - private String cookData() throws Exception { - // the columns on the fact table can be classified into three groups: - // 1. foreign keys - TreeMap<String, String> factTableCol2LookupCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - // 2. metrics or directly used dimensions - TreeSet<String> usedCols = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - // 3. 
others, not referenced anywhere - - TreeMap<String, String> lookupCol2factTableCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // find fact table columns in fks - List<DimensionDesc> dimensions = getSortedDimentsionDescs(); - for (DimensionDesc dim : dimensions) { - JoinDesc jDesc = dim.getJoin(); - if (jDesc != null) { - String[] fks = dropAlias(jDesc.getForeignKey()); - String[] pks = dropAlias(jDesc.getPrimaryKey()); - int num = fks.length; - for (int i = 0; i < num; ++i) { - String value = dim.getTableRef().getTableIdentity() + "/" + pks[i]; - - lookupCol2factTableCol.put(value, fks[i]); - - if (factTableCol2LookupCol.containsKey(fks[i])) { - if (!factTableCol2LookupCol.get(fks[i]).equals(value)) { - System.out.println("Warning: Disambiguation on the mapping of column " + fks[i] + ", " + factTableCol2LookupCol.get(fks[i]) + "(chosen) or " + value); - continue; - } - } - factTableCol2LookupCol.put(fks[i], value); - } - } - //else, deal with it in next roung - } - - // find fact table columns in direct dimension - // DO NOT merge this with the previous loop - for (DimensionDesc dim : dimensions) { - JoinDesc jDesc = dim.getJoin(); - if (jDesc == null) { - // column on fact table used directly as a dimension - String aColumn = dim.getColumn(); - if (!factTableCol2LookupCol.containsKey(aColumn)) - usedCols.add(aColumn); - } - } - - // find fact table columns in measures - for (MeasureDesc mDesc : desc.getMeasures()) { - List<TblColRef> pcols = mDesc.getFunction().getParameter().getColRefs(); - if (pcols != null) { - for (TblColRef col : pcols) { - if (!factTableCol2LookupCol.containsKey(col.getName())) - usedCols.add(col.getName()); - } - } - } - - return createTable(this.rowCount, factTableCol2LookupCol, lookupCol2factTableCol, usedCols); - } - - private String[] dropAlias(String[] aliasDotCol) { - String[] result = new String[aliasDotCol.length]; - for (int i = 0; i < aliasDotCol.length; i++) { - String str = aliasDotCol[i]; - int cut = 
str.lastIndexOf('.'); - if (cut >= 0) { - str = str.substring(cut + 1); - } - result[i] = str; - } - return result; - } - - private String normToTwoDigits(int v) { - if (v < 10) - return "0" + v; - else - return Integer.toString(v); - } - - private String randomPick(ArrayList<String> candidates) { - int index = r.nextInt(candidates.size()); - return candidates.get(index); - } - - private String createRandomCell(ColumnDesc cDesc, ArrayList<String> range) throws Exception { - DataType type = cDesc.getType(); - if (type.isStringFamily()) { - throw new Exception("Can't handle range values for string"); - - } else if (type.isIntegerFamily()) { - int low = Integer.parseInt(range.get(0)); - int high = Integer.parseInt(range.get(1)); - return Integer.toString(r.nextInt(high - low) + low); - - } else if (type.isDouble()) { - double low = Double.parseDouble(range.get(0)); - double high = Double.parseDouble(range.get(1)); - return String.format("%.4f", r.nextDouble() * (high - low) + low); - - } else if (type.isFloat()) { - float low = Float.parseFloat(range.get(0)); - float high = Float.parseFloat(range.get(1)); - return String.format("%.4f", r.nextFloat() * (high - low) + low); - - } else if (type.isDecimal()) { - double low = Double.parseDouble(range.get(0)); - double high = Double.parseDouble(range.get(1)); - return String.format("%.4f", r.nextDouble() * (high - low) + low); - - } else if (type.isDateTimeFamily()) { - if (!type.isDate()) { - throw new RuntimeException("Does not support " + type); - } - - Date start = format.parse(range.get(0)); - Date end = format.parse(range.get(1)); - long diff = end.getTime() - start.getTime(); - Date temp = new Date(start.getTime() + (long) (diff * r.nextDouble())); - Calendar cal = Calendar.getInstance(); - cal.setTime(temp); - // first day - cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek()); - - return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + 
normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH)); - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private String createRandomCell(ColumnDesc cDesc) { - DataType type =cDesc.getType(); - String s = type.getName(); - if (s.equals("char") || s.equals("varchar")) { - StringBuilder sb = new StringBuilder(); - int len = Math.min(type.getPrecision(), 3); - for (int i = 0; i < len; i++) { - sb.append((char) ('a' + r.nextInt(10))); // cardinality at most 10x10x10 - } - return sb.toString(); - } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) { - return Integer.toString(r.nextInt(128)); - } else if (s.equals("double")) { - return String.format("%.4f", r.nextDouble() * 100); - } else if (s.equals("float")) { - return String.format("%.4f", r.nextFloat() * 100); - } else if (s.equals("decimal")) { - return String.format("%.4f", r.nextDouble() * 100); - } else if (s.equals("date")) { - long date20131231 = 61349312153265L; - long date20010101 = 60939158400000L; - long diff = date20131231 - date20010101; - Date temp = new Date(date20010101 + (long) (diff * r.nextDouble())); - Calendar cal = Calendar.getInstance(); - cal.setTime(temp); - // first day - cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek()); - - return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH)); - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private String createDefaultsCell(String type) { - String s = type.toLowerCase(); - if (s.equals("string") || s.equals("char") || s.equals("varchar")) { - return "abcde"; - } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) { - return "0"; - } else if (s.equals("double")) { - return "0"; - } else if (s.equals("float")) { - return "0"; - } else if 
(s.equals("decimal")) { - return "0"; - } else if (s.equals("date")) { - return "1970-01-01"; - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private void printColumnMappings(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) { - - System.out.println("======================================================================="); - System.out.format("%-30s %s", "FACT_TABLE_COLUMN", "MAPPING"); - System.out.println(); - System.out.println(); - for (Map.Entry<String, String> entry : factTableCol2LookupCol.entrySet()) { - System.out.format("%-30s %s", entry.getKey(), entry.getValue()); - System.out.println(); - } - for (String key : usedCols) { - System.out.format("%-30s %s", key, "Random Values"); - System.out.println(); - } - for (String key : defaultColumns) { - System.out.format("%-30s %s", key, "Default Values"); - System.out.println(); - } - System.out.println("======================================================================="); - - System.out.println("Parameters:"); - System.out.println(); - System.out.println("CubeName: " + cubeName); - System.out.println("RowCount: " + rowCount); - System.out.println("ConflictRatio: " + conflictRatio); - System.out.println("LinkableRatio: " + linkableRatio); - System.out.println("Seed: " + randomSeed); - System.out.println(); - System.out.println("The number of actual unlinkable fact rows is: " + this.unlinkableRowCount); - System.out.println("You can vary the above parameters to generate different datasets."); - System.out.println(); - } - - // Any row in the column must finally appear in the flatten big table. 
- // for single-column joins the generated row is guaranteed to have a match - // in lookup table - // for composite keys we'll need an extra check - private boolean matchAllCompositeKeys(TreeMap<String, String> lookupCol2FactTableCol, LinkedList<String> columnValues) { - KylinConfig config = KylinConfig.getInstanceFromEnv(); - - for (String lookupTable : lookupTableKeys.keySet()) { - if (lookupTableKeys.get(lookupTable).size() == 1) - continue; - - String[] comboKey = new String[lookupTableKeys.get(lookupTable).size()]; - int index = 0; - for (String column : lookupTableKeys.get(lookupTable)) { - String key = lookupTable + "/" + column; - String factTableCol = lookupCol2FactTableCol.get(key); - int cardinal = MetadataManager.getInstance(config).getTableDesc(factTableName).findColumnByName(factTableCol).getZeroBasedIndex(); - comboKey[index] = columnValues.get(cardinal); - - index++; - } - Array<String> wrap = new Array<String>(comboKey); - if (!lookupTableCompositeKeyValues.get(lookupTable).contains(wrap)) { - // System.out.println("Try " + wrap + " Failed, continue..."); - return false; - } - } - return true; - } - - private String createCell(ColumnDesc cDesc) throws Exception { - ColumnConfig cConfig = null; - - if ((cConfig = genConf.getColumnConfigByName(cDesc.getName())) == null) { - // if the column is not configured, use random values - return (createRandomCell(cDesc)); - - } else { - // the column has a configuration - if (!cConfig.isAsRange() && !cConfig.isExclusive() && r.nextBoolean()) { - // if the column still allows random values - return (createRandomCell(cDesc)); - - } else { - // use specified values - ArrayList<String> valueSet = cConfig.getValueSet(); - if (valueSet == null || valueSet.size() == 0) - throw new Exception("Did you forget to specify value set for " + cDesc.getName()); - - if (!cConfig.isAsRange()) { - return (randomPick(valueSet)); - } else { - if (valueSet.size() != 2) - throw new Exception("Only two values can be set for range 
values, the column: " + cDesc.getName()); - - return (createRandomCell(cDesc, valueSet)); - } - } - - } - } - - private LinkedList<String> createRow(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) throws Exception { - LinkedList<String> columnValues = new LinkedList<String>(); - - long currentRowTime = -1; - - for (TblColRef col : cube.getModel().getRootFactTable().getColumns()) { - - String colName = col.getName(); - - if (factTableCol2LookupCol.containsKey(colName)) { - - // if the current column is a fk column in fact table - ArrayList<String> candidates = this.feasibleValues.get(factTableCol2LookupCol.get(colName)); - - columnValues.add(candidates.get(r.nextInt(candidates.size()))); - } else if (usedCols.contains(colName)) { - // if the current column is a metric or dimension column in fact table - columnValues.add(createCell(col.getColumnDesc())); - } else { - - // otherwise this column is not useful in OLAP - columnValues.add(createDefaultsCell(col.getColumnDesc().getTypeName())); - defaultColumns.add(colName); - } - - if (col.equals(cube.getModel().getPartitionDesc().getPartitionDateColumnRef())) { - currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime(); - } - } - - for (Integer index : differentiateColumns) { - if (r.nextBoolean()) {//only change half of data - if (currentRowTime >= differentiateBoundary) { - columnValues.set(index, columnValues.get(index) + "_B"); - } else { - columnValues.set(index, columnValues.get(index) + "_A"); - } - } - } - - return columnValues; - } - - /** - * return the text of table contents(one line one row) - * - * @param rowCount - * @param factTableCol2LookupCol - * @param lookupCol2FactTableCol - * @param usedCols - * @return - * @throws Exception - */ - private String createTable(int rowCount, TreeMap<String, String> factTableCol2LookupCol, TreeMap<String, String> lookupCol2FactTableCol, TreeSet<String> usedCols) throws Exception { - try 
{ - TreeSet<String> defaultColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < rowCount;) { - - LinkedList<String> columnValues = createRow(factTableCol2LookupCol, usedCols, defaultColumns); - - if (!matchAllCompositeKeys(lookupCol2FactTableCol, columnValues)) { - if (unlinkableRowCount < unlinkableRowCountMax) { - unlinkableRowCount++; - } else { - continue; - } - } - - for (String c : columnValues) - sb.append(c + ","); - sb.deleteCharAt(sb.length() - 1); - sb.append(System.getProperty("line.separator")); - - i++; - - // System.out.println("Just generated the " + i + "th record"); - } - - printColumnMappings(factTableCol2LookupCol, usedCols, defaultColumns); - - return sb.toString(); - - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - return null; - } - - /** - * Randomly create a fact table and return the table content - * - * @param cubeName name of the cube - * @param rowCount expected row count generated - * @param linkableRatio the percentage of fact table rows that can be linked with all - * lookup table by INNER join - * @param randomSeed random seed - */ - public static String generate(String cubeName, String rowCount, String linkableRatio, String randomSeed) throws Exception { - - if (rowCount == null) - rowCount = "10000"; - if (linkableRatio == null) - linkableRatio = "0.6"; - - //if (randomSeed == null) - // don't give it value - - // String conflictRatio = "5";//this parameter do not allow configuring - // any more - - FactTableGenerator generator = new FactTableGenerator(); - long seed; - if (randomSeed != null) { - seed = Long.parseLong(randomSeed); - } else { - Random r = new Random(); - seed = r.nextLong(); - } - - generator.init(cubeName, Integer.parseInt(rowCount), 5, Double.parseDouble(linkableRatio), seed); - generator.prepare(); - return generator.cookData(); - } -} 
http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java deleted file mode 100644 index 5204d2a..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.job.dataGen; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.HashMap; - -import org.apache.kylin.common.util.JsonUtil; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.databind.JsonMappingException;

/**
 * Data-generation settings read from JSON: a list of per-column configurations plus a
 * date boundary used to make generated data differ before and after that date.
 *
 * Only the fields annotated with {@code @JsonProperty} take part in JSON mapping; automatic
 * detection of fields, getters and setters is switched off.
 */
@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE)
public class GenConfig {

    @JsonProperty("columnConfigs")
    private ArrayList<ColumnConfig> columnConfigs;

    @JsonProperty("differentiateBoundary")
    private String differentiateBoundary; // data before and after this date is generated differently, so that different segments hold different data

    // Memo of getColumnConfigByName results, keyed by lower-cased column name.
    // A null value records a known miss.
    private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>();

    public String getDifferentiateBoundary() {
        return differentiateBoundary;
    }

    public void setDifferentiateBoundary(String differentiateBoundary) {
        this.differentiateBoundary = differentiateBoundary;
    }

    public ArrayList<ColumnConfig> getColumnConfigs() {
        return columnConfigs;
    }

    /**
     * Replaces the column configurations and drops all memoized lookups, so that
     * {@link #getColumnConfigByName(String)} never answers from the previous list.
     */
    public void setColumnConfigs(ArrayList<ColumnConfig> columnConfigs) {
        this.columnConfigs = columnConfigs;
        cache.clear();
    }

    /**
     * Finds the configuration of a column, ignoring case. Results, including misses, are memoized.
     *
     * @param columnName column name to look up; must not be {@code null}
     * @return the first configuration whose column name matches, or {@code null} if there is none
     */
    public ColumnConfig getColumnConfigByName(String columnName) {
        // Locale.ROOT keeps the key stable regardless of the JVM default locale
        // (with a Turkish default locale, "ITEM_COUNT" would not lower-case to "item_count").
        String key = columnName.toLowerCase(java.util.Locale.ROOT);

        if (cache.containsKey(key)) {
            return cache.get(key);
        }

        ColumnConfig match = null;
        if (columnConfigs != null) { // the list is absent when the JSON had no "columnConfigs"
            for (ColumnConfig cConfig : columnConfigs) {
                if (cConfig.getColumnName().toLowerCase(java.util.Locale.ROOT).equals(key)) {
                    match = cConfig;
                    break;
                }
            }
        }
        cache.put(key, match);
        return match;
    }

    /**
     * Reads a configuration from a JSON stream. The stream is not closed here.
     *
     * @param stream JSON input
     * @return the parsed configuration, or {@code null} if the stream could not be read or
     *         mapped (the stack trace is printed in that case)
     */
    public static GenConfig loadConfig(InputStream stream) {
        // NOTE(review): failures are reported only via printStackTrace and a null result;
        // callers are not visible here, so this contract is kept as is.
        try {
            GenConfig config = JsonUtil.readValue(stream, GenConfig.class);
            return config;
        } catch (JsonMappingException e) {
            e.printStackTrace();
        } catch (JsonParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return null;
    }
}
http://git-wip-us.apache.org/repos/asf/kylin/blob/f119a559/examples/test_case_data/localmeta/data/data_gen_config.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/data/data_gen_config.json b/examples/test_case_data/localmeta/data/data_gen_config.json deleted file mode 100644 index 15b3fd0..0000000 --- a/examples/test_case_data/localmeta/data/data_gen_config.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "differentiateBoundary": "2013-07-01", - "columnConfigs": [ - { - "columnName": "lstg_format_name", - "valueSet": [ - "FP-GTC", - "FP-non GTC", - "ABIN", - "Auction", - "Others" - ], - "exclusive": true, - "differentiateByDateBoundary": true - }, - { - "columnName": "BUYER_COUNTRY", - "valueSet": [ - "CN", - "DE", - "FR", - "JP", - "UK", - "US" - ], - "exclusive": true - }, - { - "columnName": "SELLER_COUNTRY", - "valueSet": [ - "CN", - "DE", - "FR", - "JP", - "UK", - "US" - ], - "exclusive": true - }, - { - "columnName": "SELLER_ID", - "valueSet": [ - "10000000", - "10001000" - ], - "asRange": true - }, - { - "columnName": "ITEM_COUNT", - "valueSet": [ - "0", - "2000000" - ], - "asRange": true - }, - { - "columnName": "PRICE", - "valueSet": [ - "0", - "1000" - ], - "asRange": true - } - ] -}