KYLIN-2283 A new data gen tool for CI
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d1175d2c Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d1175d2c Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d1175d2c Branch: refs/heads/master-cdh5.7 Commit: d1175d2c414853f4d223601a2db096f7f963ce72 Parents: e14f4e1 Author: Li Yang <liy...@apache.org> Authored: Thu Dec 15 18:51:00 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sun Dec 18 17:23:18 2016 +0800 ---------------------------------------------------------------------- .../java/org/apache/kylin/job/DataGenTest.java | 56 -- .../java/org/apache/kylin/job/DeployUtil.java | 27 +- .../apache/kylin/job/dataGen/ColumnConfig.java | 80 --- .../kylin/job/dataGen/FactTableGenerator.java | 696 ------------------- .../org/apache/kylin/job/dataGen/GenConfig.java | 92 --- .../apache/kylin/metadata/model/ColumnDesc.java | 8 + .../apache/kylin/metadata/model/TableDesc.java | 9 + .../kylin/source/datagen/ColumnGenConfig.java | 110 +++ .../kylin/source/datagen/ColumnGenerator.java | 361 ++++++++++ .../source/datagen/ModelDataGenerator.java | 282 ++++++++ .../kylin/source/datagen/TableGenConfig.java | 48 ++ .../org/apache/kylin/source/datagen/Util.java | 75 ++ .../topn/TopNCounterSerializerTest.java | 76 -- .../hllc/NewHyperLogLogBenchmarkTest.java | 2 + .../measure/topn/TopNCounterSerializerTest.java | 76 ++ .../kylin/source/datagen/DataGenTest.java | 67 ++ .../localmeta/data/data_gen_config.json | 65 -- .../localmeta/data/flatten_data_for_ii.csv | 402 ----------- .../table/DEFAULT.TEST_KYLIN_FACT.json | 32 +- .../org/apache/kylin/jdbc/ITJDBCDriverTest.java | 4 +- .../resources/query/sql_timeout/query02.sql | 2 +- 21 files changed, 1070 insertions(+), 1500 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java 
---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java b/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java deleted file mode 100644 index af4f9fb..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/DataGenTest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.job; - -import static org.junit.Assert.assertTrue; - -import org.apache.kylin.common.util.LocalFileMetadataTestCase; -import org.apache.kylin.job.dataGen.FactTableGenerator; -import org.apache.kylin.metadata.MetadataManager; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -/** - * - */ -public class DataGenTest extends LocalFileMetadataTestCase { - - @Before - public void before() throws Exception { - this.createTestMetadata(); - MetadataManager.clearCache(); - } - - @After - public void after() throws Exception { - this.cleanupTestMetadata(); - } - - @Test - public void testBasics() throws Exception { - String content = FactTableGenerator.generate("test_kylin_cube_with_slr_ready", "10000", "1", null);// default settings - //System.out.println(content); - assertTrue(content.contains("FP-non GTC")); - assertTrue(content.contains("ABIN")); - - //DeployUtil.overrideFactTableData(content, "default.test_kylin_fact"); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java index 23b3670..8fc583d 100644 --- a/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java +++ b/assembly/src/test/java/org/apache/kylin/job/DeployUtil.java @@ -33,20 +33,21 @@ import org.apache.commons.lang.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.ResourceTool; +import org.apache.kylin.common.util.HiveCmdBuilder; import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.cube.CubeDescManager; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; -import 
org.apache.kylin.job.dataGen.FactTableGenerator; import org.apache.kylin.job.streaming.StreamDataLoader; import org.apache.kylin.job.streaming.StreamingTableDataGenerator; import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.source.datagen.ModelDataGenerator; import org.apache.kylin.source.hive.HiveClientFactory; -import org.apache.kylin.common.util.HiveCmdBuilder; import org.apache.kylin.source.hive.IHiveClient; import org.apache.kylin.source.kafka.TimedJsonStreamParser; import org.apache.maven.model.Model; @@ -131,16 +132,15 @@ public class DeployUtil { public static void prepareTestDataForNormalCubes(String cubeName) throws Exception { - String factTableName = TABLE_KYLIN_FACT.toUpperCase(); - String content = null; - boolean buildCubeUsingProvidedData = Boolean.parseBoolean(System.getProperty("buildCubeUsingProvidedData")); if (!buildCubeUsingProvidedData) { System.out.println("build cube with random dataset"); + // data is generated according to cube descriptor and saved in resource store - content = FactTableGenerator.generate(cubeName, "10000", "0.6", null); - assert content != null; - overrideFactTableData(content, factTableName); + MetadataManager mgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()); + DataModelDesc model = mgr.getDataModelDesc("test_kylin_inner_join_model_desc"); + ModelDataGenerator gen = new ModelDataGenerator(model, 10000); + gen.generate(); } else { System.out.println("build normal cubes with provided dataset"); } @@ -168,17 +168,6 @@ public class DeployUtil { appendFactTableData(sb.toString(), cubeInstance.getRootFactTable()); } - public static void overrideFactTableData(String factTableContent, String factTableName) throws IOException { - // 
Write to resource store - ResourceStore store = ResourceStore.getStore(config()); - - InputStream in = new ByteArrayInputStream(factTableContent.getBytes("UTF-8")); - String factTablePath = "/data/" + factTableName + ".csv"; - store.deleteResource(factTablePath); - store.putResource(factTablePath, in, System.currentTimeMillis()); - in.close(); - } - public static void appendFactTableData(String factTableContent, String factTableName) throws IOException { // Write to resource store ResourceStore store = ResourceStore.getStore(config()); http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java deleted file mode 100644 index 5e1c09f..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/ColumnConfig.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.job.dataGen; - -import java.util.ArrayList; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - */ -@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE) -public class ColumnConfig { - @JsonProperty("columnName") - private String columnName; - @JsonProperty("valueSet") - private ArrayList<String> valueSet; - @JsonProperty("exclusive") - private boolean exclusive; - @JsonProperty("asRange") - private boolean asRange; - @JsonProperty("differentiateByDateBoundary") - private boolean differentiateByDateBoundary; - - public boolean isAsRange() { - return asRange; - } - - public void setAsRange(boolean asRange) { - this.asRange = asRange; - } - - public boolean isExclusive() { - return exclusive; - } - - public void setExclusive(boolean exclusive) { - this.exclusive = exclusive; - } - - public String getColumnName() { - return columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public ArrayList<String> getValueSet() { - return valueSet; - } - - public void setValueSet(ArrayList<String> valueSet) { - this.valueSet = valueSet; - } - - public boolean isDifferentiateByDateBoundary() { - return differentiateByDateBoundary; - } - - public void setDifferentiateByDateBoundary(boolean differentiateByDateBoundary) { - this.differentiateByDateBoundary = differentiateByDateBoundary; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java deleted 
file mode 100644 index 011035b..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/FactTableGenerator.java +++ /dev/null @@ -1,696 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.job.dataGen; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Array; -import org.apache.kylin.cube.CubeInstance; -import org.apache.kylin.cube.CubeManager; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.DimensionDesc; -import org.apache.kylin.metadata.MetadataManager; -import org.apache.kylin.metadata.datatype.DataType; -import org.apache.kylin.metadata.model.ColumnDesc; -import 
org.apache.kylin.metadata.model.JoinDesc; -import org.apache.kylin.metadata.model.MeasureDesc; -import org.apache.kylin.metadata.model.TblColRef; - -import com.google.common.collect.Lists; - -/** - */ -public class FactTableGenerator { - CubeInstance cube = null; - CubeDesc desc = null; - ResourceStore store = null; - String factTableName = null; - - GenConfig genConf = null; - - Random r = null; - - String cubeName; - long randomSeed; - int rowCount; - int unlinkableRowCount; - int unlinkableRowCountMax; - double conflictRatio; - double linkableRatio; - - long differentiateBoundary = -1; - List<Integer> differentiateColumns = Lists.newArrayList(); - - SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); - - // the names of lookup table columns which is in relation with fact - // table(appear as fk in fact table) - TreeMap<String, LinkedList<String>> lookupTableKeys = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // possible values of lookupTableKeys, extracted from existing lookup tables. 
- // The key is in the format of tablename/columnname - TreeMap<String, ArrayList<String>> feasibleValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // lookup table name -> sets of all composite keys - TreeMap<String, HashSet<Array<String>>> lookupTableCompositeKeyValues = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - private void init(String cubeName, int rowCount, double conflictRaio, double linkableRatio, long randomSeed) { - this.rowCount = rowCount; - this.conflictRatio = conflictRaio; - this.cubeName = cubeName; - this.randomSeed = randomSeed; - this.linkableRatio = linkableRatio; - - this.unlinkableRowCountMax = (int) (this.rowCount * (1 - linkableRatio)); - this.unlinkableRowCount = 0; - - r = new Random(randomSeed); - - KylinConfig config = KylinConfig.getInstanceFromEnv(); - cube = CubeManager.getInstance(config).getCube(cubeName); - desc = cube.getDescriptor(); - factTableName = cube.getRootFactTable(); - store = ResourceStore.getStore(config); - } - - /* - * users can specify the value preference for each column - */ - private void loadConfig() { - try { - InputStream configStream = store.getResource("/data/data_gen_config.json").inputStream; - this.genConf = GenConfig.loadConfig(configStream); - - if (configStream != null) - configStream.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - private void loadLookupTableValues(String lookupTableName, LinkedList<String> columnNames, int distinctRowCount) throws Exception { - KylinConfig config = KylinConfig.getInstanceFromEnv(); - - // only deal with composite keys - if (columnNames.size() > 1 && !lookupTableCompositeKeyValues.containsKey(lookupTableName)) { - lookupTableCompositeKeyValues.put(lookupTableName, new HashSet<Array<String>>()); - } - - InputStream tableStream = null; - BufferedReader tableReader = null; - try { - TreeMap<String, Integer> zeroBasedInice = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (String columnName : columnNames) { - ColumnDesc cDesc = 
MetadataManager.getInstance(config).getTableDesc(lookupTableName).findColumnByName(columnName); - zeroBasedInice.put(columnName, cDesc.getZeroBasedIndex()); - } - - String path = "/data/" + lookupTableName + ".csv"; - tableStream = store.getResource(path).inputStream; - tableReader = new BufferedReader(new InputStreamReader(tableStream)); - tableReader.mark(0); - int rowCount = 0; - int curRowNum = 0; - String curRow; - - while (tableReader.readLine() != null) - rowCount++; - - HashSet<Integer> rows = new HashSet<Integer>(); - distinctRowCount = (distinctRowCount < rowCount) ? distinctRowCount : rowCount; - while (rows.size() < distinctRowCount) { - rows.add(r.nextInt(rowCount)); - } - - // reopen the stream - tableReader.close(); - tableStream.close(); - tableStream = null; - tableReader = null; - - tableStream = store.getResource(path).inputStream; - tableReader = new BufferedReader(new InputStreamReader(tableStream)); - - while ((curRow = tableReader.readLine()) != null) { - if (rows.contains(curRowNum)) { - String[] tokens = curRow.split(","); - - String[] comboKeys = null; - int index = 0; - if (columnNames.size() > 1) - comboKeys = new String[columnNames.size()]; - - for (String columnName : columnNames) { - int zeroBasedIndex = zeroBasedInice.get(columnName); - if (!feasibleValues.containsKey(lookupTableName + "/" + columnName)) - feasibleValues.put(lookupTableName + "/" + columnName, new ArrayList<String>()); - feasibleValues.get(lookupTableName + "/" + columnName).add(tokens[zeroBasedIndex]); - - if (columnNames.size() > 1) { - comboKeys[index] = tokens[zeroBasedIndex]; - index++; - } - } - - if (columnNames.size() > 1) { - Array<String> wrap = new Array<String>(comboKeys); - if (lookupTableCompositeKeyValues.get(lookupTableName).contains(wrap)) { - throw new Exception("The composite key already exist in the lookup table"); - } - lookupTableCompositeKeyValues.get(lookupTableName).add(wrap); - } - } - curRowNum++; - } - - } catch (IOException e) { - 
e.printStackTrace(); - System.exit(1); - } finally { - if (tableStream != null) - tableStream.close(); - if (tableReader != null) - tableReader.close(); - } - } - - // prepare the candidate values for each joined column - private void prepare() throws Exception { - // load config - loadConfig(); - - int index = 0; - for (ColumnDesc cDesc : MetadataManager.getInstance(KylinConfig.getInstanceFromEnv()).getTableDesc(factTableName).getColumns()) { - ColumnConfig cConfig = genConf.getColumnConfigByName(cDesc.getName()); - - if (cConfig != null && cConfig.isDifferentiateByDateBoundary()) { - if (!cDesc.getType().isStringFamily()) { - throw new IllegalStateException("differentiateByDateBoundary only applies to text types, actual:" + cDesc.getType()); - } - if (genConf.getDifferentiateBoundary() == null) { - throw new IllegalStateException("differentiateBoundary not provided"); - } - if (differentiateBoundary == -1) { - differentiateBoundary = format.parse(genConf.getDifferentiateBoundary()).getTime(); - } - differentiateColumns.add(index); - } - index++; - } - - TreeSet<String> factTableColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - - for (DimensionDesc dim : desc.getDimensions()) { - for (TblColRef col : dim.getColumnRefs()) { - if (col.getTable().equals(factTableName)) - factTableColumns.add(col.getName()); - } - - JoinDesc join = dim.getJoin(); - if (join != null) { - String lookupTable = dim.getTableRef().getTableIdentity(); - for (String column : dropAlias(join.getPrimaryKey())) { - if (!lookupTableKeys.containsKey(lookupTable)) { - lookupTableKeys.put(lookupTable, new LinkedList<String>()); - } - - if (!lookupTableKeys.get(lookupTable).contains(column)) - lookupTableKeys.get(lookupTable).add(column); - } - } - } - - int distinctRowCount = (int) (this.rowCount / this.conflictRatio); - distinctRowCount = (distinctRowCount == 0) ? 
1 : distinctRowCount; - // lookup tables - for (String lookupTable : lookupTableKeys.keySet()) { - this.loadLookupTableValues(lookupTable, lookupTableKeys.get(lookupTable), distinctRowCount); - } - } - - private List<DimensionDesc> getSortedDimentsionDescs() { - List<DimensionDesc> dimensions = desc.getDimensions(); - Collections.sort(dimensions, new Comparator<DimensionDesc>() { - @Override - public int compare(DimensionDesc o1, DimensionDesc o2) { - JoinDesc j1 = o2.getJoin(); - JoinDesc j2 = o1.getJoin(); - return Integer.valueOf(j1 != null ? j1.getPrimaryKey().length : 0).compareTo(j2 != null ? j2.getPrimaryKey().length : 0); - } - }); - return dimensions; - } - - /** - * Generate the fact table and return it as text - * - * @return - * @throws Exception - */ - private String cookData() throws Exception { - // the columns on the fact table can be classified into three groups: - // 1. foreign keys - TreeMap<String, String> factTableCol2LookupCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - // 2. metrics or directly used dimensions - TreeSet<String> usedCols = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - // 3. 
others, not referenced anywhere - - TreeMap<String, String> lookupCol2factTableCol = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - - // find fact table columns in fks - List<DimensionDesc> dimensions = getSortedDimentsionDescs(); - for (DimensionDesc dim : dimensions) { - JoinDesc jDesc = dim.getJoin(); - if (jDesc != null) { - String[] fks = dropAlias(jDesc.getForeignKey()); - String[] pks = dropAlias(jDesc.getPrimaryKey()); - int num = fks.length; - for (int i = 0; i < num; ++i) { - String value = dim.getTableRef().getTableIdentity() + "/" + pks[i]; - - lookupCol2factTableCol.put(value, fks[i]); - - if (factTableCol2LookupCol.containsKey(fks[i])) { - if (!factTableCol2LookupCol.get(fks[i]).equals(value)) { - System.out.println("Warning: Disambiguation on the mapping of column " + fks[i] + ", " + factTableCol2LookupCol.get(fks[i]) + "(chosen) or " + value); - continue; - } - } - factTableCol2LookupCol.put(fks[i], value); - } - } - //else, deal with it in next roung - } - - // find fact table columns in direct dimension - // DO NOT merge this with the previous loop - for (DimensionDesc dim : dimensions) { - JoinDesc jDesc = dim.getJoin(); - if (jDesc == null) { - // column on fact table used directly as a dimension - String aColumn = dim.getColumn(); - if (!factTableCol2LookupCol.containsKey(aColumn)) - usedCols.add(aColumn); - } - } - - // find fact table columns in measures - for (MeasureDesc mDesc : desc.getMeasures()) { - List<TblColRef> pcols = mDesc.getFunction().getParameter().getColRefs(); - if (pcols != null) { - for (TblColRef col : pcols) { - if (!factTableCol2LookupCol.containsKey(col.getName())) - usedCols.add(col.getName()); - } - } - } - - return createTable(this.rowCount, factTableCol2LookupCol, lookupCol2factTableCol, usedCols); - } - - private String[] dropAlias(String[] aliasDotCol) { - String[] result = new String[aliasDotCol.length]; - for (int i = 0; i < aliasDotCol.length; i++) { - String str = aliasDotCol[i]; - int cut = 
str.lastIndexOf('.'); - if (cut >= 0) { - str = str.substring(cut + 1); - } - result[i] = str; - } - return result; - } - - private String normToTwoDigits(int v) { - if (v < 10) - return "0" + v; - else - return Integer.toString(v); - } - - private String randomPick(ArrayList<String> candidates) { - int index = r.nextInt(candidates.size()); - return candidates.get(index); - } - - private String createRandomCell(ColumnDesc cDesc, ArrayList<String> range) throws Exception { - DataType type = cDesc.getType(); - if (type.isStringFamily()) { - throw new Exception("Can't handle range values for string"); - - } else if (type.isIntegerFamily()) { - int low = Integer.parseInt(range.get(0)); - int high = Integer.parseInt(range.get(1)); - return Integer.toString(r.nextInt(high - low) + low); - - } else if (type.isDouble()) { - double low = Double.parseDouble(range.get(0)); - double high = Double.parseDouble(range.get(1)); - return String.format("%.4f", r.nextDouble() * (high - low) + low); - - } else if (type.isFloat()) { - float low = Float.parseFloat(range.get(0)); - float high = Float.parseFloat(range.get(1)); - return String.format("%.4f", r.nextFloat() * (high - low) + low); - - } else if (type.isDecimal()) { - double low = Double.parseDouble(range.get(0)); - double high = Double.parseDouble(range.get(1)); - return String.format("%.4f", r.nextDouble() * (high - low) + low); - - } else if (type.isDateTimeFamily()) { - if (!type.isDate()) { - throw new RuntimeException("Does not support " + type); - } - - Date start = format.parse(range.get(0)); - Date end = format.parse(range.get(1)); - long diff = end.getTime() - start.getTime(); - Date temp = new Date(start.getTime() + (long) (diff * r.nextDouble())); - Calendar cal = Calendar.getInstance(); - cal.setTime(temp); - // first day - cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek()); - - return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + 
normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH)); - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private String createRandomCell(ColumnDesc cDesc) { - DataType type =cDesc.getType(); - String s = type.getName(); - if (s.equals("char") || s.equals("varchar")) { - StringBuilder sb = new StringBuilder(); - int len = Math.min(type.getPrecision(), 3); - for (int i = 0; i < len; i++) { - sb.append((char) ('a' + r.nextInt(10))); // cardinality at most 10x10x10 - } - return sb.toString(); - } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) { - return Integer.toString(r.nextInt(128)); - } else if (s.equals("double")) { - return String.format("%.4f", r.nextDouble() * 100); - } else if (s.equals("float")) { - return String.format("%.4f", r.nextFloat() * 100); - } else if (s.equals("decimal")) { - return String.format("%.4f", r.nextDouble() * 100); - } else if (s.equals("date")) { - long date20131231 = 61349312153265L; - long date20010101 = 60939158400000L; - long diff = date20131231 - date20010101; - Date temp = new Date(date20010101 + (long) (diff * r.nextDouble())); - Calendar cal = Calendar.getInstance(); - cal.setTime(temp); - // first day - cal.set(Calendar.DAY_OF_WEEK, cal.getFirstDayOfWeek()); - - return cal.get(Calendar.YEAR) + "-" + normToTwoDigits(cal.get(Calendar.MONTH) + 1) + "-" + normToTwoDigits(cal.get(Calendar.DAY_OF_MONTH)); - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private String createDefaultsCell(String type) { - String s = type.toLowerCase(); - if (s.equals("string") || s.equals("char") || s.equals("varchar")) { - return "abcde"; - } else if (s.equals("bigint") || s.equals("int") || s.equals("tinyint") || s.equals("smallint")) { - return "0"; - } else if (s.equals("double")) { - return "0"; - } else if (s.equals("float")) { - return "0"; - } else if 
(s.equals("decimal")) { - return "0"; - } else if (s.equals("date")) { - return "1970-01-01"; - } else { - System.out.println("The data type " + type + "is not recognized"); - System.exit(1); - } - return null; - } - - private void printColumnMappings(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) { - - System.out.println("======================================================================="); - System.out.format("%-30s %s", "FACT_TABLE_COLUMN", "MAPPING"); - System.out.println(); - System.out.println(); - for (Map.Entry<String, String> entry : factTableCol2LookupCol.entrySet()) { - System.out.format("%-30s %s", entry.getKey(), entry.getValue()); - System.out.println(); - } - for (String key : usedCols) { - System.out.format("%-30s %s", key, "Random Values"); - System.out.println(); - } - for (String key : defaultColumns) { - System.out.format("%-30s %s", key, "Default Values"); - System.out.println(); - } - System.out.println("======================================================================="); - - System.out.println("Parameters:"); - System.out.println(); - System.out.println("CubeName: " + cubeName); - System.out.println("RowCount: " + rowCount); - System.out.println("ConflictRatio: " + conflictRatio); - System.out.println("LinkableRatio: " + linkableRatio); - System.out.println("Seed: " + randomSeed); - System.out.println(); - System.out.println("The number of actual unlinkable fact rows is: " + this.unlinkableRowCount); - System.out.println("You can vary the above parameters to generate different datasets."); - System.out.println(); - } - - // Any row in the column must finally appear in the flatten big table. 
- // for single-column joins the generated row is guaranteed to have a match - // in lookup table - // for composite keys we'll need an extra check - private boolean matchAllCompositeKeys(TreeMap<String, String> lookupCol2FactTableCol, LinkedList<String> columnValues) { - KylinConfig config = KylinConfig.getInstanceFromEnv(); - - for (String lookupTable : lookupTableKeys.keySet()) { - if (lookupTableKeys.get(lookupTable).size() == 1) - continue; - - String[] comboKey = new String[lookupTableKeys.get(lookupTable).size()]; - int index = 0; - for (String column : lookupTableKeys.get(lookupTable)) { - String key = lookupTable + "/" + column; - String factTableCol = lookupCol2FactTableCol.get(key); - int cardinal = MetadataManager.getInstance(config).getTableDesc(factTableName).findColumnByName(factTableCol).getZeroBasedIndex(); - comboKey[index] = columnValues.get(cardinal); - - index++; - } - Array<String> wrap = new Array<String>(comboKey); - if (!lookupTableCompositeKeyValues.get(lookupTable).contains(wrap)) { - // System.out.println("Try " + wrap + " Failed, continue..."); - return false; - } - } - return true; - } - - private String createCell(ColumnDesc cDesc) throws Exception { - ColumnConfig cConfig = null; - - if ((cConfig = genConf.getColumnConfigByName(cDesc.getName())) == null) { - // if the column is not configured, use random values - return (createRandomCell(cDesc)); - - } else { - // the column has a configuration - if (!cConfig.isAsRange() && !cConfig.isExclusive() && r.nextBoolean()) { - // if the column still allows random values - return (createRandomCell(cDesc)); - - } else { - // use specified values - ArrayList<String> valueSet = cConfig.getValueSet(); - if (valueSet == null || valueSet.size() == 0) - throw new Exception("Did you forget to specify value set for " + cDesc.getName()); - - if (!cConfig.isAsRange()) { - return (randomPick(valueSet)); - } else { - if (valueSet.size() != 2) - throw new Exception("Only two values can be set for range 
values, the column: " + cDesc.getName()); - - return (createRandomCell(cDesc, valueSet)); - } - } - - } - } - - private LinkedList<String> createRow(TreeMap<String, String> factTableCol2LookupCol, TreeSet<String> usedCols, TreeSet<String> defaultColumns) throws Exception { - LinkedList<String> columnValues = new LinkedList<String>(); - - long currentRowTime = -1; - - for (TblColRef col : cube.getModel().getRootFactTable().getColumns()) { - - String colName = col.getName(); - - if (factTableCol2LookupCol.containsKey(colName)) { - - // if the current column is a fk column in fact table - ArrayList<String> candidates = this.feasibleValues.get(factTableCol2LookupCol.get(colName)); - - columnValues.add(candidates.get(r.nextInt(candidates.size()))); - } else if (usedCols.contains(colName)) { - // if the current column is a metric or dimension column in fact table - columnValues.add(createCell(col.getColumnDesc())); - } else { - - // otherwise this column is not useful in OLAP - columnValues.add(createDefaultsCell(col.getColumnDesc().getTypeName())); - defaultColumns.add(colName); - } - - if (col.equals(cube.getModel().getPartitionDesc().getPartitionDateColumnRef())) { - currentRowTime = format.parse(columnValues.get(columnValues.size() - 1)).getTime(); - } - } - - for (Integer index : differentiateColumns) { - if (r.nextBoolean()) {//only change half of data - if (currentRowTime >= differentiateBoundary) { - columnValues.set(index, columnValues.get(index) + "_B"); - } else { - columnValues.set(index, columnValues.get(index) + "_A"); - } - } - } - - return columnValues; - } - - /** - * return the text of table contents(one line one row) - * - * @param rowCount - * @param factTableCol2LookupCol - * @param lookupCol2FactTableCol - * @param usedCols - * @return - * @throws Exception - */ - private String createTable(int rowCount, TreeMap<String, String> factTableCol2LookupCol, TreeMap<String, String> lookupCol2FactTableCol, TreeSet<String> usedCols) throws Exception { - try 
{ - TreeSet<String> defaultColumns = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < rowCount;) { - - LinkedList<String> columnValues = createRow(factTableCol2LookupCol, usedCols, defaultColumns); - - if (!matchAllCompositeKeys(lookupCol2FactTableCol, columnValues)) { - if (unlinkableRowCount < unlinkableRowCountMax) { - unlinkableRowCount++; - } else { - continue; - } - } - - for (String c : columnValues) - sb.append(c + ","); - sb.deleteCharAt(sb.length() - 1); - sb.append(System.getProperty("line.separator")); - - i++; - - // System.out.println("Just generated the " + i + "th record"); - } - - printColumnMappings(factTableCol2LookupCol, usedCols, defaultColumns); - - return sb.toString(); - - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - return null; - } - - /** - * Randomly create a fact table and return the table content - * - * @param cubeName name of the cube - * @param rowCount expected row count generated - * @param linkableRatio the percentage of fact table rows that can be linked with all - * lookup table by INNER join - * @param randomSeed random seed - */ - public static String generate(String cubeName, String rowCount, String linkableRatio, String randomSeed) throws Exception { - - if (rowCount == null) - rowCount = "10000"; - if (linkableRatio == null) - linkableRatio = "0.6"; - - //if (randomSeed == null) - // don't give it value - - // String conflictRatio = "5";//this parameter do not allow configuring - // any more - - FactTableGenerator generator = new FactTableGenerator(); - long seed; - if (randomSeed != null) { - seed = Long.parseLong(randomSeed); - } else { - Random r = new Random(); - seed = r.nextLong(); - } - - generator.init(cubeName, Integer.parseInt(rowCount), 5, Double.parseDouble(linkableRatio), seed); - generator.prepare(); - return generator.cookData(); - } -} 
http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java ---------------------------------------------------------------------- diff --git a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java b/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java deleted file mode 100644 index 5204d2a..0000000 --- a/assembly/src/test/java/org/apache/kylin/job/dataGen/GenConfig.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.job.dataGen; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.HashMap; - -import org.apache.kylin.common.util.JsonUtil; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.databind.JsonMappingException; - -/** - */ -@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE) -public class GenConfig { - - @JsonProperty("columnConfigs") - private ArrayList<ColumnConfig> columnConfigs; - - @JsonProperty("differentiateBoundary") - private String differentiateBoundary; //data before and after the provided date will be different, so that different segments will have different segments - - private HashMap<String, ColumnConfig> cache = new HashMap<String, ColumnConfig>(); - - public String getDifferentiateBoundary() { - return differentiateBoundary; - } - - public void setDifferentiateBoundary(String differentiateBoundary) { - this.differentiateBoundary = differentiateBoundary; - } - - public ArrayList<ColumnConfig> getColumnConfigs() { - return columnConfigs; - } - - public void setColumnConfigs(ArrayList<ColumnConfig> columnConfigs) { - this.columnConfigs = columnConfigs; - } - - public ColumnConfig getColumnConfigByName(String columnName) { - columnName = columnName.toLowerCase(); - - if (cache.containsKey(columnName)) - return cache.get(columnName); - - for (ColumnConfig cConfig : columnConfigs) { - if (cConfig.getColumnName().toLowerCase().equals(columnName)) { - cache.put(columnName, cConfig); - return cConfig; - } - } - cache.put(columnName, null); - return null; - } - - public static GenConfig loadConfig(InputStream stream) { - try { - GenConfig config = 
JsonUtil.readValue(stream, GenConfig.class); - return config; - } catch (JsonMappingException e) { - e.printStackTrace(); - } catch (JsonParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - - return null; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java index 2da1f5e..7105ede 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ColumnDesc.java @@ -45,6 +45,10 @@ public class ColumnDesc implements Serializable { @JsonInclude(JsonInclude.Include.NON_NULL) private String comment; + @JsonProperty("data_gen") + @JsonInclude(JsonInclude.Include.NON_NULL) + private String dataGen; + // parsed from data type private DataType type; private DataType upgradedType; @@ -148,6 +152,10 @@ public class ColumnDesc implements Serializable { public void setNullable(boolean nullable) { this.isNullable = nullable; } + + public String getDataGen() { + return dataGen; + } public void init(TableDesc table) { this.table = table; http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java index ab8c465..e845da1 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java @@ -26,6 +26,7 @@ import 
org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.StringSplitter; import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonProperty; @@ -47,6 +48,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware { private int sourceType = ISourceAware.ID_HIVE; @JsonProperty("table_type") private String tableType; + + @JsonProperty("data_gen") + @JsonInclude(JsonInclude.Include.NON_NULL) + private String dataGen; private DatabaseDesc database = new DatabaseDesc(); @@ -160,6 +165,10 @@ public class TableDesc extends RootPersistentEntity implements ISourceAware { return getMaxColumnIndex() + 1; } + public String getDataGen() { + return dataGen; + } + public void init() { if (name != null) name = name.toUpperCase(); http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java new file mode 100644 index 0000000..6387873 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenConfig.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.kylin.metadata.model.ColumnDesc; + +public class ColumnGenConfig { + + public static final String FK = "FK"; + public static final String ID = "ID"; + public static final String RAND = "RAND"; + public static final String $RANDOM = "${RANDOM}"; + + // discrete values + boolean isDiscrete; + boolean isFK; + List<String> values; + + // random + boolean isRandom; + String randFormat; + int randStart; + int randEnd; + + // ID + boolean isID; + int idStart; + + // general + int cardinality; + boolean genNull; + double genNullPct; + String genNullStr; + boolean order; + boolean unique; + + public ColumnGenConfig(ColumnDesc col, ModelDataGenerator modelGen) throws IOException { + init(col, modelGen); + } + + private void init(ColumnDesc col, ModelDataGenerator modelGen) throws IOException { + + Map<String, String> config = Util.parseEqualCommaPairs(col.getDataGen(), "values"); + + values = Arrays.asList(Util.parseString(config, "values", "").split("[|]")); + + List<String> pkValues = modelGen.getPkValuesIfIsFk(col); + + if (FK.equals(values.get(0)) || (values.get(0).isEmpty() && pkValues != null)) { + isFK = true; + values = pkValues; + } else if (ID.equals(values.get(0))) { + isID = true; + idStart = (values.size() > 1) ? Integer.parseInt(values.get(1)) : 0; + } else if (RAND.equals(values.get(0)) || values.get(0).isEmpty()) { + isRandom = true; + randFormat = (values.size() > 1) ? 
values.get(1) : ""; + randStart = (values.size() > 2) ? Integer.parseInt(values.get(2)) : 0; + randEnd = (values.size() > 3) ? Integer.parseInt(values.get(3)) : 0; + } else { + isDiscrete = true; + } + + cardinality = Util.parseInt(config, "card", guessCardinality(col.getName())); + genNull = Util.parseBoolean(config, "null", guessGenNull(col.getName())); + genNullPct = Util.parseDouble(config, "nullpct", 0.01); + genNullStr = Util.parseString(config, "nullstr", "\\N"); // '\N' is null in hive + order = Util.parseBoolean(config, "order", false); + unique = Util.parseBoolean(config, "uniq", modelGen.isPK(col)); + } + + private int guessCardinality(String col) { + for (String s : col.split("_")) { + if (s.startsWith("C")) { + try { + return Integer.parseInt(s.substring(1)); + } catch (Exception ex) { + // ok + } + } + } + return 0; + } + + private boolean guessGenNull(String col) { + return col.contains("_NULL"); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java new file mode 100644 index 0000000..775f4fc --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ColumnGenerator.java @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.TreeSet; + +import org.apache.commons.lang3.StringUtils; +import org.apache.kylin.common.util.DateFormat; +import org.apache.kylin.common.util.StringUtil; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.model.ColumnDesc; + +import com.google.common.base.Preconditions; + +public class ColumnGenerator { + + final private ColumnGenConfig conf; + final private ColumnDesc targetCol; + final private int targetRows; + + public ColumnGenerator(ColumnDesc col, int nRows, ModelDataGenerator modelGen) throws IOException { + this.conf = new ColumnGenConfig(col, modelGen); + this.targetCol = col; + this.targetRows = nRows; + } + + public Iterator<String> generate(long seed) { + Base result; + if (conf.isFK) { + result = new DiscreteGen(conf.values, seed); + } else if (conf.isID) { + result = new IDGen(conf.idStart); + } else if (conf.isRandom) { + result = new RandomGen(targetCol, conf.randFormat, conf.randStart, conf.randEnd, conf.cardinality); + } else { + result = new DiscreteGen(conf.values); + } + + if (conf.cardinality > 0) { + result = new CardinalityFilter(result, conf.cardinality); + } + + if (conf.genNull) { + result = new AddNullFilter(result, conf.genNullPct, conf.genNullStr); + } + + if 
(conf.order || conf.unique) { + result = new OrderFilter(result, conf.unique, targetRows); + } + + return result; + } + + abstract public static class Base implements Iterator<String> { + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private class RandomGen extends Base { + + private DataType type; + private String format; + private int randStart; + private int randEnd; + private Random rand; + + public RandomGen(ColumnDesc col, String format, int randStart, int randEnd, int cardinality) { + this.type = col.getType(); + + if (type.isStringFamily()) { + // string + if (StringUtils.isBlank(format)) { + String name = col.getName(); + format = name.substring(0, Math.min(4, name.length())) + ColumnGenConfig.$RANDOM; + } + Preconditions.checkArgument(format.contains(ColumnGenConfig.$RANDOM)); + initNumberRange(randStart, randEnd, cardinality); + } else if (type.isTimeFamily()) { + // time + format = StringUtil.noBlank(format, DateFormat.DEFAULT_DATETIME_PATTERN_WITHOUT_MILLISECONDS); + initDateTimeRange(randStart, randEnd, 0); + } else if (type.isDateTimeFamily()) { + // date + format = StringUtil.noBlank(format, DateFormat.DEFAULT_DATE_PATTERN); + initDateTimeRange(randStart, randEnd, cardinality); + } else if (type.isIntegerFamily()) { + // integer + initNumberRange(randStart, randEnd, cardinality); + format = StringUtil.noBlank(format, "#"); + } else if (type.isNumberFamily()) { + // double + initNumberRange(randStart, randEnd, 0); + format = StringUtil.noBlank(format, ".##"); + } else { + throw new IllegalArgumentException(); + } + + this.format = format; + this.rand = new Random(); + } + + private void initDateTimeRange(int randStart, int randEnd, int days) { + if (randStart == 0 && randEnd == 0) { + randStart = 2010; + randEnd = 2015; + } + randEnd = Math.max(randEnd, randStart + (days / 365) + 1); + + Preconditions.checkArgument(randStart < randEnd); + Preconditions.checkArgument((randEnd - randStart) * 365 >= days); + 
+ this.randStart = randStart; + this.randEnd = randEnd; + } + + private void initNumberRange(int randStart, int randEnd, int cardinality) { + if (randStart == 0 && randEnd == 0) { + randStart = 0; + randEnd = 1000; + } + randEnd = Math.max(randEnd, randStart + cardinality); + + Preconditions.checkArgument(randStart < randEnd); + Preconditions.checkArgument(randEnd - randStart >= cardinality); + + this.randStart = randStart; + this.randEnd = randEnd; + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + if (type.isStringFamily()) { + // string + return format.replace(ColumnGenConfig.$RANDOM, "" + randomInt()); + } else if (type.isTimeFamily()) { + // time + return DateFormat.formatToTimeStr(randomMillis(), format); + } else if (type.isDateTimeFamily()) { + // date + return DateFormat.formatToDateStr(randomMillis(), format); + } else if (type.isIntegerFamily()) { + // integer + return formatNumber(randomInt()); + } else if (type.isNumberFamily()) { + // double + return formatNumber(randomDouble()); + } else { + throw new IllegalStateException(); + } + } + + private String formatNumber(double i) { + return new DecimalFormat(format).format(i); + } + + private int randomInt() { + return randStart + rand.nextInt(randEnd - randStart); + } + + private double randomDouble() { + return randomInt() + rand.nextDouble(); + } + + private long randomMillis() { + int secondsInYear = 3600 * 24 * 365; + long year = randStart + rand.nextInt(randEnd - randStart) - 1970; + long second = year * secondsInYear + rand.nextInt(secondsInYear); + return second * 1000; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + private class IDGen extends Base { + + int next; + + public IDGen(int start) { + next = start; + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + return "" + (next++); + } + } + + private class DiscreteGen extends Base { 
+ + private List<String> values; + private Random rand; + + public DiscreteGen(List<String> values) { + this.values = values; + this.rand = new Random(); + } + + public DiscreteGen(List<String> values, long seed) { + this.values = values; + this.rand = new Random(seed); + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + if (values.isEmpty()) + return null; + else + return values.get(rand.nextInt(values.size())); + } + } + + private class CardinalityFilter extends Base { + + private Iterator<String> input; + private int card; + private TreeSet<String> cache; + + public CardinalityFilter(Iterator<String> input, int card) { + assert card > 0; + this.input = input; + this.card = card; + this.cache = new TreeSet<String>(); + } + + @Override + public boolean hasNext() { + return input.hasNext(); + } + + @Override + public String next() { + String r = input.next(); + + if (cache.size() < card) { + cache.add(r); + return r; + } + + r = cache.floor(r); + return r == null ? cache.first() : r; + } + } + + private class AddNullFilter extends Base { + + private Iterator<String> input; + private double nullPct; + private String nullStr; + private Random rand; + + public AddNullFilter(Iterator<String> input, double nullPct, String nullStr) { + this.input = input; + this.nullPct = nullPct; + this.nullStr = nullStr; + this.rand = new Random(); + } + + @Override + public boolean hasNext() { + return true; + } + + @Override + public String next() { + return rand.nextDouble() < nullPct || !input.hasNext() ? nullStr : input.next(); + } + } + + final private Comparator<String> comp = new Comparator<String>() { + @Override + public int compare(String s1, String s2) { + if (s1 == null) { + return s2 == null ? 
0 : -1; + } else if (s2 == null) { + return 1; + } else { + if (targetCol.getType().isNumberFamily()) + return Double.compare(Double.parseDouble(s1), Double.parseDouble(s2)); + else + return s1.compareTo(s2); + } + } + }; + + private class OrderFilter extends Base { + + private Iterator<String> iter; + + public OrderFilter(Iterator<String> input, boolean unique, int targetRows) { + Collection<String> cache = unique ? new TreeSet<String>(comp) : new ArrayList<String>(targetRows); + int cap = targetRows * 100; + for (int i = 0; cache.size() < targetRows; i++) { + cache.add(input.next()); + if (i >= cap) + throw new IllegalStateException(); + } + + if (cache instanceof List) { + Collections.sort((List<String>) cache, comp); + } + + iter = cache.iterator(); + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public String next() { + return iter.next(); + } + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java new file mode 100644 index 0000000..c325ab0 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/ModelDataGenerator.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.kylin.common.persistence.ResourceStore; +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.model.ColumnDesc; +import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.JoinDesc; +import org.apache.kylin.metadata.model.JoinTableDesc; +import org.apache.kylin.metadata.model.TableDesc; +import org.apache.kylin.metadata.model.TblColRef; + +import com.google.common.base.Preconditions; + +public class ModelDataGenerator { + + final private DataModelDesc model; + final private int targetRows; + final private ResourceStore outputStore; + final private String outputPath; + + boolean outprint = false; // for debug + + public ModelDataGenerator(DataModelDesc model, int nRows) { + this(model, nRows, ResourceStore.getStore(model.getConfig()), "/data"); + } + + public ModelDataGenerator(DataModelDesc model, int nRows, ResourceStore outputStore, String outputPath) { + this.model = model; + this.targetRows = 
nRows; + this.outputStore = outputStore; + this.outputPath = outputPath; + } + + public void generate() throws IOException { + Set<TableDesc> generated = new HashSet<>(); + Set<TableDesc> allTableDesc = new LinkedHashSet<>(); + + JoinTableDesc[] allTables = model.getJoinTables(); + for (int i = allTables.length - 1; i >= -1; i--) { + TableDesc table = (i == -1) ? model.getRootFactTable().getTableDesc() : allTables[i].getTableRef().getTableDesc(); + allTableDesc.add(table); + + if (generated.contains(table)) + continue; + + boolean gen = generateTable(table); + + if (gen) + generated.add(table); + } + + generateDDL(allTableDesc); + } + + private boolean generateTable(TableDesc table) throws IOException { + TableGenConfig config = new TableGenConfig(table, this); + if (!config.needGen) + return false; + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8")); + + generateTableInternal(table, config, pout); + + pout.close(); + bout.close(); + + saveResource(bout.toByteArray(), path(table)); + return true; + } + + private void generateTableInternal(TableDesc table, TableGenConfig config, PrintWriter out) throws IOException { + ColumnDesc[] columns = table.getColumns(); + ColumnGenerator[] colGens = new ColumnGenerator[columns.length]; + Iterator<String>[] colIters = new Iterator[columns.length]; + + // config.rows is either a multiplier (0,1] or an absolute row number + int tableRows = (int) ((config.rows > 1) ? 
config.rows : targetRows * config.rows); + tableRows = Math.max(1, tableRows); + + // same seed for all columns, to ensure composite FK columns generate correct pairs + long seed = System.currentTimeMillis(); + + for (int i = 0; i < columns.length; i++) { + colGens[i] = new ColumnGenerator(columns[i], tableRows, this); + colIters[i] = colGens[i].generate(seed); + } + + for (int i = 0; i < tableRows; i++) { + for (int c = 0; c < columns.length; c++) { + if (c > 0) + out.print(","); + + String v = colIters[c].next(); + Preconditions.checkState(v == null || !v.contains(",")); + + out.print(v); + } + out.print("\n"); + } + } + + private void generateDDL(Set<TableDesc> tables) throws IOException { + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + PrintWriter pout = new PrintWriter(new OutputStreamWriter(bout, "UTF-8")); + + generateDatabaseDDL(tables, pout); + generateCreateTableDDL(tables, pout); + generateLoadDataDDL(tables, pout); + + pout.close(); + bout.close(); + + saveResource(bout.toByteArray(), path(model)); + } + + private void generateDatabaseDDL(Set<TableDesc> tables, PrintWriter out) { + Set<String> dbs = new HashSet<>(); + for (TableDesc t : tables) { + String db = t.getDatabase(); + if (StringUtils.isBlank(db) == false && "DEFAULT".equals(db) == false) + dbs.add(db); + } + + for (String db : dbs) { + out.print("CREATE DATABASE IF NOT EXISTS " + db + ";\n"); + } + out.print("\n"); + } + + private void generateCreateTableDDL(Set<TableDesc> tables, PrintWriter out) { + for (TableDesc t : tables) { + out.print("DROP TABLE IF EXISTS " + t.getIdentity() + ";\n"); + + out.print("CREATE TABLE " + t.getIdentity() + "(" + "\n"); + + for (int i = 0; i < t.getColumns().length; i++) { + ColumnDesc col = t.getColumns()[i]; + out.print(" "); + if (i > 0) { + out.print(","); + } + out.print(col.getName() + " " + hiveType(col.getType()) + "\n"); + } + + out.print(")" + "\n"); + out.print("ROW FORMAT DELIMITED FIELDS TERMINATED BY ','" + "\n"); + 
out.print("STORED AS TEXTFILE" + ";\n"); + out.print("\n"); + } + } + + private String hiveType(DataType type) { + String t = type.toString(); + if (t.startsWith("varchar")) + return "string"; + else if (t.startsWith("integer")) + return "int"; + else + return t; + } + + private void generateLoadDataDDL(Set<TableDesc> tables, PrintWriter out) { + for (TableDesc t : tables) { + out.print("LOAD DATA LOCAL INPATH '" + t.getIdentity() + ".csv' OVERWRITE INTO TABLE " + t.getIdentity() + ";\n"); + } + } + + public boolean existsInStore(TableDesc table) throws IOException { + return outputStore.exists(path(table)); + } + + public boolean isPK(ColumnDesc col) { + for (JoinTableDesc joinTable : model.getJoinTables()) { + JoinDesc join = joinTable.getJoin(); + for (TblColRef pk : join.getPrimaryKeyColumns()) { + if (pk.getColumnDesc().equals(col)) + return true; + } + } + return false; + } + + public List<String> getPkValuesIfIsFk(ColumnDesc fk) throws IOException { + JoinTableDesc[] joinTables = model.getJoinTables(); + for (int i = 0; i < joinTables.length; i++) { + JoinTableDesc joinTable = joinTables[i]; + ColumnDesc pk = findPk(joinTable, fk); + if (pk == null) + continue; + + List<String> pkValues = getPkValues(pk); + if (pkValues != null) + return pkValues; + } + return null; + } + + private ColumnDesc findPk(JoinTableDesc joinTable, ColumnDesc fk) { + TblColRef[] fkCols = joinTable.getJoin().getForeignKeyColumns(); + for (int i = 0; i < fkCols.length; i++) { + if (fkCols[i].getColumnDesc().equals(fk)) + return joinTable.getJoin().getPrimaryKeyColumns()[i].getColumnDesc(); + } + return null; + } + + private List<String> getPkValues(ColumnDesc pk) throws IOException { + if (existsInStore(pk.getTable()) == false) + return null; + + List<String> r = new ArrayList<>(); + + BufferedReader in = new BufferedReader(new InputStreamReader(outputStore.getResource(path(pk.getTable())).inputStream, "UTF-8")); + try { + String line; + while ((line = in.readLine()) != null) { + 
r.add(line.split(",")[pk.getZeroBasedIndex()]); + } + } finally { + IOUtils.closeQuietly(in); + } + return r; + } + + private void saveResource(byte[] content, String path) throws IOException { + if (outprint) { + System.out.println("Generated " + path); + System.out.println(Bytes.toString(content)); + } + outputStore.putResource(path, new ByteArrayInputStream(content), System.currentTimeMillis()); + } + + private String path(TableDesc table) { + return outputPath + "/" + table.getIdentity() + ".csv"; + } + + private String path(DataModelDesc model) { + return outputPath + "/" + "ddl_" + model.getName() + ".sql"; + } + + public DataModelDesc getModle() { + return model; + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java new file mode 100644 index 0000000..be948c1 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/TableGenConfig.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.source.datagen; + +import java.io.IOException; +import java.util.Map; + +import org.apache.kylin.metadata.model.TableDesc; + +public class TableGenConfig { + + boolean needGen; + double rows; + + public TableGenConfig(TableDesc table, ModelDataGenerator modelGen) throws IOException { + String dataGen = table.getDataGen(); + if (dataGen == null && modelGen.existsInStore(table) == false) { + dataGen = ""; + } + + if (dataGen == null) + return; + + needGen = true; + + Map<String, String> config = Util.parseEqualCommaPairs(dataGen, "rows"); + + // config.rows is either a multiplier (0,1] or an absolute row number + rows = Util.parseDouble(config, "rows", modelGen.getModle().isFactTable(table.getIdentity()) ? 1.0 : 20); + } + +} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java new file mode 100644 index 0000000..ca27bbf --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/source/datagen/Util.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
/**
 * Helpers for reading option strings made of comma separated entries, where an
 * entry is either {@code key=value} or a bare word.
 */
public class Util {

    /**
     * Parses a comma separated option string into an insertion-ordered map.
     * <ul>
     * <li>{@code key=value} entries are stored as given, key and value trimmed;</li>
     * <li>the first bare word is stored as the value of {@code defaultKey}, unless
     *     that key is already present;</li>
     * <li>any other bare word is stored as a flag with the value {@code "true"};</li>
     * <li>empty or whitespace-only entries (as in {@code "a=1,,b=2"}) are ignored.</li>
     * </ul>
     *
     * @param equalCommaPairs the option string; null or blank yields an empty map
     * @param defaultKey      key that receives the first bare word
     * @return a mutable map of the parsed options, never null
     */
    static Map<String, String> parseEqualCommaPairs(String equalCommaPairs, String defaultKey) {
        Map<String, String> r = new LinkedHashMap<>();

        if (isBlank(equalCommaPairs))
            return r;

        for (String s : equalCommaPairs.split(",")) {
            // a stray comma must not turn into an empty value or an empty key
            if (isBlank(s))
                continue;

            int equal = s.indexOf('=');
            if (equal < 0) {
                if (r.containsKey(defaultKey))
                    r.put(s.trim(), "true");
                else
                    r.put(defaultKey, s.trim());
            } else {
                r.put(s.substring(0, equal).trim(), s.substring(equal + 1).trim());
            }
        }
        return r;
    }

    /**
     * Returns the option parsed as a double, or {@code dft} when the key is absent.
     *
     * @throws NumberFormatException if the value is not a valid double
     */
    static double parseDouble(Map<String, String> config, String key, double dft) {
        if (!config.containsKey(key))
            return dft;

        String value = config.get(key);
        try {
            return Double.parseDouble(value);
        } catch (NumberFormatException e) {
            throw badNumber(key, value, e);
        }
    }

    /**
     * Returns the option parsed with {@link Boolean#parseBoolean(String)}, or
     * {@code dft} when the key is absent.
     */
    static boolean parseBoolean(Map<String, String> config, String key, boolean dft) {
        if (config.containsKey(key))
            return Boolean.parseBoolean(config.get(key));
        else
            return dft;
    }

    /**
     * Returns the option parsed as an int, or {@code dft} when the key is absent.
     *
     * @throws NumberFormatException if the value is not a valid int
     */
    public static int parseInt(Map<String, String> config, String key, int dft) {
        if (!config.containsKey(key))
            return dft;

        String value = config.get(key);
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw badNumber(key, value, e);
        }
    }

    /**
     * Returns the option as is, or {@code dft} when the key is absent.
     */
    public static String parseString(Map<String, String> config, String key, String dft) {
        if (config.containsKey(key))
            return config.get(key);
        else
            return dft;
    }

    /** True for null, empty, or text consisting only of {@link Character#isWhitespace(char)} characters. */
    private static boolean isBlank(String s) {
        if (s == null)
            return true;
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i)))
                return false;
        }
        return true;
    }

    /** Builds a NumberFormatException that names the offending option and keeps the original cause. */
    private static NumberFormatException badNumber(String key, String value, NumberFormatException cause) {
        NumberFormatException e = new NumberFormatException("Invalid number '" + value + "' for option '" + key + "'");
        e.initCause(cause);
        return e;
    }
}
---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/aggregation/topn/TopNCounterSerializerTest.java b/core-metadata/src/test/java/org/apache/kylin/aggregation/topn/TopNCounterSerializerTest.java deleted file mode 100644 index 1ce17fe..0000000 --- a/core-metadata/src/test/java/org/apache/kylin/aggregation/topn/TopNCounterSerializerTest.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -package org.apache.kylin.aggregation.topn; - -import java.nio.ByteBuffer; - -import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.LocalFileMetadataTestCase; -import org.apache.kylin.measure.topn.TopNCounter; -import org.apache.kylin.measure.topn.TopNCounterSerializer; -import org.apache.kylin.metadata.datatype.DataType; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TopNCounterSerializerTest extends LocalFileMetadataTestCase { - - private static TopNCounterSerializer serializer; - - @BeforeClass - public static void setUp() throws Exception { - staticCreateTestMetadata(); - - DataType.register("topn"); - serializer = new TopNCounterSerializer(DataType.getType("topn(10)")); - } - - @AfterClass - public static void after() throws Exception { - cleanAfterClass(); - } - - @Test - public void testSerialization() { - TopNCounter<ByteArray> vs = new TopNCounter<ByteArray>(50); - Integer[] stream = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 }; - for (Integer i : stream) { - vs.offer(new ByteArray(Bytes.toBytes(i))); - } - vs.sortAndRetain(); - ByteBuffer out = ByteBuffer.allocate(1024); - serializer.serialize(vs, out); - - byte[] copyBytes = new byte[out.position()]; - System.arraycopy(out.array(), 0, copyBytes, 0, out.position()); - - ByteBuffer in = ByteBuffer.wrap(copyBytes); - TopNCounter<ByteArray> vsNew = serializer.deserialize(in); - - Assert.assertEquals(vs.toString(), vsNew.toString()); - - } - - @Test - public void testValueOf() { - // FIXME need a good unit test for valueOf() - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java 
b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java index 586c007..5de2a3a 100644 --- a/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java +++ b/core-metadata/src/test/java/org/apache/kylin/measure/hllc/NewHyperLogLogBenchmarkTest.java @@ -20,6 +20,7 @@ package org.apache.kylin.measure.hllc; import org.apache.kylin.measure.hllc.HLLCounterOld; import org.apache.kylin.measure.hllc.HLLCounter; import org.apache.kylin.measure.hllc.RegisterType; +import org.junit.Ignore; import org.junit.Test; import java.nio.ByteBuffer; @@ -30,6 +31,7 @@ import static org.junit.Assert.assertEquals; /** * Created by xiefan on 16-12-12. */ +@Ignore("Save UT time") @SuppressWarnings("deprecation") public class NewHyperLogLogBenchmarkTest { http://git-wip-us.apache.org/repos/asf/kylin/blob/d1175d2c/core-metadata/src/test/java/org/apache/kylin/measure/topn/TopNCounterSerializerTest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/test/java/org/apache/kylin/measure/topn/TopNCounterSerializerTest.java b/core-metadata/src/test/java/org/apache/kylin/measure/topn/TopNCounterSerializerTest.java new file mode 100644 index 0000000..2daf3b4 --- /dev/null +++ b/core-metadata/src/test/java/org/apache/kylin/measure/topn/TopNCounterSerializerTest.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.measure.topn; + +import java.nio.ByteBuffer; + +import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.apache.kylin.measure.topn.TopNCounter; +import org.apache.kylin.measure.topn.TopNCounterSerializer; +import org.apache.kylin.metadata.datatype.DataType; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TopNCounterSerializerTest extends LocalFileMetadataTestCase { + + private static TopNCounterSerializer serializer; + + @BeforeClass + public static void setUp() throws Exception { + staticCreateTestMetadata(); + + DataType.register("topn"); + serializer = new TopNCounterSerializer(DataType.getType("topn(10)")); + } + + @AfterClass + public static void after() throws Exception { + cleanAfterClass(); + } + + @Test + public void testSerialization() { + TopNCounter<ByteArray> vs = new TopNCounter<ByteArray>(50); + Integer[] stream = { 1, 1, 2, 9, 1, 2, 3, 7, 7, 1, 3, 1, 1 }; + for (Integer i : stream) { + vs.offer(new ByteArray(Bytes.toBytes(i))); + } + vs.sortAndRetain(); + ByteBuffer out = ByteBuffer.allocate(1024); + serializer.serialize(vs, out); + + byte[] copyBytes = new byte[out.position()]; + System.arraycopy(out.array(), 0, copyBytes, 0, out.position()); + + ByteBuffer in = ByteBuffer.wrap(copyBytes); + TopNCounter<ByteArray> vsNew = serializer.deserialize(in); + + Assert.assertEquals(vs.toString(), vsNew.toString()); + + } + + 
@Test + public void testValueOf() { + // FIXME need a good unit test for valueOf() + } +}