KYLIN-1851 code review format
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/350547e6 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/350547e6 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/350547e6 Branch: refs/heads/master Commit: 350547e6ec6634008a4d07d771822f81acc2bcbe Parents: 734a4f9 Author: Li Yang <[email protected]> Authored: Wed Nov 16 18:46:49 2016 +0800 Committer: Li Yang <[email protected]> Committed: Wed Nov 16 18:46:49 2016 +0800 ---------------------------------------------------------------------- build/conf/kylin.properties | 3 +- .../apache/kylin/common/KylinConfigBase.java | 4 +- .../org/apache/kylin/cube/model/CubeDesc.java | 4 +- .../org/apache/kylin/cube/CubeDescTest.java | 28 +-- .../apache/kylin/dict/DictionaryGenerator.java | 10 +- .../apache/kylin/dict/DictionaryManager.java | 3 - .../dict/NumberDictionaryForestBuilder.java | 7 +- .../org/apache/kylin/dict/TrieDictionary.java | 6 - .../apache/kylin/dict/TrieDictionaryForest.java | 72 +----- .../kylin/dict/TrieDictionaryForestBuilder.java | 10 +- .../apache/kylin/dict/NumberDictionaryTest.java | 8 +- .../engine/mr/DFSFileTableSortedReader.java | 249 ------------------- .../mr/steps/NumberDictionaryForestTest.java | 25 +- .../test_case_data/sandbox/kylin.properties | 9 +- .../org/apache/kylin/query/KylinTestBase.java | 3 - 15 files changed, 54 insertions(+), 387 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/build/conf/kylin.properties ---------------------------------------------------------------------- diff --git a/build/conf/kylin.properties b/build/conf/kylin.properties index 715b7a6..3b50c12 100644 --- a/build/conf/kylin.properties +++ b/build/conf/kylin.properties @@ -118,6 +118,7 @@ kylin.job.mapreduce.mapper.input.rows=1000000 kylin.job.step.timeout=7200 + ### CUBE ### # 'auto', 'inmem', 'layer' or 'random' for testing @@ -131,8 +132,6 @@ kylin.dictionary.max.cardinality=5000000 kylin.table.snapshot.max_mb=300 -#max size for one trie in TrieDictionaryForest (default 500MB) - ### QUERY ### http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 300f727..c7dd8a8 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -210,7 +210,7 @@ abstract public class KylinConfigBase implements Serializable { public String[] getRealizationProviders() { return getOptionalStringArray("kylin.realization.providers", // - new String[]{"org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager"}); + new String[] {"org.apache.kylin.cube.CubeManager", "org.apache.kylin.storage.hybrid.HybridManager"}); } public CliCommandExecutor getCliCommandExecutor() throws IOException { @@ -591,7 +591,7 @@ abstract public class KylinConfigBase implements Serializable { } public int[] getQueryMetricsPercentilesIntervals() { - String[] dft = {"60", "300", "3600"}; + String[] dft = { "60", "300", "3600" }; return getOptionalIntArray("kylin.query.metrics.percentiles.intervals", dft); } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index c9ebff8..7dad87b 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -755,7 +755,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { int find = ArrayUtils.indexOf(dimColArray, fk[i]); if (find >= 0) { TblColRef derivedCol = initDimensionColRef(pk[i]); - initDerivedMap(new TblColRef[]{dimColArray[find]}, DeriveType.PK_FK, dim, new TblColRef[]{derivedCol}, null); + initDerivedMap(new TblColRef[] { dimColArray[find] }, DeriveType.PK_FK, dim, new TblColRef[] { derivedCol }, null); } } } @@ -776,7 +776,7 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { extra[i] = ""; } } - return new String[][]{cols, extra}; + return new String[][] { cols, extra }; } private void initDerivedMap(TblColRef[] hostCols, DeriveType type, DimensionDesc dimension, TblColRef[] derivedCols, String[] extra) { http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java index 3326b24..9ad6427 100644 --- a/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java +++ b/core-cube/src/test/java/org/apache/kylin/cube/CubeDescTest.java @@ -89,7 +89,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase { @Test public void testBadInit3() throws Exception { thrown.expect(IllegalStateException.class); - thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[]{"SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD"})); + thrown.expectMessage("Aggregation group 0 'includes' dimensions not include all the dimensions:" + sortStrs(new String[] { "SELLER_ID", "META_CATEG_NAME", "LSTG_FORMAT_NAME", "LSTG_SITE_ID", "SLR_SEGMENT_CD" })); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); String[] temp = Arrays.asList(cubeDesc.getAggregationGroups().get(0).getIncludes()).subList(0, 3).toArray(new String[3]); cubeDesc.getAggregationGroups().get(0).setIncludes(temp); @@ -114,7 +114,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase { @Test public void testBadInit5() throws Exception { CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "META_CATEG_NAME"}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "META_CATEG_NAME" }; cubeDesc.init(getTestConfig()); } @@ -122,7 +122,7 @@ public class CubeDescTest extends LocalFileMetadataTestCase { @Test public void testBadInit6() throws Exception { CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[]{"seller_id", "lstg_format_name"}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().mandatory_dims = new String[] { "seller_id", "lstg_format_name" }; cubeDesc.init(getTestConfig()); } @@ -133,43 +133,43 @@ public class CubeDescTest extends LocalFileMetadataTestCase { thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a joint"); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name" } }; cubeDesc.init(getTestConfig()); } @Test public void testBadInit8() throws Exception { - String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"}; + String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" }; thrown.expect(IllegalStateException.class); thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs)); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } }; cubeDesc.init(getTestConfig()); } @Test public void testBadInit9() throws Exception { - String[] strs = new String[]{"lstg_format_name", "META_CATEG_NAME"}; + String[] strs = new String[] { "lstg_format_name", "META_CATEG_NAME" }; thrown.expect(IllegalStateException.class); thrown.expectMessage("Aggregation group 0 hierarchy dimensions overlap with joint dimensions: " + sortStrs(strs)); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"lstg_format_name", "lstg_site_id"}}; - cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"META_CATEG_NAME", "lstg_format_name"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "lstg_format_name", "lstg_site_id" } }; + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "META_CATEG_NAME", "lstg_format_name" } }; cubeDesc.init(getTestConfig()); } @Test public void testBadInit10() throws Exception { - String[] strs = new String[]{"lstg_format_name", "lstg_site_id"}; + String[] strs = new String[] { "lstg_format_name", "lstg_site_id" }; thrown.expect(IllegalStateException.class); thrown.expectMessage("Aggregation group 0 a dimension exist in more than one joint: " + sortStrs(strs)); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][]{new String[]{"lstg_format_name", "lstg_site_id", "slr_segment_cd"}, new String[]{"lstg_format_name", "lstg_site_id", "leaf_categ_id"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().joint_dims = new String[][] { new String[] { "lstg_format_name", "lstg_site_id", "slr_segment_cd" }, new String[] { "lstg_format_name", "lstg_site_id", "leaf_categ_id" } }; cubeDesc.init(getTestConfig()); } @@ -180,19 +180,19 @@ public class CubeDescTest extends LocalFileMetadataTestCase { thrown.expectMessage("Aggregation group 0 require at least 2 dimensions in a hierarchy."); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME" } }; cubeDesc.init(getTestConfig()); } @Test public void testBadInit12() throws Exception { - String[] strs = new String[]{"CATEG_LVL2_NAME", "META_CATEG_NAME"}; + String[] strs = new String[] { "CATEG_LVL2_NAME", "META_CATEG_NAME" }; thrown.expect(IllegalStateException.class); thrown.expectMessage("Aggregation group 0 a dimension exist in more than one hierarchy: " + sortStrs(strs)); CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_with_slr_desc"); - cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][]{new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME"}, new String[]{"META_CATEG_NAME", "CATEG_LVL2_NAME"}}; + cubeDesc.getAggregationGroups().get(0).getSelectRule().hierarchy_dims = new String[][] { new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME", "CATEG_LVL3_NAME" }, new String[] { "META_CATEG_NAME", "CATEG_LVL2_NAME" } }; cubeDesc.init(getTestConfig()); } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java index 8695338..8eafe5f 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java @@ -36,12 +36,12 @@ import com.google.common.base.Preconditions; /** * @author yangli9 */ -@SuppressWarnings({"rawtypes", "unchecked"}) +@SuppressWarnings({ "rawtypes", "unchecked" }) public class DictionaryGenerator { private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class); - private static final String[] DATE_PATTERNS = new String[]{"yyyy-MM-dd", "yyyyMMdd"}; + private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" }; public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException { Preconditions.checkNotNull(dataType, "dataType cannot be null"); @@ -138,7 +138,6 @@ public class DictionaryGenerator { @Override public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB(); - //TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter()); TrieDictionaryForestBuilder builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB); byte[] value; while (valueEnumerator.moveNext()) { @@ -151,15 +150,13 @@ public class DictionaryGenerator { returnSamples.add(v); } return builder.build(); - //return builder.build(baseId); } } private static class NumberDictBuilder implements IDictionaryBuilder { @Override public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { - int maxTrieSizeInMB = TrieDictionaryForestBuilder.getMaxTrieSizeInMB(); - NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(new StringBytesConverter(), baseId, maxTrieSizeInMB); + NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(new StringBytesConverter(), baseId); byte[] value; while (valueEnumerator.moveNext()) { value = valueEnumerator.current(); @@ -177,5 +174,4 @@ public class DictionaryGenerator { } } - } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index b8d039e..2dd5085 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -418,9 +418,6 @@ public class DictionaryManager { logger.info("DictionaryManager(" + System.identityHashCode(this) + ") loading DictionaryInfo(loadDictObj:" + loadDictObj + ") at " + resourcePath); DictionaryInfo info = store.getResource(resourcePath, DictionaryInfo.class, loadDictObj ? DictionaryInfoSerializer.FULL_SERIALIZER : DictionaryInfoSerializer.INFO_SERIALIZER); - //info.dictionaryObject.dump(System.out); - // if (loadDictObj) - // logger.debug("Loaded dictionary at " + resourcePath); return info; } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java index c997ce1..519d4c3 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java @@ -29,8 +29,7 @@ public class NumberDictionaryForestBuilder<T> { private BytesConverter<T> bytesConverter; - private NumberDictionaryForest.NumberBytesCodec codec = new NumberDictionaryForest.NumberBytesCodec( - NumberDictionaryForest.MAX_DIGITS_BEFORE_DECIMAL_POINT); + private NumberDictionaryForest.NumberBytesCodec codec = new NumberDictionaryForest.NumberBytesCodec(NumberDictionaryForest.MAX_DIGITS_BEFORE_DECIMAL_POINT); public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter) { this(bytesConverter, 0); @@ -50,8 +49,6 @@ public class NumberDictionaryForestBuilder<T> { addValue(bytesConverter.convertToBytes(value)); } - - public void addValue(byte[] value) { codec.encodeNumber(value, 0, value.length); byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen); @@ -64,7 +61,7 @@ public class NumberDictionaryForestBuilder<T> { return new NumberDictionaryForest<T>(forest, bytesConverter); } - public void setMaxTrieSize(int size){ + public void setMaxTrieSize(int size) { this.trieBuilder.setMaxTrieTreeSize(size); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java index a5e3d36..c099de0 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java @@ -126,7 +126,6 @@ public class TrieDictionary<T> extends Dictionary<T> { else throw new RuntimeException(e); } - //this.enableValueCache = false; if (enableValueCache) { valueToIdCache = new SoftReference<Map>(new ConcurrentHashMap()); idToValueCache = new SoftReference<Object[]>(new Object[nValues]); @@ -156,7 +155,6 @@ public class TrieDictionary<T> extends Dictionary<T> { @Override final protected int getIdFromValueImpl(T value, int roundingFlag) { if (enableValueCache && roundingFlag == 0) { - //System.out.println("use id cache"); Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory if (cache != null) { Integer id = null; @@ -171,7 +169,6 @@ public class TrieDictionary<T> extends Dictionary<T> { return id; } } - //System.out.println("not use id cache"); byte[] valueBytes = bytesConvert.convertToBytes(value); return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag); } @@ -273,7 +270,6 @@ public class TrieDictionary<T> extends Dictionary<T> { @Override final protected T getValueFromIdImpl(int id) { if (enableValueCache) { - //System.out.println("use value cache"); Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory if (cache != null) { int seq = calcSeqNoFromId(id); @@ -288,10 +284,8 @@ public class TrieDictionary<T> extends Dictionary<T> { return result; } } - //System.out.println("not use value cache"); byte[] value = new byte[getSizeOfValue()]; int length = getValueBytesFromId(id, value, 0); - //System.out.println("get value by id:"+id+" value:"+bytesConvert.convertFromBytes(value, 0, length).toString()); return bytesConvert.convertFromBytes(value, 0, length); } http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java index b0440db..38cd0dc 100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java @@ -18,14 +18,6 @@ package org.apache.kylin.dict; -import org.apache.kylin.common.util.ByteArray; -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataOutput; @@ -37,6 +29,12 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; +import org.apache.kylin.common.util.ByteArray; +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.common.util.ClassUtil; +import org.apache.kylin.common.util.Dictionary; + /** * use trie forest to optimize trie dictionary @@ -46,8 +44,7 @@ import java.util.List; * Created by xiefan on 16-10-26. */ public class TrieDictionaryForest<T> extends Dictionary<T> { - - private static final Logger logger = LoggerFactory.getLogger(TrieDictionaryForest.class); + private static final long serialVersionUID = 1L; private ArrayList<TrieDictionary<T>> trees; @@ -168,30 +165,19 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { if (id < 0) { throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!"); } - //System.out.println("getIdFromValue value:"+bytesConvert.convertFromBytes(value,offset,len)+" id:"+id); return id; } - //id --> value - private boolean printstr = false; - @Override protected T getValueFromIdImpl(int id) throws IllegalArgumentException { - //System.out.println("here"); byte[] data = getValueBytesFromIdImpl(id); if (data != null) { - if (!printstr) { - System.out.println("getValueFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(data, 0, data.length)); - printstr = true; - } return bytesConvert.convertFromBytes(data, 0, data.length); } else { return null; } } - private boolean isPrintstr2 = false; - @Override protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) throws IllegalArgumentException { @@ -199,14 +185,7 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { int index = findIndexById(id); int treeInnerOffset = getTreeInnerOffset(id, index); TrieDictionary<T> tree = trees.get(index); - //getValueIndexTime2.addAndGet(System.currentTimeMillis() - startTime); - //startTime = System.currentTimeMillis(); int size = tree.getValueBytesFromIdImpl(treeInnerOffset, returnValue, offset); - if (!isPrintstr2) { - isPrintstr2 = true; - System.out.println("getValueBytesFromIdImpl id:" + id + " value:" + bytesConvert.convertFromBytes(returnValue, offset, size)); - } - //getValueTime2.addAndGet(System.currentTimeMillis() - startTime); return size; } @@ -250,19 +229,10 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { @Override public void write(DataOutput out) throws IOException { - System.out.println("write dict"); writeHead(out); writeBody(out); } - /*private int compare(T value1,T value2){ - byte[] b1 = bytesConvert.convertToBytes(value1); - byte[] b2 = bytesConvert.convertToBytes(value2); - ByteArray ba1 = new ByteArray(b1,0,b1.length); - ByteArray ba2 = new ByteArray(b2,0,b2.length); - return ba1.compareTo(ba2); - }*/ - private void writeHead(DataOutput out) throws IOException { ByteArrayOutputStream byteBuf = new ByteArrayOutputStream(); DataOutputStream headOut = new DataOutputStream(byteBuf); @@ -299,8 +269,8 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { @Override public void readFields(DataInput in) throws IOException { - System.out.println("read dict"); try { + @SuppressWarnings("unused") int headSize = in.readInt(); this.baseId = in.readInt(); String converterName = in.readUTF(); @@ -371,15 +341,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { return Collections.unmodifiableList(this.trees); } - private boolean onlyOneTree() { - return trees.size() == 1; - } - - private int findIndexByValue(T value) { - byte[] valueBytes = bytesConvert.convertToBytes(value); - return findIndexByValue(new ByteArray(valueBytes, 0, valueBytes.length)); - } - private int findIndexByValue(ByteArray value) { int index = lowerBound(value, new Comparator<ByteArray>() { @Override @@ -421,29 +382,13 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { found = true; } if (found) { - //System.out.println("look for:"+lookfor+" index:"+mid); return mid; } else { - //System.out.println("look for:"+lookfor+" index:"+Math.max(left,right)); return Math.min(left, right); //value may be bigger than the right tree } } public static void main(String[] args) { - /*ArrayList<Integer> list = new ArrayList<>(); - list.add(3); - list.add(10); - list.add(15); - Comparator<Integer> comp = new Comparator<Integer>() { - @Override - public int compare(Integer o1, Integer o2) { - return o1.compareTo(o2); - } - }; - int[] nums = {-1,0,1,2,3,4,13,15,16}; - for(int i : nums){ - System.out.println("found value:"+i+" index:"+lowerBound(i,comp,list)); - }*/ ArrayList<String> list = new ArrayList<>(); list.add("ä¸"); list.add("äº"); @@ -464,7 +409,6 @@ public class TrieDictionaryForest<T> extends Dictionary<T> { } }, list)); } - //System.out.println(BytesUtil.safeCompareBytes("äº".getBytes(),"ä¸".getBytes())); } public BytesConverter<T> getBytesConvert() { http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java index 5e2c346..1ceac27 100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java @@ -61,15 +61,7 @@ public class TrieDictionaryForestBuilder<T> { } public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId) { - this.bytesConverter = bytesConverter; - this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter); - this.baseId = baseId; - curOffset = 0; - int maxTrieTreeSizeMB = getMaxTrieSizeInMB(); - this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024; - logger.info("maxTrieSize is set to:" + maxTrieTreeSize + "B"); - //System.out.println("max trie size:"+maxTrieTreeSize); - //stringComparator = new ByteComparator<>(new StringBytesConverter()); + this(bytesConverter, baseId, getMaxTrieSizeInMB()); } public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieTreeSizeMB) { http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java index a9c4980..ea6358d 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/NumberDictionaryTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -/*package org.apache.kylin.dict; +package org.apache.kylin.dict; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -35,6 +35,7 @@ import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.metadata.datatype.DataType; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import com.google.common.collect.Lists; @@ -42,7 +43,7 @@ import com.google.common.collect.Sets; /** */ -/*public class NumberDictionaryTest extends LocalFileMetadataTestCase { +public class NumberDictionaryTest extends LocalFileMetadataTestCase { NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT); Random rand = new Random(); @@ -70,6 +71,7 @@ import com.google.common.collect.Sets; assertEquals(1, maxId); } + @Ignore @SuppressWarnings("unchecked") @Test public void testEmptyInput() throws IOException { @@ -207,4 +209,4 @@ import com.google.common.collect.Sets; return buf.toString(); } -}*/ +} http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java deleted file mode 100644 index 6af35d2..0000000 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/DFSFileTableSortedReader.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.engine.mr; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.SequenceFile.Reader; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.StringSplitter; -import org.apache.kylin.source.ReadableTable.TableReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * only use for reading output file of FactDistinctColumnsJob2 - */ -public class DFSFileTableSortedReader implements TableReader { - - private static final Logger logger = LoggerFactory.getLogger(DFSFileTableSortedReader.class); - private static final char CSV_QUOTE = '"'; - private static final String[] DETECT_DELIMS = new String[] { "\177", "|", "\t", "," }; - - private String filePath; - private String delim; - private List<RowReader> readerList; - - private String curLine; - private String[] curColumns; - private int expectedColumnNumber = -1; // helps delimiter detection - - public DFSFileTableSortedReader(String filePath, int expectedColumnNumber) throws IOException { - this(filePath, DFSFileTable.DELIM_AUTO, expectedColumnNumber); - } - - public DFSFileTableSortedReader(String filePath, String delim, int expectedColumnNumber) throws IOException { - filePath = HadoopUtil.fixWindowsPath(filePath); - this.filePath = filePath; - this.delim = delim; - this.expectedColumnNumber = expectedColumnNumber; - this.readerList = new ArrayList<RowReader>(); - - FileSystem fs = HadoopUtil.getFileSystem(filePath); - - ArrayList<FileStatus> allFiles = new ArrayList<>(); - FileStatus status = fs.getFileStatus(new Path(filePath)); - if (status.isFile()) { - allFiles.add(status); - } else { - FileStatus[] listStatus = fs.listStatus(new Path(filePath)); - allFiles.addAll(Arrays.asList(listStatus)); - } - - try { - for (FileStatus f : allFiles) { - RowReader rowReader = new SeqRowReader(HadoopUtil.getCurrentConfiguration(), fs, f.getPath().toString()); - this.readerList.add(rowReader); - } - } catch (IOException e) { - if (isExceptionSayingNotSeqFile(e) == false) - throw e; - - this.readerList = new ArrayList<RowReader>(); - for (FileStatus f : allFiles) { - RowReader rowReader = new CsvRowReader(fs, f.getPath().toString()); - this.readerList.add(rowReader); - } - } - } - - private boolean isExceptionSayingNotSeqFile(IOException e) { - if (e.getMessage() != null && e.getMessage().contains("not a SequenceFile")) - return true; - - if (e instanceof EOFException) // in case the file is very very small - return true; - - return false; - } - - @Override - public boolean next() throws IOException { - int curReaderIndex = -1; - RowReader curReader; - - while (++curReaderIndex < readerList.size()) { - curReader = readerList.get(curReaderIndex); - curLine = curReader.nextLine(); - curColumns = null; - - if (curLine != null) { - return true; - } - } - - return false; - } - - public String getLine() { - return curLine; - } - - @Override - public String[] getRow() { - if (curColumns == null) { - if (DFSFileTable.DELIM_AUTO.equals(delim)) - delim = autoDetectDelim(curLine); - - if (delim == null) - curColumns = new String[] { curLine }; - else - curColumns = split(curLine, delim); - } - return curColumns; - } - - private String[] split(String line, String delim) { - // FIXME CVS line should be parsed considering escapes - String[] str = StringSplitter.split(line, delim); - - // un-escape CSV - if (DFSFileTable.DELIM_COMMA.equals(delim)) { - for (int i = 0; i < str.length; i++) { - str[i] = unescapeCsv(str[i]); - } - } - - return str; - } - - private String unescapeCsv(String str) { - if (str == null || str.length() < 2) - return str; - - str = StringEscapeUtils.unescapeCsv(str); - - // unescapeCsv may not remove the outer most quotes - if (str.charAt(0) == CSV_QUOTE && str.charAt(str.length() - 1) == CSV_QUOTE) - str = str.substring(1, str.length() - 1); - - return str; - } - - @Override - public void close() { - for (RowReader reader : readerList) { - IOUtils.closeQuietly(reader); - } - } - - private String autoDetectDelim(String line) { - if (expectedColumnNumber > 0) { - for (String delim : DETECT_DELIMS) { - if (StringSplitter.split(line, delim).length == expectedColumnNumber) { - logger.info("Auto detect delim to be '" + delim + "', split line to " + expectedColumnNumber + " columns -- " + line); - return delim; - } - } - } - - logger.info("Auto detect delim to be null, will take THE-WHOLE-LINE as a single value, for " + filePath); - return null; - } - - // ============================================================================ - - private interface RowReader extends Closeable { - String nextLine() throws IOException; // return null on EOF - } - - private class SeqRowReader implements RowReader { - Reader reader; - Writable key; - Text value; - - SeqRowReader(Configuration hconf, FileSystem fs, String path) throws IOException { - reader = new Reader(hconf, Reader.file(new Path(path))); - key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); - value = new Text(); - } - - @Override - public String nextLine() throws IOException { - boolean hasNext = reader.next(key, value); - if (hasNext) - return Bytes.toString(value.getBytes(), 0, value.getLength()); - else - return null; - } - - @Override - public void close() throws IOException { - reader.close(); - } - } - - private class CsvRowReader implements RowReader { - BufferedReader reader; - - CsvRowReader(FileSystem fs, String path) throws IOException { - FSDataInputStream in = fs.open(new Path(path)); - reader = new BufferedReader(new InputStreamReader(in, "UTF-8")); - } - - @Override - public String nextLine() throws IOException { - return reader.readLine(); - } - - @Override - public void close() throws IOException { - reader.close(); - } - - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java index 554ee9c..66946b7 100644 --- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java +++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java @@ -1,16 +1,7 @@ package org.apache.kylin.engine.mr.steps; -import org.apache.hadoop.io.Text; -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.dict.NumberDictionary; -import org.apache.kylin.dict.NumberDictionaryBuilder; -import org.apache.kylin.dict.NumberDictionaryForest; -import org.apache.kylin.dict.NumberDictionaryForestBuilder; -import org.apache.kylin.dict.StringBytesConverter; -import org.apache.kylin.dict.TrieDictionaryForestBuilder; -import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey; -import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag; -import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -25,8 +16,16 @@ import java.util.List; import java.util.Random; import java.util.UUID; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.apache.hadoop.io.Text; +import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.dict.NumberDictionary; +import org.apache.kylin.dict.NumberDictionaryBuilder; +import org.apache.kylin.dict.NumberDictionaryForest; +import org.apache.kylin.dict.NumberDictionaryForestBuilder; +import org.apache.kylin.dict.StringBytesConverter; +import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey; +import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag; +import org.junit.Test; /** * Created by xiefan on 16-11-2. http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/examples/test_case_data/sandbox/kylin.properties ---------------------------------------------------------------------- diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties index 93b86c9..de1250f 100644 --- a/examples/test_case_data/sandbox/kylin.properties +++ b/examples/test_case_data/sandbox/kylin.properties @@ -63,7 +63,7 @@ kylin.job.retry=0 # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password # It should not be set to "true" unless you're NOT running Kylin.sh on a hadoop client machine # (Thus kylin instance has to ssh to another real hadoop client machine to execute hbase,hive,hadoop commands) -kylin.job.run.as.remote.cmd=true +kylin.job.run.as.remote.cmd=false # Only necessary when kylin.job.run.as.remote.cmd=true kylin.job.remote.cli.hostname=sandbox @@ -112,10 +112,12 @@ kylin.job.uhc.reducer.count=1 ### CUBE ### +# dictionary forest cut +kylin.dictionary.forest.trie.size.max_mb=500 + # 'auto', 'inmem', 'layer' or 'random' for testing kylin.cube.algorithm=random - # Enable/disable ACL check for cube query kylin.query.security.enabled=true @@ -161,6 +163,3 @@ kylin.query.metrics.percentiles.intervals=60, 360, 3600 # Env DEV|QA|PROD deploy.env=DEV - -#default 500MB -kylin.dictionary.forest.trie.size.max_mb=500 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/350547e6/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java index e1303e4..52461c4 100644 --- a/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java +++ b/kylin-it/src/test/java/org/apache/kylin/query/KylinTestBase.java @@ -491,10 +491,7 @@ public class KylinTestBase { ITable h2Table = executeQuery(h2Conn, queryName, sql, needSort); try { - //compare before junit // compare the result - System.out.println("h2 Table rows count:"+h2Table.getRowCount()); - System.out.println("kylin Table rows count:"+kylinTable.getRowCount()); Assertion.assertEquals(h2Table, kylinTable); } catch (Throwable t) { printInfo("execAndCompQuery failed on: " + sqlFile.getAbsolutePath());
