http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/pom.xml ---------------------------------------------------------------------- diff --git a/dictionary/pom.xml b/dictionary/pom.xml deleted file mode 100644 index ce86c66..0000000 --- a/dictionary/pom.xml +++ /dev/null @@ -1,128 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <artifactId>kylin-dictionary</artifactId> - <packaging>jar</packaging> - <name>Kylin:Dictionary</name> - - <parent> - <groupId>org.apache.kylin</groupId> - <artifactId>kylin</artifactId> - <version>1.3-SNAPSHOT</version> - </parent> - - <properties> - </properties> - - <dependencies> - - <dependency> - <groupId>org.apache.kylin</groupId> - <artifactId>kylin-common</artifactId> - <type>test-jar</type> - <scope>test</scope> - <version>${project.parent.version}</version> - </dependency> - - <!--Kylin Jar --> - <dependency> - <groupId>org.apache.kylin</groupId> - <artifactId>kylin-metadata</artifactId> - <version>${project.parent.version}</version> - </dependency> - - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - </dependency> - <dependency> - <groupId>commons-io</groupId> - <artifactId>commons-io</artifactId> - </dependency> - <dependency> - <groupId>commons-configuration</groupId> - <artifactId>commons-configuration</artifactId> - </dependency> - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </dependency> - - <!-- Env & Test --> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-hdfs</artifactId> - <scope>provided</scope> - <!-- protobuf version conflict with hbase--> - <exclusions> - <exclusion> - <groupId>com.google.protobuf</groupId> - <artifactId>protobuf-java</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-common</artifactId> - <scope>provided</scope> - </dependency> - 
<dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hive.hcatalog</groupId> - <artifactId>hive-hcatalog-core</artifactId> - <version>${hive-hcatalog.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase-testing-util</artifactId> - <version>${hbase-hadoop2.version}</version> - <scope>test</scope> - <exclusions> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>javax.servlet.jsp</groupId> - <artifactId>jsp-api</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - -</project>
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/BytesConverter.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/BytesConverter.java b/dictionary/src/main/java/org/apache/kylin/dict/BytesConverter.java deleted file mode 100644 index 1ae2dc1..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/BytesConverter.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -public interface BytesConverter<T> { - - public byte[] convertToBytes(T v); - - public T convertFromBytes(byte[] b, int offset, int length); -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java deleted file mode 100644 index 14086c6..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/DateStrDictionary.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.util.Date; - -import org.apache.commons.lang.StringUtils; -import org.apache.kylin.common.util.DateFormat; -import org.apache.kylin.common.util.Dictionary; - -/** - * A dictionary for date string (date only, no time). 
- * - * Dates are numbered from 0000-1-1 -- 0 for "0000-1-1", 1 for "0000-1-2", 2 for "0000-1-3" and - * up to 3652426 for "9999-12-31". - * - * Note the implementation is not thread-safe. - * - * @author yangli9 - */ -public class DateStrDictionary extends Dictionary<String> { - - static final int ID_9999_12_31 = 3652426; // assume 0 based - - // ============================================================================ - - private String pattern; - private int baseId; - private int maxId; - - public DateStrDictionary() { - init(DateFormat.DEFAULT_DATE_PATTERN, 0); - } - - public DateStrDictionary(String datePattern, int baseId) { - init(datePattern, baseId); - } - - private void init(String datePattern, int baseId) { - this.pattern = datePattern; - this.baseId = baseId; - this.maxId = baseId + ID_9999_12_31; - } - - @Override - public int getMinId() { - return baseId; - } - - @Override - public int getMaxId() { - return maxId; - } - - @Override - public int getSizeOfId() { - return 3; - } - - @Override - public int getSizeOfValue() { - return pattern.length(); - } - - @Override - protected boolean isNullByteForm(byte[] value, int offset, int len) { - return value == null || len == 0; - } - - @Override - final protected int getIdFromValueImpl(String value, int roundFlag) { - Date date = DateFormat.stringToDate(value, pattern); - int id = calcIdFromSeqNo(getNumOfDaysSince0000(date)); - if (id < baseId || id > maxId) - throw new IllegalArgumentException("'" + value + "' encodes to '" + id + "' which is out of range [" + baseId + "," + maxId + "]"); - - return id; - } - - @Override - final protected String getValueFromIdImpl(int id) { - if (id < baseId || id > maxId) - throw new IllegalArgumentException("ID '" + id + "' is out of range [" + baseId + "," + maxId + "]"); - Date d = getDateFromNumOfDaysSince0000(calcSeqNoFromId(id)); - return DateFormat.dateToString(d, pattern); - } - - private int getNumOfDaysSince0000(Date d) { - // 86400000 = 1000 * 60 * 60 * 24 - 
// -719530 is offset of 0000-01-01 - return (int) (d.getTime() / 86400000 + 719530); - } - - private Date getDateFromNumOfDaysSince0000(int n) { - long millis = ((long) n - 719530) * 86400000; - return new Date(millis); - } - - @Override - final protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) { - try { - return getIdFromValue(new String(value, offset, len, "ISO-8859-1")); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); // never happen - } - } - - @Override - final protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) { - String date = getValueFromId(id); - byte bytes[]; - try { - bytes = date.getBytes("ISO-8859-1"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); // never happen - } - System.arraycopy(bytes, 0, returnValue, offset, bytes.length); - return bytes.length; - } - - private int calcIdFromSeqNo(int seq) { - return seq < 0 ? seq : baseId + seq; - } - - private int calcSeqNoFromId(int id) { - return id - baseId; - } - - @Override - public void write(DataOutput out) throws IOException { - out.writeUTF(pattern); - out.writeInt(baseId); - } - - @Override - public void readFields(DataInput in) throws IOException { - String pattern = in.readUTF(); - int baseId = in.readInt(); - init(pattern, baseId); - } - - @Override - public int hashCode() { - return 31 * baseId + pattern.hashCode(); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof DateStrDictionary)) - return false; - DateStrDictionary that = (DateStrDictionary) o; - return StringUtils.equals(this.pattern, that.pattern) && this.baseId == that.baseId; - } - - @Override - public void dump(PrintStream out) { - out.println(this.toString()); - } - - @Override - public String toString() { - return "DateStrDictionary [pattern=" + pattern + ", baseId=" + baseId + "]"; - } - -} 
http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java deleted file mode 100644 index 0ba6566..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import java.io.IOException; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.*; - -import com.google.common.collect.Lists; -import org.apache.commons.lang.StringUtils; -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.util.*; -import org.apache.kylin.dict.lookup.ReadableTable; -import org.apache.kylin.metadata.datatype.DataType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * @author yangli9 - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class DictionaryGenerator { - - private static final int DICT_MAX_CARDINALITY = getDictionaryMaxCardinality(); - - private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class); - - private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" }; - - private static int getDictionaryMaxCardinality() { - try { - return KylinConfig.getInstanceFromEnv().getDictionaryMaxCardinality(); - } catch (Throwable e) { - return 5000000; // some test case does not KylinConfig setup properly - } - } - - public static org.apache.kylin.common.util.Dictionary<?> buildDictionaryFromValueEnumerator(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator) throws IOException{ - org.apache.kylin.common.util.Dictionary dict = null; - int baseId = 0; // always 0 for now - final int nSamples = 5; - ArrayList samples = Lists.newArrayListWithCapacity(nSamples); - - // build dict, case by data type - DataType dataType = DataType.getInstance(info.getDataType()); - if (dataType.isDateTimeFamily()) - dict = buildDateStrDict(valueEnumerator, baseId, nSamples, samples); - else if (dataType.isNumberFamily()) - dict = buildNumberDict(valueEnumerator, baseId, nSamples, samples); - else - dict = buildStringDict(valueEnumerator, baseId, nSamples, samples); - - // log a few samples - StringBuilder buf = new StringBuilder(); - for (Object s : samples) { - if (buf.length() > 
0) - buf.append(", "); - buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); - } - logger.info("Dictionary value samples: " + buf.toString()); - logger.info("Dictionary cardinality: " + dict.getSize()); - - if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY) - throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + dict.getSize()); - - return dict; - } - - public static org.apache.kylin.common.util.Dictionary mergeDictionaries(DictionaryInfo targetInfo, List<DictionaryInfo> sourceDicts) throws IOException { - return buildDictionaryFromValueEnumerator(targetInfo, new MultipleDictionaryValueEnumerator(sourceDicts)); - } - - public static org.apache.kylin.common.util.Dictionary<?> buildDictionary(DictionaryInfo info, ReadableTable inpTable) throws IOException { - - // currently all data types are casted to string to build dictionary - // String dataType = info.getDataType(); - - IDictionaryValueEnumerator columnValueEnumerator = null; - try { - logger.info("Building dictionary " + JsonUtil.writeValueAsString(info)); - - columnValueEnumerator = new TableColumnValueEnumerator(inpTable.getReader(), info.getSourceColumnIndex()); - return buildDictionaryFromValueEnumerator(info, columnValueEnumerator); - } finally { - if (columnValueEnumerator != null) - columnValueEnumerator.close(); - } - } - - private static org.apache.kylin.common.util.Dictionary buildDateStrDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException { - final int BAD_THRESHOLD = 2; - String matchPattern = null; - byte[] value; - - for (String ptn : DATE_PATTERNS) { - matchPattern = ptn; // be optimistic - int badCount = 0; - SimpleDateFormat sdf = new SimpleDateFormat(ptn); - - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value.length == 0) - continue; - - 
String str = Bytes.toString(value); - try { - sdf.parse(str); - if (samples.size() < nSamples && !samples.contains(str)) - samples.add(str); - } catch (ParseException e) { - logger.info("Unrecognized datetime value: " + str); - badCount++; - if (badCount > BAD_THRESHOLD) { - matchPattern = null; - break; - } - } - } - if (matchPattern != null) { - return new DateStrDictionary(matchPattern, baseId); - } - } - throw new IllegalStateException("Unrecognized datetime value"); - } - - private static org.apache.kylin.common.util.Dictionary buildStringDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException { - TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter()); - byte[] value; - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value == null) - continue; - String v = Bytes.toString(value); - builder.addValue(v); - if (samples.size() < nSamples && !samples.contains(v)) - samples.add(v); - } - return builder.build(baseId); - } - - private static org.apache.kylin.common.util.Dictionary buildNumberDict(IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList samples) throws IOException { - NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter()); - byte[] value; - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value == null) - continue; - String v = Bytes.toString(value); - if (StringUtils.isBlank(v)) // empty string is null for numbers - continue; - - builder.addValue(v); - if (samples.size() < nSamples && !samples.contains(v)) - samples.add(v); - } - return builder.build(baseId); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java 
b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java deleted file mode 100644 index 645722c..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.persistence.RootPersistentEntity; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.dict.lookup.ReadableTable.TableSignature; - -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; -import com.fasterxml.jackson.annotation.JsonProperty; - -@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) -public class DictionaryInfo extends RootPersistentEntity { - - @JsonProperty("source_table") - private String sourceTable; - @JsonProperty("source_column") - private String sourceColumn; - @JsonProperty("source_column_index") - private int sourceColumnIndex; // 0 based - @JsonProperty("data_type") - private String dataType; - @JsonProperty("input") - private TableSignature input; - @JsonProperty("dictionary_class") - private String dictionaryClass; - @JsonProperty("cardinality") - private int cardinality; - - transient Dictionary<?> dictionaryObject; - - public DictionaryInfo() { - } - - public DictionaryInfo(String sourceTable, String sourceColumn, int sourceColumnIndex, String dataType, TableSignature input) { - - this.updateRandomUuid(); - - this.sourceTable = sourceTable; - this.sourceColumn = sourceColumn; - this.sourceColumnIndex = sourceColumnIndex; - this.dataType = dataType; - this.input = input; - } - - public DictionaryInfo(DictionaryInfo other) { - - this.updateRandomUuid(); - - this.sourceTable = other.sourceTable; - this.sourceColumn = other.sourceColumn; - this.sourceColumnIndex = other.sourceColumnIndex; - this.dataType = other.dataType; - this.input = other.input; - } - - // ---------------------------------------------------------------------------- - - public String getResourcePath() { - return ResourceStore.DICT_RESOURCE_ROOT + 
"/" + sourceTable + "/" + sourceColumn + "/" + uuid + ".dict"; - } - - public String getResourceDir() { - return ResourceStore.DICT_RESOURCE_ROOT + "/" + sourceTable + "/" + sourceColumn; - } - - // ---------------------------------------------------------------------------- - - // to decide if two dictionaries are built on the same table/column, - // regardless of their signature - public boolean isDictOnSameColumn(DictionaryInfo other) { - return this.sourceTable.equalsIgnoreCase(other.sourceTable) && this.sourceColumn.equalsIgnoreCase(other.sourceColumn) && this.sourceColumnIndex == other.sourceColumnIndex && this.dataType.equalsIgnoreCase(other.dataType) && this.dictionaryClass.equalsIgnoreCase(other.dictionaryClass); - } - - public String getSourceTable() { - return sourceTable; - } - - public void setSourceTable(String sourceTable) { - this.sourceTable = sourceTable; - } - - public String getSourceColumn() { - return sourceColumn; - } - - public void setSourceColumn(String sourceColumn) { - this.sourceColumn = sourceColumn; - } - - public int getSourceColumnIndex() { - return sourceColumnIndex; - } - - public void setSourceColumnIndex(int sourceColumnIndex) { - this.sourceColumnIndex = sourceColumnIndex; - } - - public String getDataType() { - return dataType; - } - - public void setDataType(String dataType) { - this.dataType = dataType; - } - - public TableSignature getInput() { - return input; - } - - public void setInput(TableSignature input) { - this.input = input; - } - - public String getDictionaryClass() { - return dictionaryClass; - } - - public void setDictionaryClass(String dictionaryClass) { - this.dictionaryClass = dictionaryClass; - } - - public Dictionary<?> getDictionaryObject() { - return dictionaryObject; - } - - public void setDictionaryObject(Dictionary<?> dictionaryObject) { - this.dictionaryObject = dictionaryObject; - } - - public int getCardinality() { - return cardinality; - } - - public void setCardinality(int cardinality) { - 
this.cardinality = cardinality; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java deleted file mode 100644 index 6381643..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfoSerializer.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - -import org.apache.kylin.common.persistence.Serializer; -import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.common.util.JsonUtil; - -/** - * @author yangli9 - * - */ -public class DictionaryInfoSerializer implements Serializer<DictionaryInfo> { - - public static final DictionaryInfoSerializer FULL_SERIALIZER = new DictionaryInfoSerializer(false); - public static final DictionaryInfoSerializer INFO_SERIALIZER = new DictionaryInfoSerializer(true); - - private boolean infoOnly; - - public DictionaryInfoSerializer() { - this(false); - } - - public DictionaryInfoSerializer(boolean infoOnly) { - this.infoOnly = infoOnly; - } - - @Override - public void serialize(DictionaryInfo obj, DataOutputStream out) throws IOException { - String json = JsonUtil.writeValueAsIndentString(obj); - out.writeUTF(json); - - if (infoOnly == false) - obj.getDictionaryObject().write(out); - } - - @Override - public DictionaryInfo deserialize(DataInputStream in) throws IOException { - String json = in.readUTF(); - DictionaryInfo obj = JsonUtil.readValue(json, DictionaryInfo.class); - - if (infoOnly == false) { - Dictionary<?> dict; - try { - dict = (Dictionary<?>) ClassUtil.forName(obj.getDictionaryClass(), Dictionary.class).newInstance(); - } catch (InstantiationException e) { - throw new RuntimeException(e); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e); - } - dict.readFields(in); - obj.setDictionaryObject(dict); - } - return obj; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git 
a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java deleted file mode 100644 index 9294e03..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.compress.utils.IOUtils; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.kylin.common.KylinConfig; -import org.apache.kylin.common.persistence.ResourceStore; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.common.util.HadoopUtil; -import org.apache.kylin.dict.lookup.FileTable; -import org.apache.kylin.dict.lookup.HiveTable; -import org.apache.kylin.dict.lookup.ReadableTable; -import org.apache.kylin.dict.lookup.ReadableTable.TableSignature; -import org.apache.kylin.metadata.MetadataManager; -import org.apache.kylin.metadata.model.DataModelDesc; -import org.apache.kylin.metadata.model.TblColRef; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; - -public class DictionaryManager { - - private static final Logger logger = LoggerFactory.getLogger(DictionaryManager.class); - - private static final DictionaryInfo NONE_INDICATOR = new DictionaryInfo(); - - // static cached instances - private static final ConcurrentHashMap<KylinConfig, DictionaryManager> CACHE = new ConcurrentHashMap<KylinConfig, DictionaryManager>(); - - public static DictionaryManager getInstance(KylinConfig config) { - DictionaryManager r = CACHE.get(config); - if (r == null) { - r = new DictionaryManager(config); - CACHE.put(config, r); - if (CACHE.size() > 1) { - logger.warn("More than one singleton exist"); - } - } - return r; - } - - public static 
void clearCache() { - CACHE.clear(); - } - - // ============================================================================ - - private KylinConfig config; - private LoadingCache<String, DictionaryInfo> dictCache; - - // path ==> - // DictionaryInfo - - private DictionaryManager(KylinConfig config) { - this.config = config; - this.dictCache = CacheBuilder.newBuilder().maximumSize(KylinConfig.getInstanceFromEnv().getCachedDictMaxEntrySize())// - .expireAfterWrite(1, TimeUnit.DAYS).build(new CacheLoader<String, DictionaryInfo>() { - - @Override - public DictionaryInfo load(String key) throws Exception { - DictionaryInfo dictInfo = DictionaryManager.this.load(key, true); - if (dictInfo == null) { - return NONE_INDICATOR; - } else { - return dictInfo; - } - } - }); - } - - public Dictionary<?> getDictionary(String resourcePath) throws IOException { - DictionaryInfo dictInfo = getDictionaryInfo(resourcePath); - return dictInfo == null ? null : dictInfo.getDictionaryObject(); - } - - public DictionaryInfo getDictionaryInfo(String resourcePath) throws IOException { - try { - DictionaryInfo result = dictCache.get(resourcePath); - if (result == NONE_INDICATOR) { - return null; - } else { - return result; - } - } catch (ExecutionException e) { - throw new RuntimeException(e.getCause()); - } - } - - public DictionaryInfo trySaveNewDict(Dictionary<?> newDict, DictionaryInfo newDictInfo) throws IOException { - - String dupDict = checkDupByContent(newDictInfo, newDict); - if (dupDict != null) { - logger.info("Identical dictionary content " + newDict + ", reuse existing dictionary at " + dupDict); - return getDictionaryInfo(dupDict); - } - - newDictInfo.setCardinality(newDict.getSize()); - newDictInfo.setDictionaryObject(newDict); - newDictInfo.setDictionaryClass(newDict.getClass().getName()); - - save(newDictInfo); - dictCache.put(newDictInfo.getResourcePath(), newDictInfo); - - return newDictInfo; - } - - public DictionaryInfo mergeDictionary(List<DictionaryInfo> dicts) throws 
IOException { - if (dicts.size() == 0) - return null; - - if (dicts.size() == 1) - return dicts.get(0); - - DictionaryInfo firstDictInfo = null; - int totalSize = 0; - for (DictionaryInfo info : dicts) { - // check - if (firstDictInfo == null) { - firstDictInfo = info; - } else { - if (!firstDictInfo.isDictOnSameColumn(info)) { - // don't throw exception, just output warning as legacy cube segment may build dict on PK - logger.warn("Merging dictionaries are not structurally equal : " + firstDictInfo.getResourcePath() + " and " + info.getResourcePath()); - } - } - totalSize += info.getInput().getSize(); - } - - if (firstDictInfo == null) { - throw new IllegalArgumentException("DictionaryManager.mergeDictionary input cannot be null"); - } - - DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); - TableSignature signature = newDictInfo.getInput(); - signature.setSize(totalSize); - signature.setLastModifiedTime(System.currentTimeMillis()); - signature.setPath("merged_with_no_original_path"); - - String dupDict = checkDupByInfo(newDictInfo); - if (dupDict != null) { - logger.info("Identical dictionary input " + newDictInfo.getInput() + ", reuse existing dictionary at " + dupDict); - return getDictionaryInfo(dupDict); - } - - // check for cases where merging dicts are actually same - boolean identicalSourceDicts = true; - for (int i = 1; i < dicts.size(); ++i) { - if (!dicts.get(0).getDictionaryObject().equals(dicts.get(i).getDictionaryObject())) { - identicalSourceDicts = false; - break; - } - } - if (identicalSourceDicts) { - logger.info("Use one of the merging dictionaries directly"); - return dicts.get(0); - } - - Dictionary<?> newDict = DictionaryGenerator.mergeDictionaries(newDictInfo, dicts); - return trySaveNewDict(newDict, newDictInfo); - } - - public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { - - logger.info("building dictionary for " + col); - - Object[] tmp = 
decideSourceData(model, dict, col, factColumnsPath); - String srcTable = (String) tmp[0]; - String srcCol = (String) tmp[1]; - int srcColIdx = (Integer) tmp[2]; - ReadableTable inpTable = (ReadableTable) tmp[3]; - - if (!inpTable.exists()) - return null; - - DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcCol, srcColIdx, col.getDatatype(), inpTable.getSignature()); - - String dupDict = checkDupByInfo(dictInfo); - if (dupDict != null) { - logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); - return getDictionaryInfo(dupDict); - } - - Dictionary<?> dictionary = DictionaryGenerator.buildDictionary(dictInfo, inpTable); - - return trySaveNewDict(dictionary, dictInfo); - } - - /** - * Get column origin - * - * @return 1. source table name - * 2. column name - * 3. column cardinal in source table - * 4. ReadableTable object - */ - - public Object[] decideSourceData(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { - String srcTable; - String srcCol; - int srcColIdx; - ReadableTable table; - MetadataManager metaMgr = MetadataManager.getInstance(config); - - // case of full table (dict on fact table) - if (model == null) { - srcTable = col.getTable(); - srcCol = col.getName(); - srcColIdx = col.getColumn().getZeroBasedIndex(); - int nColumns = metaMgr.getTableDesc(col.getTable()).getColumnCount(); - table = new FileTable(factColumnsPath + "/" + col.getName(), nColumns); - return new Object[] { srcTable, srcCol, srcColIdx, table }; - } - - // Decide source data of dictionary: - // 1. If 'useDict' specifies pre-defined data set, use that - // 2. 
Otherwise find a lookup table to scan through - - // Note FK on fact table is supported by scan the related PK on lookup - // table - - //String useDict = cube.getRowkey().getDictionary(col); - - // normal case, source from lookup table - if ("true".equals(dict) || "string".equals(dict) || "number".equals(dict) || "any".equals(dict)) { - // FK on fact table and join type is inner, use PK from lookup instead - if (model.isFactTable(col.getTable())) { - TblColRef pkCol = model.findPKByFK(col, "inner"); - if (pkCol != null) - col = pkCol; // scan the counterparty PK on lookup table instead - } - srcTable = col.getTable(); - srcCol = col.getName(); - srcColIdx = col.getColumn().getZeroBasedIndex(); - if (model.isFactTable(col.getTable())) { - table = new FileTable(factColumnsPath + "/" + col.getName(), -1); - } else { - table = new HiveTable(metaMgr, col.getTable()); - } - } - // otherwise could refer to a data set, e.g. common_indicators.txt - // (LEGACY PATH, since distinct values are collected from fact table) - else { - String dictDataSetPath = unpackDataSet(this.config.getTempHDFSDir(), dict); - if (dictDataSetPath == null) - throw new IllegalArgumentException("Unknown dictionary data set '" + dict + "', referred from " + col); - srcTable = "PREDEFINED"; - srcCol = dict; - srcColIdx = 0; - table = new FileTable(dictDataSetPath, -1); - } - - return new Object[] { srcTable, srcCol, srcColIdx, table }; - } - - private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException { - - InputStream in = this.getClass().getResourceAsStream("/org/apache/kylin/dict/" + dataSetName + ".txt"); - if (in == null) // data set resource not found - return null; - - ByteArrayOutputStream buf = new ByteArrayOutputStream(); - IOUtils.copy(in, buf); - in.close(); - byte[] bytes = buf.toByteArray(); - - Path tmpDataSetPath = new Path(tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt"); - - FileSystem fs = 
HadoopUtil.getFileSystem(tempHDFSDir); - boolean writtenNewFile = false; - if (fs.exists(tmpDataSetPath) == false || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) { - fs.mkdirs(tmpDataSetPath.getParent()); - FSDataOutputStream out = fs.create(tmpDataSetPath); - IOUtils.copy(new ByteArrayInputStream(bytes), out); - out.close(); - writtenNewFile = true; - } - - String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString(); - if (writtenNewFile) - logger.info("Dictionary temp data set file written to " + qualifiedPath); - return qualifiedPath; - } - - private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException { - ResourceStore store = MetadataManager.getInstance(config).getStore(); - ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); - if (existings == null) - return null; - - TableSignature input = dictInfo.getInput(); - for (String existing : existings) { - DictionaryInfo existingInfo = load(existing, false); // skip cache, direct load from store - if (input.equals(existingInfo.getInput())) - return existing; - } - - return null; - } - - private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> dict) throws IOException { - ResourceStore store = MetadataManager.getInstance(config).getStore(); - ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); - if (existings == null) - return null; - - for (String existing : existings) { - logger.info("Checking dup dict :" + existing); - DictionaryInfo existingInfo = load(existing, true); // skip cache, direct load from store - if (existingInfo == null) - logger.info("existingInfo is null"); - - if (existingInfo != null && dict.equals(existingInfo.getDictionaryObject())) - return existing; - } - - return null; - } - - public void removeDictionary(String resourcePath) throws IOException { - ResourceStore store = MetadataManager.getInstance(config).getStore(); - store.deleteResource(resourcePath); - 
dictCache.invalidate(resourcePath); - } - - public void removeDictionaries(String srcTable, String srcCol) throws IOException { - DictionaryInfo info = new DictionaryInfo(); - info.setSourceTable(srcTable); - info.setSourceColumn(srcCol); - - ResourceStore store = MetadataManager.getInstance(config).getStore(); - ArrayList<String> existings = store.listResources(info.getResourceDir()); - if (existings == null) - return; - - for (String existing : existings) - removeDictionary(existing); - } - - void save(DictionaryInfo dict) throws IOException { - ResourceStore store = MetadataManager.getInstance(config).getStore(); - String path = dict.getResourcePath(); - logger.info("Saving dictionary at " + path); - store.putResource(path, dict, DictionaryInfoSerializer.FULL_SERIALIZER); - } - - DictionaryInfo load(String resourcePath, boolean loadDictObj) throws IOException { - ResourceStore store = MetadataManager.getInstance(config).getStore(); - - logger.debug("Going to load DictionaryInfo from " + resourcePath); - DictionaryInfo info = store.getResource(resourcePath, DictionaryInfo.class, loadDictObj ? DictionaryInfoSerializer.FULL_SERIALIZER : DictionaryInfoSerializer.INFO_SERIALIZER); - - if (loadDictObj) - logger.debug("Loaded dictionary at " + resourcePath); - - return info; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java deleted file mode 100644 index ecf980a..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/IDictionaryValueEnumerator.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
/**
 * Forward-only cursor over the raw byte values a dictionary is built from.
 *
 * Extends {@link java.io.Closeable} so that an enumerator can be handed to any
 * code that releases {@code Closeable} resources; the {@code close()} signature
 * is unchanged for existing implementations.
 */
public interface IDictionaryValueEnumerator extends java.io.Closeable {

    /**
     * @return the value the cursor is currently positioned on
     * @throws IOException if the value cannot be read
     */
    byte[] current() throws IOException;

    /**
     * @return {@code true} if there is a value to read, {@code false} once the enumeration is exhausted
     * @throws IOException if the underlying source cannot be read
     */
    boolean moveNext() throws IOException;

    /**
     * Releases whatever the enumerator holds.
     *
     * @throws IOException if releasing the underlying source fails
     */
    @Override
    void close() throws IOException;
}
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.metadata.model.TblColRef; - -/** - * Created by Hongbin Ma(Binmahone) on 12/17/14. - */ -public interface ISegment { - - public abstract int getColumnLength(TblColRef col); - - public abstract Dictionary<String> getDictionary(TblColRef col); - - public String getName(); - - public String getUuid(); -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/ListDictionaryValueEnumerator.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/ListDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/ListDictionaryValueEnumerator.java deleted file mode 100644 index 9ae08a8..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/ListDictionaryValueEnumerator.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import org.apache.kylin.dict.IDictionaryValueEnumerator; - -import java.io.IOException; -import java.util.List; -import java.util.ListIterator; - -/** - * Created by dongli on 10/28/15. - */ -public class ListDictionaryValueEnumerator implements IDictionaryValueEnumerator { - ListIterator<byte[]> listIterator; - - public ListDictionaryValueEnumerator(List<byte[]> list) { - listIterator = list.listIterator(); - } - - @Override - public byte[] current() throws IOException { - return listIterator.next(); - } - - @Override - public boolean moveNext() throws IOException { - return listIterator.hasNext(); - } - - @Override - public void close() throws IOException { - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java deleted file mode 100644 index 43d62a3..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/MultipleDictionaryValueEnumerator.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import com.google.common.collect.Lists; -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.common.util.Dictionary; - -import java.io.IOException; -import java.util.List; - -/** - * Created by dongli on 10/28/15. - */ -@SuppressWarnings("rawtypes") -public class MultipleDictionaryValueEnumerator implements IDictionaryValueEnumerator { - private int curDictIndex = 0; - private Dictionary curDict; - private int curKey; - private byte[] curValue = null; - private List<Dictionary> dictionaryList; - - public MultipleDictionaryValueEnumerator(List<DictionaryInfo> dictionaryInfoList) { - dictionaryList = Lists.newArrayListWithCapacity(dictionaryInfoList.size()); - for (DictionaryInfo dictInfo : dictionaryInfoList) { - dictionaryList.add(dictInfo.getDictionaryObject()); - } - if (!dictionaryList.isEmpty()) { - curDict = dictionaryList.get(0); - curKey = curDict.getMinId(); - } - } - - @Override - public byte[] current() throws IOException { - return curValue; - } - - @Override - public boolean moveNext() throws IOException { - if (curDictIndex < dictionaryList.size() && curKey <= curDict.getMaxId()) { - byte[] buffer = new byte[curDict.getSizeOfValue()]; - int size = curDict.getValueBytesFromId(curKey, buffer, 0); - curValue = Bytes.copy(buffer, 0, size); - - if (++curKey > curDict.getMaxId()) { - if (++curDictIndex < 
dictionaryList.size()) { - curDict = dictionaryList.get(curDictIndex); - curKey = curDict.getMinId(); - } - } - - return true; - } - curValue = null; - return false; - } - - @Override - public void close() throws IOException { - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java deleted file mode 100644 index edab9d6..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionary.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; - -/** - * @author yangli9 - * - */ -public class NumberDictionary<T> extends TrieDictionary<T> { - - public static final int MAX_DIGITS_BEFORE_DECIMAL_POINT = 16; - - // encode a number into an order preserving byte sequence - // for positives -- padding '0' - // for negatives -- '-' sign, padding '9', invert digits, and terminate by ';' - static class NumberBytesCodec { - - byte[] buf = new byte[MAX_DIGITS_BEFORE_DECIMAL_POINT * 2]; - int bufOffset = 0; - int bufLen = 0; - - void encodeNumber(byte[] value, int offset, int len) { - if (len == 0) { - bufOffset = 0; - bufLen = 0; - return; - } - - if (len > buf.length) { - throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". Internal buffer is only " + buf.length + " bytes"); - } - - boolean negative = value[offset] == '-'; - - // terminate negative ';' - int start = buf.length - len; - int end = buf.length; - if (negative) { - start--; - end--; - buf[end] = ';'; - } - - // copy & find decimal point - int decimalPoint = end; - for (int i = start, j = offset; i < end; i++, j++) { - buf[i] = value[j]; - if (buf[i] == '.' && i < decimalPoint) { - decimalPoint = i; - } - } - // remove '-' sign - if (negative) { - start++; - } - - // prepend '0' - int nZeroPadding = MAX_DIGITS_BEFORE_DECIMAL_POINT - (decimalPoint - start); - if (nZeroPadding < 0 || nZeroPadding + 1 > start) - throw new IllegalArgumentException("Too many digits for NumberDictionary: " + Bytes.toString(value, offset, len) + ". 
Expect " + MAX_DIGITS_BEFORE_DECIMAL_POINT + " digits before decimal point at max."); - for (int i = 0; i < nZeroPadding; i++) { - buf[--start] = '0'; - } - - // consider negative - if (negative) { - buf[--start] = '-'; - for (int i = start + 1; i < buf.length; i++) { - int c = buf[i]; - if (c >= '0' && c <= '9') { - buf[i] = (byte) ('9' - (c - '0')); - } - } - } else { - buf[--start] = '0'; - } - - bufOffset = start; - bufLen = buf.length - start; - } - - int decodeNumber(byte[] returnValue, int offset) { - if (bufLen == 0) { - return 0; - } - - int in = bufOffset; - int end = bufOffset + bufLen; - int out = offset; - - // sign - boolean negative = buf[in] == '-'; - if (negative) { - returnValue[out++] = '-'; - in++; - end--; - } - - // remove padding - byte padding = (byte) (negative ? '9' : '0'); - for (; in < end; in++) { - if (buf[in] != padding) - break; - } - - // all paddings before '.', special case for '0' - if (in == end || !(buf[in] >= '0' && buf[in] <= '9')) { - returnValue[out++] = '0'; - } - - // copy the rest - if (negative) { - for (; in < end; in++, out++) { - int c = buf[in]; - if (c >= '0' && c <= '9') { - c = '9' - (c - '0'); - } - returnValue[out] = (byte) c; - } - } else { - System.arraycopy(buf, in, returnValue, out, end - in); - out += end - in; - } - - return out - offset; - } - } - - static ThreadLocal<NumberBytesCodec> localCodec = new ThreadLocal<NumberBytesCodec>(); - - // ============================================================================ - - public NumberDictionary() { // default constructor for Writable interface - super(); - } - - public NumberDictionary(byte[] trieBytes) { - super(trieBytes); - } - - private NumberBytesCodec getCodec() { - NumberBytesCodec codec = localCodec.get(); - if (codec == null) { - codec = new NumberBytesCodec(); - localCodec.set(codec); - } - return codec; - } - - @Override - protected boolean isNullObjectForm(T value) { - return value == null || value.equals(""); - } - - @Override - protected 
int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) { - NumberBytesCodec codec = getCodec(); - codec.encodeNumber(value, offset, len); - return super.getIdFromValueBytesImpl(codec.buf, codec.bufOffset, codec.bufLen, roundingFlag); - } - - @Override - protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) { - NumberBytesCodec codec = getCodec(); - codec.bufOffset = 0; - codec.bufLen = super.getValueBytesFromIdImpl(id, codec.buf, 0); - return codec.decodeNumber(returnValue, offset); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java b/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java deleted file mode 100644 index c35a259..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; - -/** - * @author yangli9 - * - */ -public class NumberDictionaryBuilder<T> extends TrieDictionaryBuilder<T> { - - NumberDictionary.NumberBytesCodec codec = new NumberDictionary.NumberBytesCodec(); - - public NumberDictionaryBuilder(BytesConverter<T> bytesConverter) { - super(bytesConverter); - } - - @Override - public void addValue(byte[] value) { - codec.encodeNumber(value, 0, value.length); - byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen); - super.addValue(copy); - } - - public NumberDictionary<T> build(int baseId) { - byte[] trieBytes = buildTrieBytes(baseId); - NumberDictionary<T> r = new NumberDictionary<T>(trieBytes); - return r; - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/StringBytesConverter.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/StringBytesConverter.java b/dictionary/src/main/java/org/apache/kylin/dict/StringBytesConverter.java deleted file mode 100644 index 83e9d2d..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/StringBytesConverter.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; - -public class StringBytesConverter implements BytesConverter<String> { - - @Override - public byte[] convertToBytes(String v) { - return Bytes.toBytes(v); - } - - @Override - public String convertFromBytes(byte[] b, int offset, int length) { - return Bytes.toString(b, offset, length); - } - -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/TableColumnValueEnumerator.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/TableColumnValueEnumerator.java b/dictionary/src/main/java/org/apache/kylin/dict/TableColumnValueEnumerator.java deleted file mode 100644 index 42f5791..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/TableColumnValueEnumerator.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.kylin.dict; - -import org.apache.kylin.common.util.Bytes; -import org.apache.kylin.dict.lookup.ReadableTable; - -import java.io.IOException; -import java.util.Arrays; - -/** - * Created by dongli on 10/28/15. - */ -public class TableColumnValueEnumerator implements IDictionaryValueEnumerator { - - private ReadableTable.TableReader reader; - private int colIndex; - private byte[] colValue; - - public TableColumnValueEnumerator(ReadableTable.TableReader reader, int colIndex) { - this.reader = reader; - this.colIndex = colIndex; - } - - @Override - public boolean moveNext() throws IOException { - if (reader.next()) { - String colStrValue; - String[] split = reader.getRow(); - if (split.length == 1) { - colStrValue = split[0]; - } else { - // normal case - if (split.length <= colIndex) { - throw new ArrayIndexOutOfBoundsException("Column no. " + colIndex + " not found, line split is " + Arrays.asList(split)); - } - colStrValue = split[colIndex]; - } - - colValue = Bytes.toBytes(colStrValue); - return true; - - } else { - colValue = null; - return false; - } - } - - @Override - public void close() throws IOException { - if (reader != null) - reader.close(); - } - - @Override - public byte[] current() { - return colValue; - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/6b6aa313/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java ---------------------------------------------------------------------- diff --git a/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java deleted file mode 100644 index 2b6d77d..0000000 --- a/dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.dict; - -import java.io.ByteArrayInputStream; -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.IOException; -import java.io.PrintStream; -import java.lang.ref.SoftReference; -import java.util.Arrays; -import java.util.HashMap; - -import org.apache.kylin.common.util.BytesUtil; -import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A dictionary based on Trie data structure that maps enumerations of byte[] to - * int IDs. - * - * With Trie the memory footprint of the mapping is kinda minimized at the cost - * CPU, if compared to HashMap of ID Arrays. Performance test shows Trie is - * roughly 10 times slower, so there's a cache layer overlays on top of Trie and - * gracefully fall back to Trie using a weak reference. - * - * The implementation is thread-safe. 
- * - * @author yangli9 - */ -@SuppressWarnings({ "rawtypes", "unchecked" }) -public class TrieDictionary<T> extends Dictionary<T> { - - public static final byte[] HEAD_MAGIC = new byte[] { 0x54, 0x72, 0x69, 0x65, 0x44, 0x69, 0x63, 0x74 }; // "TrieDict" - public static final int HEAD_SIZE_I = HEAD_MAGIC.length; - - public static final int BIT_IS_LAST_CHILD = 0x80; - public static final int BIT_IS_END_OF_VALUE = 0x40; - - private static final Logger logger = LoggerFactory.getLogger(TrieDictionary.class); - - private byte[] trieBytes; - - // non-persistent part - transient private int headSize; - @SuppressWarnings("unused") - transient private int bodyLen; - transient private int sizeChildOffset; - transient private int sizeNoValuesBeneath; - transient private int baseId; - transient private int maxValueLength; - transient private BytesConverter<T> bytesConvert; - - transient private int nValues; - transient private int sizeOfId; - transient private int childOffsetMask; - transient private int firstByteOffset; - - transient private boolean enableCache = true; - transient private SoftReference<HashMap> valueToIdCache; - transient private SoftReference<Object[]> idToValueCache; - - public TrieDictionary() { // default constructor for Writable interface - } - - public TrieDictionary(byte[] trieBytes) { - init(trieBytes); - } - - private void init(byte[] trieBytes) { - this.trieBytes = trieBytes; - if (BytesUtil.compareBytes(HEAD_MAGIC, 0, trieBytes, 0, HEAD_MAGIC.length) != 0) - throw new IllegalArgumentException("Wrong file type (magic does not match)"); - - try { - DataInputStream headIn = new DataInputStream( // - new ByteArrayInputStream(trieBytes, HEAD_SIZE_I, trieBytes.length - HEAD_SIZE_I)); - this.headSize = headIn.readShort(); - this.bodyLen = headIn.readInt(); - this.sizeChildOffset = headIn.read(); - this.sizeNoValuesBeneath = headIn.read(); - this.baseId = headIn.readShort(); - this.maxValueLength = headIn.readShort(); - - String converterName = 
headIn.readUTF(); - if (!converterName.isEmpty()) - this.bytesConvert = (BytesConverter<T>) ClassUtil.forName(converterName, BytesConverter.class).newInstance(); - - this.nValues = BytesUtil.readUnsigned(trieBytes, headSize + sizeChildOffset, sizeNoValuesBeneath); - this.sizeOfId = BytesUtil.sizeForValue(baseId + nValues + 1); // note baseId could raise 1 byte in ID space, +1 to reserve all 0xFF for NULL case - this.childOffsetMask = ~((BIT_IS_LAST_CHILD | BIT_IS_END_OF_VALUE) << ((sizeChildOffset - 1) * 8)); - this.firstByteOffset = sizeChildOffset + sizeNoValuesBeneath + 1; // the offset from begin of node to its first value byte - } catch (Exception e) { - if (e instanceof RuntimeException) - throw (RuntimeException) e; - else - throw new RuntimeException(e); - } - - if (enableCache) { - valueToIdCache = new SoftReference<HashMap>(new HashMap()); - idToValueCache = new SoftReference<Object[]>(new Object[nValues]); - } - } - - @Override - public int getMinId() { - return baseId; - } - - @Override - public int getMaxId() { - return baseId + nValues - 1; - } - - @Override - public int getSizeOfId() { - return sizeOfId; - } - - @Override - public int getSizeOfValue() { - return maxValueLength; - } - - @Override - final protected int getIdFromValueImpl(T value, int roundingFlag) { - if (enableCache && roundingFlag == 0) { - HashMap cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory - if (cache != null) { - Integer id = null; - id = (Integer) cache.get(value); - if (id != null) - return id.intValue(); - - byte[] valueBytes = bytesConvert.convertToBytes(value); - id = getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag); - - cache.put(value, id); - return id; - } - } - byte[] valueBytes = bytesConvert.convertToBytes(value); - return getIdFromValueBytes(valueBytes, 0, valueBytes.length, roundingFlag); - } - - @Override - protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, int roundingFlag) { - 
int seq = lookupSeqNoFromValue(headSize, value, offset, offset + len, roundingFlag); - int id = calcIdFromSeqNo(seq); - if (id < 0) - throw new IllegalArgumentException("Not a valid value: '" + bytesConvert.convertFromBytes(value, offset, len) + "'"); - return id; - } - - /** - * returns a code point from [0, nValues), preserving order of value - * - * @param n - * -- the offset of current node - * @param inp - * -- input value bytes to lookup - * @param o - * -- offset in the input value bytes matched so far - * @param inpEnd - * -- end of input - * @param roundingFlag - * -- =0: return -1 if not found -- <0: return closest smaller if - * not found, might be -1 -- >0: return closest bigger if not - * found, might be nValues - */ - private int lookupSeqNoFromValue(int n, byte[] inp, int o, int inpEnd, int roundingFlag) { - if (o == inpEnd) // special 'empty' value - return checkFlag(headSize, BIT_IS_END_OF_VALUE) ? 0 : roundSeqNo(roundingFlag, -1, -1, 0); - - int seq = 0; // the sequence no under track - - while (true) { - // match the current node, note [0] of node's value has been matched - // when this node is selected by its parent - int p = n + firstByteOffset; // start of node's value - int end = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); // end of node's value - for (p++; p < end && o < inpEnd; p++, o++) { // note matching start from [1] - if (trieBytes[p] != inp[o]) { - int comp = BytesUtil.compareByteUnsigned(trieBytes[p], inp[o]); - if (comp < 0) { - seq += BytesUtil.readUnsigned(trieBytes, n + sizeChildOffset, sizeNoValuesBeneath); - } - return roundSeqNo(roundingFlag, seq - 1, -1, seq); // mismatch - } - } - - // node completely matched, is input all consumed? - boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE); - if (o == inpEnd) { - return p == end && isEndOfValue ? 
seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input all matched - } - if (isEndOfValue) - seq++; - - // find a child to continue - int c = headSize + (BytesUtil.readUnsigned(trieBytes, n, sizeChildOffset) & childOffsetMask); - if (c == headSize) // has no children - return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only partially matched - byte inpByte = inp[o]; - int comp; - while (true) { - p = c + firstByteOffset; - comp = BytesUtil.compareByteUnsigned(trieBytes[p], inpByte); - if (comp == 0) { // continue in the matching child, reset n and - // loop again - n = c; - o++; - break; - } else if (comp < 0) { // try next child - seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); - if (checkFlag(c, BIT_IS_LAST_CHILD)) - return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input - c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); - } else { // children are ordered by their first value byte - return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input - } - } - } - } - - private int roundSeqNo(int roundingFlag, int i, int j, int k) { - if (roundingFlag == 0) - return j; - else if (roundingFlag < 0) - return i; - else - return k; - } - - @Override - final protected T getValueFromIdImpl(int id) { - if (enableCache) { - Object[] cache = idToValueCache.get(); // SoftReference to skip cache gracefully when short of memory - if (cache != null) { - int seq = calcSeqNoFromId(id); - if (seq < 0 || seq >= nValues) - throw new IllegalArgumentException("Not a valid ID: " + id); - if (cache[seq] != null) - return (T) cache[seq]; - - byte[] value = new byte[getSizeOfValue()]; - int length = getValueBytesFromId(id, value, 0); - T result = bytesConvert.convertFromBytes(value, 0, length); - - cache[seq] = result; - return result; - } - } - byte[] value = new byte[getSizeOfValue()]; - int length = getValueBytesFromId(id, value, 0); - return 
bytesConvert.convertFromBytes(value, 0, length); - } - - @Override - protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) { - if (id < baseId || id >= baseId + nValues) - throw new IllegalArgumentException("Not a valid ID: " + id); - - int seq = calcSeqNoFromId(id); - - return lookupValueFromSeqNo(headSize, seq, returnValue, offset); - } - - /** - * returns a code point from [0, nValues), preserving order of value, or -1 - * if not found - * - * @param n - * -- the offset of current node - * @param seq - * -- the code point under track - * @param returnValue - * -- where return value is written to - */ - private int lookupValueFromSeqNo(int n, int seq, byte[] returnValue, int offset) { - int o = offset; - while (true) { - // write current node value - int p = n + firstByteOffset; - int len = BytesUtil.readUnsigned(trieBytes, p - 1, 1); - System.arraycopy(trieBytes, p, returnValue, o, len); - o += len; - - // if the value is ended - boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE); - if (isEndOfValue) { - seq--; - if (seq < 0) - return o - offset; - } - - // find a child to continue - int c = headSize + (BytesUtil.readUnsigned(trieBytes, n, sizeChildOffset) & childOffsetMask); - if (c == headSize) // has no children - return -1; // no child? corrupted dictionary! - int nValuesBeneath; - while (true) { - nValuesBeneath = BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); - if (seq - nValuesBeneath < 0) { // value is under this child, reset n and loop again - n = c; - break; - } else { // go to next child - seq -= nValuesBeneath; - if (checkFlag(c, BIT_IS_LAST_CHILD)) - return -1; // no more child? corrupted dictionary! 
- p = c + firstByteOffset; - c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); - } - } - } - } - - private boolean checkFlag(int offset, int bit) { - return (trieBytes[offset] & bit) > 0; - } - - private int calcIdFromSeqNo(int seq) { - if (seq < 0 || seq >= nValues) - return -1; - else - return baseId + seq; - } - - private int calcSeqNoFromId(int id) { - return id - baseId; - } - - @Override - public void write(DataOutput out) throws IOException { - out.write(trieBytes); - } - - @Override - public void readFields(DataInput in) throws IOException { - byte[] headPartial = new byte[HEAD_MAGIC.length + Short.SIZE + Integer.SIZE]; - in.readFully(headPartial); - - if (BytesUtil.compareBytes(HEAD_MAGIC, 0, headPartial, 0, HEAD_MAGIC.length) != 0) - throw new IllegalArgumentException("Wrong file type (magic does not match)"); - - DataInputStream headIn = new DataInputStream( // - new ByteArrayInputStream(headPartial, HEAD_SIZE_I, headPartial.length - HEAD_SIZE_I)); - int headSize = headIn.readShort(); - int bodyLen = headIn.readInt(); - headIn.close(); - - byte[] all = new byte[headSize + bodyLen]; - System.arraycopy(headPartial, 0, all, 0, headPartial.length); - in.readFully(all, headPartial.length, all.length - headPartial.length); - - init(all); - } - - @Override - public void dump(PrintStream out) { - out.println("Total " + nValues + " values"); - for (int i = 0; i < nValues; i++) { - int id = calcIdFromSeqNo(i); - T value = getValueFromId(id); - out.println(id + " (" + Integer.toHexString(id) + "): " + value); - } - } - - @Override - public int hashCode() { - return Arrays.hashCode(trieBytes); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof TrieDictionary)) { - logger.info("Equals return false because o is not TrieDictionary"); - return false; - } - TrieDictionary that = (TrieDictionary) o; - return Arrays.equals(this.trieBytes, that.trieBytes); - } - - public static void main(String[] args) throws Exception { - 
TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); - b.addValue(""); - b.print(); - b.addValue("part"); - b.print(); - b.addValue("part"); - b.print(); - b.addValue("par"); - b.print(); - b.addValue("partition"); - b.print(); - b.addValue("party"); - b.print(); - b.addValue("parties"); - b.print(); - b.addValue("paint"); - b.print(); - TrieDictionary<String> dict = b.build(0); - - dict.dump(System.out); - - dict.getIdFromValueBytes(new byte[10], 0, 0); - } -}
