This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 88cc4353ef7 Fix some properties in ColumnMetadata (#16921)
88cc4353ef7 is described below
commit 88cc4353ef79bad5b63cf3b8425102b547c63b5d
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Tue Sep 30 14:13:02 2025 -0700
Fix some properties in ColumnMetadata (#16921)
---
.../creator/impl/SegmentColumnarIndexCreator.java | 57 ++++--
.../segment/index/loader/ForwardIndexHandler.java | 146 ++++++++--------
...IndexAndDictionaryBasedForwardIndexCreator.java | 33 ++--
.../local/segment/index/ColumnMetadataTest.java | 194 +++++++++++++--------
.../spi/creator/SegmentGeneratorConfig.java | 23 ++-
.../apache/pinot/spi/data/DateTimeFieldSpec.java | 2 +-
.../converter/DictionaryToRawIndexConverter.java | 16 +-
7 files changed, 265 insertions(+), 206 deletions(-)
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 262fbb24ff4..16eef8b9b1e 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -602,9 +602,16 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
convertedStartTime = TimeUtils.getValidMinTimeMillis();
timeUnit = TimeUnit.MILLISECONDS;
} else {
- timeUnit =
Preconditions.checkNotNull(_config.getSegmentTimeUnit());
- convertedEndTime = timeUnit.convert(now, TimeUnit.MILLISECONDS);
- convertedStartTime =
timeUnit.convert(TimeUtils.getValidMinTimeMillis(), TimeUnit.MILLISECONDS);
+ timeUnit = _config.getSegmentTimeUnit();
+ if (timeUnit != null) {
+ convertedEndTime = timeUnit.convert(now, TimeUnit.MILLISECONDS);
+ convertedStartTime =
timeUnit.convert(TimeUtils.getValidMinTimeMillis(), TimeUnit.MILLISECONDS);
+ } else {
+ // Use millis as the time unit if not able to infer from config
+ timeUnit = TimeUnit.MILLISECONDS;
+ convertedEndTime = now;
+ convertedStartTime = TimeUtils.getValidMinTimeMillis();
+ }
}
LOGGER.warn(
"Caught exception while writing time metadata for segment: {},
time column: {}, total docs: {}. "
@@ -648,10 +655,16 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
properties.setProperty(getKeyFor(column, TOTAL_DOCS),
String.valueOf(totalDocs));
DataType dataType = fieldSpec.getDataType();
properties.setProperty(getKeyFor(column, DATA_TYPE),
String.valueOf(dataType));
+ // TODO: When the column is raw (no dictionary), we should set
BITS_PER_ELEMENT to -1 (invalid). Currently we set it
+ // regardless of whether dictionary is created or not for backward
compatibility because ForwardIndexHandler
+ // doesn't update this value when converting a raw column to
dictionary encoded.
+ // Consider changing it after releasing 1.5.0.
+ // See https://github.com/apache/pinot/pull/16921 for details
properties.setProperty(getKeyFor(column, BITS_PER_ELEMENT),
String.valueOf(PinotDataBitSet.getNumBitsPerValue(cardinality - 1)));
+ FieldType fieldType = fieldSpec.getFieldType();
properties.setProperty(getKeyFor(column, DICTIONARY_ELEMENT_SIZE),
String.valueOf(dictionaryElementSize));
- properties.setProperty(getKeyFor(column, COLUMN_TYPE),
String.valueOf(fieldSpec.getFieldType()));
+ properties.setProperty(getKeyFor(column, COLUMN_TYPE),
String.valueOf(fieldType));
properties.setProperty(getKeyFor(column, IS_SORTED),
String.valueOf(columnIndexCreationInfo.isSorted()));
properties.setProperty(getKeyFor(column, HAS_DICTIONARY),
String.valueOf(hasDictionary));
properties.setProperty(getKeyFor(column, IS_SINGLE_VALUED),
String.valueOf(fieldSpec.isSingleValueField()));
@@ -661,7 +674,8 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
String.valueOf(columnIndexCreationInfo.getTotalNumberOfEntries()));
properties.setProperty(getKeyFor(column, IS_AUTO_GENERATED),
String.valueOf(columnIndexCreationInfo.isAutoGenerated()));
- if (dataType.equals(DataType.STRING) || dataType.equals(DataType.BYTES) ||
dataType.equals(DataType.JSON)) {
+ DataType storedType = dataType.getStoredType();
+ if (storedType == DataType.STRING || storedType == DataType.BYTES) {
properties.setProperty(getKeyFor(column, SCHEMA_MAX_LENGTH),
fieldSpec.getEffectiveMaxLength());
// TODO let's revisit writing effective maxLengthStrategy into metadata,
as changing it right now may affect
// segment's CRC value
@@ -685,15 +699,28 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
}
}
- // datetime field
- if (fieldSpec.getFieldType() == FieldType.DATE_TIME) {
+ // Datetime field
+ if (fieldType == FieldType.DATE_TIME) {
DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec;
properties.setProperty(getKeyFor(column, DATETIME_FORMAT),
dateTimeFieldSpec.getFormat());
properties.setProperty(getKeyFor(column, DATETIME_GRANULARITY),
dateTimeFieldSpec.getGranularity());
}
- // complex field
- if (fieldSpec.getFieldType() == FieldType.COMPLEX) {
+ if (fieldType != FieldType.COMPLEX) {
+ // Regular (non-complex) field
+ if (totalDocs > 0) {
+ Object min = columnIndexCreationInfo.getMin();
+ Object max = columnIndexCreationInfo.getMax();
+ // NOTE:
+ // Min/max could be null for real-time aggregate metrics. We don't
directly call addColumnMinMaxValueInfo() to
+ // avoid setting MIN_MAX_VALUE_INVALID flag, which will prevent
ColumnMinMaxValueGenerator from generating them
+ // when loading the segment.
+ if (min != null && max != null) {
+ addColumnMinMaxValueInfo(properties, column, min, max, storedType);
+ }
+ }
+ } else {
+ // Complex field
ComplexFieldSpec complexFieldSpec = (ComplexFieldSpec) fieldSpec;
properties.setProperty(getKeyFor(column, COMPLEX_CHILD_FIELD_NAMES),
new ArrayList<>(complexFieldSpec.getChildFieldSpecs().keySet()));
@@ -702,17 +729,9 @@ public class SegmentColumnarIndexCreator implements
SegmentCreator {
}
}
- // NOTE: Min/max could be null for real-time aggregate metrics.
- if ((fieldSpec.getFieldType() != FieldType.COMPLEX) && (totalDocs > 0)) {
- Object min = columnIndexCreationInfo.getMin();
- Object max = columnIndexCreationInfo.getMax();
- if (min != null && max != null) {
- addColumnMinMaxValueInfo(properties, column, min, max,
dataType.getStoredType());
- }
- }
-
+ // TODO: Revisit whether we should set default null value for complex field
String defaultNullValue =
columnIndexCreationInfo.getDefaultNullValue().toString();
- if (dataType.getStoredType() == DataType.STRING) {
+ if (storedType == DataType.STRING) {
// NOTE: Do not limit length of default null value because we need exact
value to determine whether the default
// null value changes
defaultNullValue =
CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(defaultNullValue);
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java
index f63e71f54e0..a45e0f35947 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java
@@ -31,6 +31,7 @@ import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.io.util.PinotDataBitSet;
import
org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueVarByteRawIndexCreator;
import
org.apache.pinot.segment.local.segment.creator.impl.stats.AbstractColumnStatisticsCollector;
@@ -48,6 +49,7 @@ import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
import org.apache.pinot.segment.spi.compression.DictIdCompressionType;
+import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo;
import org.apache.pinot.segment.spi.creator.IndexCreationContext;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
@@ -59,6 +61,7 @@ import org.apache.pinot.segment.spi.index.IndexReaderFactory;
import org.apache.pinot.segment.spi.index.IndexType;
import org.apache.pinot.segment.spi.index.StandardIndexes;
import org.apache.pinot.segment.spi.index.creator.ForwardIndexCreator;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
import org.apache.pinot.segment.spi.index.reader.ForwardIndexReaderContext;
@@ -72,10 +75,7 @@ import org.apache.pinot.spi.data.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.CARDINALITY;
-import static
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.DICTIONARY_ELEMENT_SIZE;
-import static
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.HAS_DICTIONARY;
-import static
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.getKeyFor;
+import static org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column.*;
/**
@@ -342,6 +342,8 @@ public class ForwardIndexHandler extends BaseIndexHandler {
Map<String, String> metadataProperties = new HashMap<>();
metadataProperties.put(getKeyFor(column, HAS_DICTIONARY),
String.valueOf(false));
metadataProperties.put(getKeyFor(column, DICTIONARY_ELEMENT_SIZE),
String.valueOf(0));
+ // TODO: See https://github.com/apache/pinot/pull/16921 for details
+ // metadataProperties.put(getKeyFor(column, BITS_PER_ELEMENT),
String.valueOf(-1));
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory,
metadataProperties);
// Remove the inverted index, FST index and range index
@@ -795,15 +797,16 @@ public class ForwardIndexHandler extends BaseIndexHandler
{
private void createDictBasedForwardIndex(String column,
SegmentDirectory.Writer segmentWriter)
throws Exception {
- ColumnMetadata existingColMetadata =
_segmentDirectory.getSegmentMetadata().getColumnMetadataFor(column);
- boolean isSingleValue = existingColMetadata.isSingleValue();
-
- File indexDir = _segmentDirectory.getSegmentMetadata().getIndexDir();
- String segmentName = _segmentDirectory.getSegmentMetadata().getName();
+ SegmentMetadataImpl segmentMetadata =
_segmentDirectory.getSegmentMetadata();
+ File indexDir = segmentMetadata.getIndexDir();
+ String segmentName = segmentMetadata.getName();
File inProgress = new File(indexDir, column + ".dict.inprogress");
File dictionaryFile = new File(indexDir, column +
V1Constants.Dict.FILE_EXTENSION);
+
+ ColumnMetadata existingColMetadata =
segmentMetadata.getColumnMetadataFor(column);
+ FieldSpec fieldSpec = existingColMetadata.getFieldSpec();
String fwdIndexFileExtension;
- if (isSingleValue) {
+ if (fieldSpec.isSingleValueField()) {
if (existingColMetadata.isSorted()) {
fwdIndexFileExtension =
V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION;
} else {
@@ -825,22 +828,62 @@ public class ForwardIndexHandler extends BaseIndexHandler
{
FileUtils.deleteQuietly(dictionaryFile);
}
- LOGGER.info("Creating a new dictionary for segment={} and column={}",
segmentName, column);
- AbstractColumnStatisticsCollector statsCollector =
- getStatsCollector(column,
existingColMetadata.getDataType().getStoredType());
- SegmentDictionaryCreator dictionaryCreator =
- buildDictionary(column, existingColMetadata, segmentWriter,
statsCollector);
- LoaderUtils.writeIndexToV3Format(segmentWriter, column, dictionaryFile,
StandardIndexes.dictionary());
+ AbstractColumnStatisticsCollector statsCollector;
+ SegmentDictionaryCreator dictionaryCreator;
+ try (ForwardIndexReader<?> reader = StandardIndexes.forward()
+ .getReaderFactory()
+ .createIndexReader(segmentWriter, _fieldIndexConfigs.get(column),
existingColMetadata)) {
+ assert reader != null;
+
+ LOGGER.info("Creating a new dictionary for segment={} and column={}",
segmentName, column);
+ int numDocs = existingColMetadata.getTotalDocs();
+ statsCollector = getStatsCollector(column,
fieldSpec.getDataType().getStoredType());
+ // NOTE:
+ // Special null handling is not necessary here. This is because, the
existing default null value in the raw
+ // forwardIndex will be retained as such while creating the dictionary
and dict-based forward index. Also, null
+ // value vectors maintain a bitmap of docIds. No handling is necessary
there.
+ try (PinotSegmentColumnReader columnReader = new
PinotSegmentColumnReader(reader, null, null,
+ existingColMetadata.getMaxNumberOfMultiValues())) {
+ for (int i = 0; i < numDocs; i++) {
+ statsCollector.collect(columnReader.getValue(i));
+ }
+ statsCollector.seal();
+ }
+ DictionaryIndexConfig dictConf =
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.dictionary());
+ boolean optimizeDictionaryType =
_tableConfig.getIndexingConfig().isOptimizeDictionaryType();
+ boolean useVarLength = dictConf.getUseVarLengthDictionary() ||
DictionaryIndexType.shouldUseVarLengthDictionary(
+ reader.getStoredType(), statsCollector) || (optimizeDictionaryType
+ &&
DictionaryIndexType.optimizeTypeShouldUseVarLengthDictionary(reader.getStoredType(),
statsCollector));
+ dictionaryCreator = new SegmentDictionaryCreator(fieldSpec,
segmentMetadata.getIndexDir(), useVarLength);
+ dictionaryCreator.build(statsCollector.getUniqueValuesSet());
+
+ LOGGER.info("Built dictionary. Rewriting dictionary enabled forward
index for segment={} and column={}",
+ segmentName, column);
+ ColumnIndexCreationInfo creationInfo =
+ new ColumnIndexCreationInfo(statsCollector, true, useVarLength,
false, fieldSpec.getDefaultNullValue());
+ IndexCreationContext context = IndexCreationContext.builder()
+ .withIndexDir(indexDir)
+ .withFieldSpec(fieldSpec)
+ .withColumnIndexCreationInfo(creationInfo)
+ .withTotalDocs(numDocs)
+ .withDictionary(true)
+ .withTableNameWithType(_tableConfig.getTableName())
+ .withContinueOnError(
+ _tableConfig.getIngestionConfig() != null &&
_tableConfig.getIngestionConfig().isContinueOnError())
+ .build();
+ ForwardIndexConfig config =
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.forward());
+ try (ForwardIndexCreator creator =
StandardIndexes.forward().createIndexCreator(context, config)) {
+ forwardIndexRewriteHelper(column, existingColMetadata, reader,
creator, numDocs, dictionaryCreator, null);
+ }
+ }
- LOGGER.info("Built dictionary. Rewriting dictionary enabled forward index
for segment={} and column={}",
- segmentName, column);
- writeDictEnabledForwardIndex(column, existingColMetadata, segmentWriter,
indexDir, dictionaryCreator);
// We used the existing forward index to generate a new forward index. The
existing forward index will be in V3
// format and the new forward index will be in V1 format. Remove the
existing forward index as it is not needed
// anymore. Note that removeIndex() will only mark an index for removal
and remove the in-memory state. The
// actual cleanup from columns.psf file will happen when
singleFileIndexDirectory.cleanupRemovedIndices() is
// called during segmentWriter.close().
segmentWriter.removeIndex(column, StandardIndexes.forward());
+ LoaderUtils.writeIndexToV3Format(segmentWriter, column, dictionaryFile,
StandardIndexes.dictionary());
LoaderUtils.writeIndexToV3Format(segmentWriter, column, fwdIndexFile,
StandardIndexes.forward());
LOGGER.info("Created forwardIndex. Updating metadata properties for
segment={} and column={}", segmentName, column);
@@ -851,7 +894,10 @@ public class ForwardIndexHandler extends BaseIndexHandler {
// If realtime segments were completed when the column was RAW, the
cardinality value is populated as Integer
// .MIN_VALUE. When dictionary is enabled for this column later,
cardinality value should be rightly populated so
// that the dictionary can be loaded.
- metadataProperties.put(getKeyFor(column, CARDINALITY),
String.valueOf(statsCollector.getCardinality()));
+ int cardinality = statsCollector.getCardinality();
+ metadataProperties.put(getKeyFor(column, CARDINALITY),
String.valueOf(cardinality));
+ metadataProperties.put(getKeyFor(column, BITS_PER_ELEMENT),
+ String.valueOf(PinotDataBitSet.getNumBitsPerValue(cardinality - 1)));
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory,
metadataProperties);
// We remove indexes that have to be rewritten when a dictEnabled is
toggled. Note that the respective index
@@ -864,64 +910,6 @@ public class ForwardIndexHandler extends BaseIndexHandler {
LOGGER.info("Created dictionary based forward index for segment: {},
column: {}", segmentName, column);
}
- private SegmentDictionaryCreator buildDictionary(String column,
ColumnMetadata existingColMetadata,
- SegmentDirectory.Writer segmentWriter, AbstractColumnStatisticsCollector
statsCollector)
- throws Exception {
- int numDocs = existingColMetadata.getTotalDocs();
-
- // Get the forward index reader factory and create a reader
- IndexReaderFactory<ForwardIndexReader> readerFactory =
StandardIndexes.forward().getReaderFactory();
- try (ForwardIndexReader<?> reader =
readerFactory.createIndexReader(segmentWriter, _fieldIndexConfigs.get(column),
- existingColMetadata)) {
- // Note: Special Null handling is not necessary here. This is because,
the existing default null value in the
- // raw forwardIndex will be retained as such while created the
dictionary and dict-based forward index. Also,
- // null value vectors maintain a bitmap of docIds. No handling is
necessary there.
- PinotSegmentColumnReader columnReader =
- new PinotSegmentColumnReader(reader, null, null,
existingColMetadata.getMaxNumberOfMultiValues());
- for (int i = 0; i < numDocs; i++) {
- Object obj = columnReader.getValue(i);
- statsCollector.collect(obj);
- }
- statsCollector.seal();
-
- DictionaryIndexConfig dictConf =
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.dictionary());
-
- boolean optimizeDictionaryType =
_tableConfig.getIndexingConfig().isOptimizeDictionaryType();
- boolean useVarLength = dictConf.getUseVarLengthDictionary() ||
DictionaryIndexType.shouldUseVarLengthDictionary(
- reader.getStoredType(), statsCollector) || (optimizeDictionaryType
- &&
DictionaryIndexType.optimizeTypeShouldUseVarLengthDictionary(reader.getStoredType(),
statsCollector));
- SegmentDictionaryCreator dictionaryCreator = new
SegmentDictionaryCreator(existingColMetadata.getFieldSpec(),
- _segmentDirectory.getSegmentMetadata().getIndexDir(), useVarLength);
-
- dictionaryCreator.build(statsCollector.getUniqueValuesSet());
- return dictionaryCreator;
- }
- }
-
- private void writeDictEnabledForwardIndex(String column, ColumnMetadata
existingColMetadata,
- SegmentDirectory.Writer segmentWriter, File indexDir,
SegmentDictionaryCreator dictionaryCreator)
- throws Exception {
- // Get the forward index reader factory and create a reader
- IndexReaderFactory<ForwardIndexReader> readerFactory =
StandardIndexes.forward().getReaderFactory();
- try (ForwardIndexReader<?> reader =
readerFactory.createIndexReader(segmentWriter, _fieldIndexConfigs.get(column),
- existingColMetadata)) {
- IndexCreationContext.Builder builder =
-
IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(existingColMetadata)
- .withTableNameWithType(_tableConfig.getTableName())
- .withContinueOnError(_tableConfig.getIngestionConfig() != null
- && _tableConfig.getIngestionConfig().isContinueOnError());
- // existingColMetadata has dictEnable=false. Overwrite the value.
- builder.withDictionary(true);
- IndexCreationContext context = builder.build();
- ForwardIndexConfig config =
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.forward());
-
- try (ForwardIndexCreator creator =
StandardIndexes.forward().createIndexCreator(context, config)) {
- int numDocs = existingColMetadata.getTotalDocs();
- forwardIndexRewriteHelper(column, existingColMetadata, reader,
creator, numDocs, dictionaryCreator, null);
- }
- }
- }
-
static void removeDictRelatedIndexes(String column, SegmentDirectory.Writer
segmentWriter) {
// TODO: Move this logic as a static function in each index creator.
@@ -965,6 +953,8 @@ public class ForwardIndexHandler extends BaseIndexHandler {
Map<String, String> metadataProperties = new HashMap<>();
metadataProperties.put(getKeyFor(column, HAS_DICTIONARY),
String.valueOf(false));
metadataProperties.put(getKeyFor(column, DICTIONARY_ELEMENT_SIZE),
String.valueOf(0));
+ // TODO: See https://github.com/apache/pinot/pull/16921 for details
+ // metadataProperties.put(getKeyFor(column, BITS_PER_ELEMENT),
String.valueOf(-1));
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory,
metadataProperties);
// Remove range index, inverted index and FST index.
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
index cf3d4b47e2f..9e267f90ef4 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java
@@ -216,9 +216,11 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
// MV columns, in addition to dictionary related metadata,
MAX_MULTI_VALUE_ELEMENTS and TOTAL_NUMBER_OF_ENTRIES
// may be modified which can be left behind in the modified state even
on forward index deletion.
LOGGER.info("Created forward index from inverted index and dictionary.
Updating metadata properties for "
- + "segment: {}, column: {}, property list: {}, is temporary: {}",
segmentName, _columnName,
+ + "segment: {}, column: {}, property list: {}, is temporary:
{}", segmentName, _columnName,
metadataProperties, _isTemporaryForwardIndex);
- _segmentMetadata =
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory,
metadataProperties);
+ if (!metadataProperties.isEmpty()) {
+ _segmentMetadata =
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory,
metadataProperties);
+ }
} catch (Exception e) {
throw new IOException(
String.format("Failed to update metadata properties for segment: %s,
column: %s", segmentName, _columnName),
@@ -280,11 +282,16 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
writeToForwardIndex(dictionary, context);
// Setup and return the metadata properties to update
- Map<String, String> metadataProperties = new HashMap<>();
- metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY),
String.valueOf(_dictionaryEnabled));
- metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE),
- String.valueOf(_dictionaryEnabled ?
_columnMetadata.getColumnMaxLength() : 0));
- return metadataProperties;
+ if (_dictionaryEnabled) {
+ return Map.of();
+ } else {
+ return Map.of(
+ getKeyFor(_columnName, HAS_DICTIONARY), String.valueOf(false),
+ getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE), String.valueOf(0)
+ // TODO: See https://github.com/apache/pinot/pull/16921 for details
+ // getKeyFor(_columnName, BITS_PER_ELEMENT), String.valueOf(-1)
+ );
+ }
}
}
@@ -368,13 +375,15 @@ public class
InvertedIndexAndDictionaryBasedForwardIndexCreator implements AutoC
// Setup and return the metadata properties to update
Map<String, String> metadataProperties = new HashMap<>();
- metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY),
String.valueOf(_dictionaryEnabled));
- metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE),
- String.valueOf(_dictionaryEnabled ?
_columnMetadata.getColumnMaxLength() : 0));
metadataProperties.put(getKeyFor(_columnName, MAX_MULTI_VALUE_ELEMENTS),
String.valueOf(maxNumberOfMultiValues[0]));
- metadataProperties.put(getKeyFor(_columnName, TOTAL_NUMBER_OF_ENTRIES),
- String.valueOf(_nextValueId));
+ metadataProperties.put(getKeyFor(_columnName, TOTAL_NUMBER_OF_ENTRIES),
String.valueOf(_nextValueId));
+ if (!_dictionaryEnabled) {
+ metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY),
String.valueOf(false));
+ metadataProperties.put(getKeyFor(_columnName,
DICTIONARY_ELEMENT_SIZE), String.valueOf(0));
+ // TODO: See https://github.com/apache/pinot/pull/16921 for details
+ // metadataProperties.put(getKeyFor(_columnName, BITS_PER_ELEMENT),
String.valueOf(-1));
+ }
return metadataProperties;
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
index 2b8a59b5a7e..83cb02a81a0 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
@@ -22,6 +22,7 @@ import java.io.File;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@@ -46,25 +47,32 @@ import
org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import
org.apache.pinot.segment.spi.partition.BoundedColumnValuePartitionFunction;
import org.apache.pinot.spi.config.table.ColumnPartitionConfig;
import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.FileFormat;
import org.apache.pinot.spi.env.CommonsConfigurationUtils;
import org.apache.pinot.spi.utils.TimeUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.apache.pinot.util.TestUtils;
-import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import static
org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment.SEGMENT_PADDING_CHARACTER;
+import static org.testng.Assert.*;
public class ColumnMetadataTest {
private static final String AVRO_DATA = "data/test_data-mv.avro";
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(),
"ColumnMetadataTest");
private static final String CREATOR_VERSION = "TestHadoopJar.1.1.1";
+ private static final String RAW_TABLE_NAME = "testTable";
@BeforeMethod
public void setUp()
@@ -77,67 +85,102 @@ public class ColumnMetadataTest {
FileUtils.deleteQuietly(INDEX_DIR);
}
- public SegmentGeneratorConfig createSegmentConfigWithoutCreator()
- throws Exception {
- final String filePath =
-
TestUtils.getFileFromResourceUrl(ColumnMetadataTest.class.getClassLoader().getResource(AVRO_DATA));
- // Intentionally changed this to TimeUnit.Hours to make it non-default for
testing.
+ public SegmentGeneratorConfig createSegmentConfigWithoutCreator() {
+ URL resource = getClass().getClassLoader().getResource(AVRO_DATA);
+ assertNotNull(resource);
+ String filePath = TestUtils.getFileFromResourceUrl(resource);
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
+ .setNoDictionaryColumns(List.of("column4", "column7"))
+ .build();
+ Schema schema = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
+ .addSingleValueDimension("column3", DataType.STRING)
+ .addSingleValueDimension("column4", DataType.STRING)
+ .addMultiValueDimension("column6", DataType.INT)
+ .addMultiValueDimension("column7", DataType.INT)
+ .addDateTime("daysSinceEpoch", DataType.INT, "EPOCH|HOURS", "1:HOURS")
+ .build();
SegmentGeneratorConfig config =
- SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new
File(filePath), INDEX_DIR, "daysSinceEpoch",
- TimeUnit.HOURS, "testTable");
+ SegmentTestUtils.getSegmentGeneratorConfig(new File(filePath),
FileFormat.AVRO, INDEX_DIR, RAW_TABLE_NAME,
+ tableConfig, schema);
config.setSegmentNamePostfix("1");
return config;
}
- public SegmentGeneratorConfig createSegmentConfigWithCreator()
- throws Exception {
+ public SegmentGeneratorConfig createSegmentConfigWithCreator() {
SegmentGeneratorConfig config = createSegmentConfigWithoutCreator();
config.setCreatorVersion(CREATOR_VERSION);
return config;
}
public void verifySegmentAfterLoading(SegmentMetadata segmentMetadata) {
- // Multi-value numeric dimension column.
- ColumnMetadata col7Meta = segmentMetadata.getColumnMetadataFor("column7");
- Assert.assertEquals(col7Meta.getFieldSpec(), new DimensionFieldSpec("column7", DataType.INT, false));
- Assert.assertEquals(col7Meta.getCardinality(), 359);
- Assert.assertEquals(col7Meta.getTotalDocs(), 100000);
- Assert.assertEquals(col7Meta.getBitsPerElement(), 9);
- Assert.assertEquals(col7Meta.getColumnMaxLength(), 0);
- Assert.assertFalse(col7Meta.isSorted());
- Assert.assertTrue(col7Meta.hasDictionary());
- Assert.assertEquals(col7Meta.getMaxNumberOfMultiValues(), 24);
- Assert.assertEquals(col7Meta.getTotalNumberOfEntries(), 134090);
- Assert.assertFalse(col7Meta.isAutoGenerated());
-
- // Single-value string dimension column.
+ // Single-value dictionary-encoded string dimension column
ColumnMetadata col3Meta = segmentMetadata.getColumnMetadataFor("column3");
- Assert.assertEquals(col3Meta.getFieldSpec(),
+ assertEquals(col3Meta.getFieldSpec(),
new DimensionFieldSpec("column3", DataType.STRING, true, FieldSpec.DEFAULT_MAX_LENGTH, null));
- Assert.assertEquals(col3Meta.getCardinality(), 5);
- Assert.assertEquals(col3Meta.getTotalDocs(), 100000);
- Assert.assertEquals(col3Meta.getBitsPerElement(), 3);
- Assert.assertEquals(col3Meta.getColumnMaxLength(), 4);
- Assert.assertFalse(col3Meta.isSorted());
- Assert.assertTrue(col3Meta.hasDictionary());
- Assert.assertEquals(col3Meta.getMaxNumberOfMultiValues(), 0);
- Assert.assertEquals(col3Meta.getTotalNumberOfEntries(), 100000);
- Assert.assertFalse(col3Meta.isAutoGenerated());
-
- // Time column.
- // FIXME: Currently it is modeled as dimension in the auto-generated schema
+ assertEquals(col3Meta.getCardinality(), 5);
+ assertEquals(col3Meta.getTotalDocs(), 100000);
+ assertEquals(col3Meta.getBitsPerElement(), 3);
+ assertEquals(col3Meta.getColumnMaxLength(), 4);
+ assertFalse(col3Meta.isSorted());
+ assertTrue(col3Meta.hasDictionary());
+ assertEquals(col3Meta.getMaxNumberOfMultiValues(), 0);
+ assertEquals(col3Meta.getTotalNumberOfEntries(), 100000);
+ assertFalse(col3Meta.isAutoGenerated());
+
+ // Single-value raw string dimension column
+ ColumnMetadata col4Meta = segmentMetadata.getColumnMetadataFor("column4");
+ assertEquals(col4Meta.getFieldSpec(),
+ new DimensionFieldSpec("column4", DataType.STRING, true, FieldSpec.DEFAULT_MAX_LENGTH, null));
+ assertEquals(col4Meta.getCardinality(), 5);
+ assertEquals(col4Meta.getTotalDocs(), 100000);
+ assertEquals(col4Meta.getBitsPerElement(), 3);
+ assertEquals(col4Meta.getColumnMaxLength(), 0);
+ assertFalse(col4Meta.isSorted());
+ assertFalse(col4Meta.hasDictionary());
+ assertEquals(col4Meta.getMaxNumberOfMultiValues(), 0);
+ assertEquals(col4Meta.getTotalNumberOfEntries(), 100000);
+ assertFalse(col4Meta.isAutoGenerated());
+
+ // Multi-value dictionary-encoded int dimension column
+ ColumnMetadata col6Meta = segmentMetadata.getColumnMetadataFor("column6");
+ assertEquals(col6Meta.getFieldSpec(), new DimensionFieldSpec("column6", DataType.INT, false));
+ assertEquals(col6Meta.getCardinality(), 18499);
+ assertEquals(col6Meta.getTotalDocs(), 100000);
+ assertEquals(col6Meta.getBitsPerElement(), 15);
+ assertEquals(col6Meta.getColumnMaxLength(), 0);
+ assertFalse(col6Meta.isSorted());
+ assertTrue(col6Meta.hasDictionary());
+ assertEquals(col6Meta.getMaxNumberOfMultiValues(), 13);
+ assertEquals(col6Meta.getTotalNumberOfEntries(), 106688);
+ assertFalse(col6Meta.isAutoGenerated());
+
+ // Multi-value raw int dimension column
+ ColumnMetadata col7Meta = segmentMetadata.getColumnMetadataFor("column7");
+ assertEquals(col7Meta.getFieldSpec(), new DimensionFieldSpec("column7", DataType.INT, false));
+ assertEquals(col7Meta.getCardinality(), 359);
+ assertEquals(col7Meta.getTotalDocs(), 100000);
+ assertEquals(col7Meta.getBitsPerElement(), 9);
+ assertEquals(col7Meta.getColumnMaxLength(), 0);
+ assertFalse(col7Meta.isSorted());
+ assertFalse(col7Meta.hasDictionary());
+ assertEquals(col7Meta.getMaxNumberOfMultiValues(), 24);
+ assertEquals(col7Meta.getTotalNumberOfEntries(), 134090);
+ assertFalse(col7Meta.isAutoGenerated());
+
+ // Date-time column
ColumnMetadata timeColumn =
segmentMetadata.getColumnMetadataFor("daysSinceEpoch");
- Assert.assertEquals(timeColumn.getFieldSpec(), new DimensionFieldSpec("daysSinceEpoch", DataType.INT, true));
- Assert.assertEquals(timeColumn.getColumnName(), "daysSinceEpoch");
- Assert.assertEquals(timeColumn.getCardinality(), 1);
- Assert.assertEquals(timeColumn.getTotalDocs(), 100000);
- Assert.assertEquals(timeColumn.getBitsPerElement(), 1);
- Assert.assertEquals(timeColumn.getColumnMaxLength(), 0);
- Assert.assertTrue(timeColumn.isSorted());
- Assert.assertTrue(timeColumn.hasDictionary());
- Assert.assertEquals(timeColumn.getMaxNumberOfMultiValues(), 0);
- Assert.assertEquals(timeColumn.getTotalNumberOfEntries(), 100000);
- Assert.assertFalse(timeColumn.isAutoGenerated());
+ assertEquals(timeColumn.getFieldSpec(),
+ new DateTimeFieldSpec("daysSinceEpoch", DataType.INT, "EPOCH|HOURS", "1:HOURS"));
+ assertEquals(timeColumn.getColumnName(), "daysSinceEpoch");
+ assertEquals(timeColumn.getCardinality(), 1);
+ assertEquals(timeColumn.getTotalDocs(), 100000);
+ assertEquals(timeColumn.getBitsPerElement(), 1);
+ assertEquals(timeColumn.getColumnMaxLength(), 0);
+ assertTrue(timeColumn.isSorted());
+ assertTrue(timeColumn.hasDictionary());
+ assertEquals(timeColumn.getMaxNumberOfMultiValues(), 0);
+ assertEquals(timeColumn.getTotalNumberOfEntries(), 100000);
+ assertFalse(timeColumn.isAutoGenerated());
}
@Test
@@ -155,7 +198,7 @@ public class ColumnMetadataTest {
// Make sure we got the creator name as well.
String creatorName = segmentMetadata.getCreatorName();
- Assert.assertEquals(creatorName, CREATOR_VERSION);
+ assertEquals(creatorName, CREATOR_VERSION);
}
@Test
@@ -172,7 +215,7 @@ public class ColumnMetadataTest {
verifySegmentAfterLoading(segmentMetadata);
// Make sure we get null for creator name.
- Assert.assertNull(segmentMetadata.getCreatorName());
+ assertNull(segmentMetadata.getCreatorName());
}
@Test
@@ -208,15 +251,15 @@ public class ColumnMetadataTest {
SegmentMetadata segmentMetadata = new
SegmentMetadataImpl(INDEX_DIR.listFiles()[0]);
verifySegmentAfterLoading(segmentMetadata);
// Make sure we get null for creator name.
- Assert.assertNull(segmentMetadata.getCreatorName());
+ assertNull(segmentMetadata.getCreatorName());
// Verify segment partitioning metadata.
ColumnMetadata col3Meta = segmentMetadata.getColumnMetadataFor("column3");
- Assert.assertNotNull(col3Meta.getPartitionFunction());
- Assert.assertTrue(col3Meta.getPartitionFunction() instanceof
BoundedColumnValuePartitionFunction);
- Assert.assertEquals(col3Meta.getPartitionFunction().getNumPartitions(), 4);
- Assert.assertEquals(col3Meta.getPartitionFunction().getFunctionConfig(),
functionConfig);
- Assert.assertEquals(col3Meta.getPartitions(), Stream.of(0, 1, 2,
3).collect(Collectors.toSet()));
+ assertNotNull(col3Meta.getPartitionFunction());
+ assertTrue(col3Meta.getPartitionFunction() instanceof
BoundedColumnValuePartitionFunction);
+ assertEquals(col3Meta.getPartitionFunction().getNumPartitions(), 4);
+ assertEquals(col3Meta.getPartitionFunction().getFunctionConfig(),
functionConfig);
+ assertEquals(col3Meta.getPartitions(), Stream.of(0, 1, 2,
3).collect(Collectors.toSet()));
}
@Test
@@ -229,16 +272,15 @@ public class ColumnMetadataTest {
PropertiesConfiguration propertiesConfiguration =
CommonsConfigurationUtils.fromFile(metadataFile);
ColumnMetadataImpl installationOutput =
ColumnMetadataImpl.fromPropertiesConfiguration("installation_output",
propertiesConfiguration);
- Assert.assertEquals(installationOutput.getMinValue(),
+ assertEquals(installationOutput.getMinValue(),
"\r\n\r\n utils em::C:\\dir\\utils\r\nPSParentPath :
Mi");
}
@Test
public void testComplexFieldSpec() {
- ComplexFieldSpec intMapFieldSpec = new ComplexFieldSpec("intMap",
DataType.MAP, true, Map.of(
- "key", new DimensionFieldSpec("key", DataType.STRING, true),
- "value", new DimensionFieldSpec("value", DataType.INT, true)
- ));
+ ComplexFieldSpec intMapFieldSpec = new ComplexFieldSpec("intMap",
DataType.MAP, true,
+ Map.of("key", new DimensionFieldSpec("key", DataType.STRING, true),
"value",
+ new DimensionFieldSpec("value", DataType.INT, true)));
ColumnIndexCreationInfo columnIndexCreationInfo =
new ColumnIndexCreationInfo(new DefaultColumnStatistics(null, null,
null, false, 1, 1), false, false, false,
Map.of());
@@ -247,7 +289,7 @@ public class ColumnMetadataTest {
SegmentColumnarIndexCreator.addColumnMetadataInfo(config, "intMap",
columnIndexCreationInfo, 1, intMapFieldSpec,
false, -1);
ColumnMetadataImpl intMapColumnMetadata =
ColumnMetadataImpl.fromPropertiesConfiguration("intMap", config);
- Assert.assertEquals(intMapColumnMetadata.getFieldSpec(), intMapFieldSpec);
+ assertEquals(intMapColumnMetadata.getFieldSpec(), intMapFieldSpec);
}
@Test
@@ -257,18 +299,17 @@ public class ColumnMetadataTest {
meta.addIndexSize(IndexService.getInstance().getNumericId(StandardIndexes.h3()),
0xffffffffffffL);
meta.addIndexSize(IndexService.getInstance().getNumericId(StandardIndexes.vector()),
0);
- Assert.assertEquals(meta.getNumIndexes(), 3);
- Assert.assertEquals(meta.getIndexSizeFor(StandardIndexes.json()), 12345L);
- Assert.assertEquals(meta.getIndexSizeFor(StandardIndexes.h3()),
0xffffffffffffL);
- Assert.assertEquals(meta.getIndexSizeFor(StandardIndexes.vector()), 0);
- Assert.assertEquals(meta.getIndexSizeFor(StandardIndexes.inverted()),
ColumnMetadata.INDEX_NOT_FOUND);
+ assertEquals(meta.getNumIndexes(), 3);
+ assertEquals(meta.getIndexSizeFor(StandardIndexes.json()), 12345L);
+ assertEquals(meta.getIndexSizeFor(StandardIndexes.h3()), 0xffffffffffffL);
+ assertEquals(meta.getIndexSizeFor(StandardIndexes.vector()), 0);
+ assertEquals(meta.getIndexSizeFor(StandardIndexes.inverted()),
ColumnMetadata.INDEX_NOT_FOUND);
try {
meta.addIndexSize(IndexService.getInstance().getNumericId(StandardIndexes.fst()),
-1);
- Assert.fail();
+ fail();
} catch (IllegalArgumentException e) {
- Assert.assertEquals(e.getMessage(),
- "Index size should be a non-negative integer value between 0 and
281474976710655");
+ assertEquals(e.getMessage(), "Index size should be a non-negative
integer value between 0 and 281474976710655");
}
}
@@ -281,13 +322,14 @@ public class ColumnMetadataTest {
config.setTimeColumnName("column4");
SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
driver.init(config);
- Assert.assertThrows(NumberFormatException.class, driver::build);
+ assertThrows(NumberFormatException.class, driver::build);
}
@Test
public void testBadTimeColumnWithContinueOnError()
throws Exception {
SegmentGeneratorConfig config = createSegmentConfigWithCreator();
+
// column4 is not a time column and should cause an exception to be thrown when the segment is sealed and time
// metadata is being parsed and written
config.setTimeColumnName("column4");
@@ -295,12 +337,10 @@ public class ColumnMetadataTest {
SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
driver.init(config);
driver.build();
+
SegmentMetadata segmentMetadata = new
SegmentMetadataImpl(INDEX_DIR.listFiles()[0]);
- // The time unit being used is hours since epoch.
- long hoursSinceEpoch = System.currentTimeMillis() / TimeUnit.HOURS.toMillis(1);
- // Use tolerance of 1 hour to eliminate any flakiness in the test due to time boundaries.
- Assert.assertTrue(hoursSinceEpoch - segmentMetadata.getEndTime() <= 1);
- Assert.assertEquals(segmentMetadata.getStartTime(),
- TimeUnit.MILLISECONDS.toHours(TimeUtils.getValidMinTimeMillis()));
+ assertEquals(segmentMetadata.getTimeUnit(), TimeUnit.MILLISECONDS);
+ assertEquals(segmentMetadata.getStartTime(), TimeUtils.getValidMinTimeMillis());
+ assertTrue(System.currentTimeMillis() - segmentMetadata.getEndTime() < 60_000L);
}
}
diff --git
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index eafc92484a8..706d1ce360e 100644
---
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -150,11 +150,10 @@ public class SegmentGeneratorConfig implements
Serializable {
// NOTE: SegmentGeneratorConfig#setSchema doesn't set the time column
anymore. timeColumnName is expected to be
// read from table config.
- String timeColumnName = null;
- if (tableConfig.getValidationConfig() != null) {
- timeColumnName = tableConfig.getValidationConfig().getTimeColumnName();
+ String timeColumnName = tableConfig.getValidationConfig().getTimeColumnName();
+ if (timeColumnName != null) {
+ setTime(timeColumnName, schema);
}
- setTime(timeColumnName, schema);
IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
String segmentVersion = indexingConfig.getSegmentFormatVersion();
@@ -229,14 +228,12 @@ public class SegmentGeneratorConfig implements
Serializable {
/**
* Set time column details using the given time column
*/
- private void setTime(@Nullable String timeColumnName, Schema schema) {
- if (timeColumnName != null) {
- DateTimeFieldSpec dateTimeFieldSpec =
schema.getSpecForTimeColumn(timeColumnName);
- if (dateTimeFieldSpec != null) {
- _segmentTimeColumnDataType = dateTimeFieldSpec.getDataType();
- setTimeColumnName(dateTimeFieldSpec.getName());
- setDateTimeFormatSpec(dateTimeFieldSpec.getFormatSpec());
- }
+ private void setTime(String timeColumnName, Schema schema) {
+ DateTimeFieldSpec dateTimeFieldSpec =
schema.getSpecForTimeColumn(timeColumnName);
+ if (dateTimeFieldSpec != null) {
+ _segmentTimeColumnDataType = dateTimeFieldSpec.getDataType();
+ setTimeColumnName(dateTimeFieldSpec.getName());
+ setDateTimeFormatSpec(dateTimeFieldSpec.getFormatSpec());
}
}
@@ -260,6 +257,7 @@ public class SegmentGeneratorConfig implements Serializable
{
}
}
+ @Nullable
public DateTimeFormatSpec getDateTimeFormatSpec() {
return _dateTimeFormatSpec;
}
@@ -387,6 +385,7 @@ public class SegmentGeneratorConfig implements Serializable
{
_sequenceId = sequenceId;
}
+ @Nullable
public TimeUnit getSegmentTimeUnit() {
return _segmentTimeUnit;
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
index dbb92090d17..83f5ef67c55 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
@@ -191,7 +191,7 @@ public final class DateTimeFieldSpec extends FieldSpec {
@Override
public String toString() {
- return "< field type: DATE_TIME, field name: " + _name + ", datatype: " + _dataType + ", time column format: "
+ return "< field type: DATE_TIME, field name: " + _name + ", data type: " + _dataType + ", time column format: "
+ _format + ", time field granularity: " + _granularity + " >";
}
diff --git
a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
index 065bd27d85f..a9406a012e2 100644
---
a/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
+++
b/pinot-tools/src/main/java/org/apache/pinot/tools/segment/converter/DictionaryToRawIndexConverter.java
@@ -34,6 +34,9 @@ import
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoa
import
org.apache.pinot.segment.local.segment.index.forward.ForwardIndexCreatorFactory;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.V1Constants.MetadataKeys;
+import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Column;
+import org.apache.pinot.segment.spi.V1Constants.MetadataKeys.Segment;
import org.apache.pinot.segment.spi.compression.ChunkCompressionType;
import org.apache.pinot.segment.spi.datasource.DataSource;
import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
@@ -252,20 +255,19 @@ public class DictionaryToRawIndexConverter {
*/
private void updateMetadata(File segmentDir, String[] columns, String
tableName)
throws ConfigurationException {
- File metadataFile = new File(segmentDir,
V1Constants.MetadataKeys.METADATA_FILE_NAME);
+ File metadataFile = new File(segmentDir, MetadataKeys.METADATA_FILE_NAME);
PropertiesConfiguration properties =
CommonsConfigurationUtils.fromFile(metadataFile);
if (tableName != null) {
- properties
- .setProperty(V1Constants.MetadataKeys.Segment.TABLE_NAME, TableNameBuilder.extractRawTableName(tableName));
+ properties.setProperty(Segment.TABLE_NAME, TableNameBuilder.extractRawTableName(tableName));
}
for (String column : columns) {
- properties.setProperty(
- V1Constants.MetadataKeys.Column.getKeyFor(column, V1Constants.MetadataKeys.Column.HAS_DICTIONARY), false);
- properties.setProperty(
- V1Constants.MetadataKeys.Column.getKeyFor(column, V1Constants.MetadataKeys.Column.BITS_PER_ELEMENT), -1);
+ properties.setProperty(Column.getKeyFor(column, Column.HAS_DICTIONARY), false);
+ properties.setProperty(Column.getKeyFor(column, Column.DICTIONARY_ELEMENT_SIZE), 0);
+ properties.setProperty(Column.getKeyFor(column, Column.BITS_PER_ELEMENT), -1);
}
+
CommonsConfigurationUtils.saveToFile(properties, metadataFile);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]