This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 781f5cd Support Native FST As An Index Subtype for FST Indices (#7729) 781f5cd is described below commit 781f5cdeee37d42b0eb0eb6069107f5b4588b13a Author: Atri Sharma <atri.j...@gmail.com> AuthorDate: Sat Nov 13 05:01:47 2021 +0530 Support Native FST As An Index Subtype for FST Indices (#7729) This PR introduces the notion of subtypes to FST index -- allowing users to set a segment level flag indicating whether the index should be built using native FST or Lucene FST. --- .../queries/FSTBasedRegexpLikeQueriesTest.java | 39 ++++++----- .../creator/impl/SegmentColumnarIndexCreator.java | 13 +++- .../index/column/PhysicalColumnIndexContainer.java | 10 ++- .../segment/index/loader/IndexHandlerFactory.java | 5 +- .../segment/index/loader/IndexLoadingConfig.java | 16 ++++- ...neFSTIndexHandler.java => FSTIndexHandler.java} | 26 ++++++-- .../segment/local/utils/nativefst/FSTHeader.java | 2 +- .../utils/nativefst/NativeFSTIndexCreator.java | 2 +- .../index/creator/NativeFSTIndexCreatorTest.java | 75 ++++++++++++++++++++++ .../org/apache/pinot/segment/spi/V1Constants.java | 1 - .../spi/creator/SegmentGeneratorConfig.java | 12 ++++ .../org/apache/pinot/spi/config/table/FSTType.java | 26 ++++++++ .../pinot/spi/config/table/IndexingConfig.java | 9 +++ 13 files changed, 204 insertions(+), 32 deletions(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java index 84c8007..0a0e8ba 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java @@ -26,6 +26,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; import org.apache.commons.io.FileUtils; import org.apache.pinot.common.response.broker.AggregationResult; import org.apache.pinot.common.response.broker.BrokerResponseNative; @@ -44,6 +45,7 @@ import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; import org.apache.pinot.segment.spi.ImmutableSegment; import org.apache.pinot.segment.spi.IndexSegment; import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.FSTType; import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; @@ -69,8 +71,6 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest { private static final Integer INT_BASE_VALUE = 1000; private static final Integer NUM_ROWS = 1024; - private final List<GenericRow> _rows = new ArrayList<>(); - private IndexSegment _indexSegment; private List<IndexSegment> _indexSegments; @@ -94,19 +94,25 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest { throws Exception { FileUtils.deleteQuietly(INDEX_DIR); - buildSegment(); - IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); - Set<String> fstIndexCols = new HashSet<>(); - fstIndexCols.add(DOMAIN_NAMES_COL); - indexLoadingConfig.setFSTIndexColumns(fstIndexCols); - - Set<String> invertedIndexCols = new HashSet<>(); - invertedIndexCols.add(DOMAIN_NAMES_COL); - indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols); - ImmutableSegment immutableSegment = - ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); - _indexSegment = immutableSegment; - _indexSegments = Arrays.asList(immutableSegment, immutableSegment); + List<IndexSegment> segments = new ArrayList<>(); + for (FSTType fstType : Arrays.asList(FSTType.LUCENE, FSTType.NATIVE)) { + buildSegment(fstType); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + Set<String> fstIndexCols = new HashSet<>(); + fstIndexCols.add(DOMAIN_NAMES_COL); + indexLoadingConfig.setFSTIndexColumns(fstIndexCols); + indexLoadingConfig.setFSTIndexType(fstType); + Set<String> invertedIndexCols = new HashSet<>(); + invertedIndexCols.add(DOMAIN_NAMES_COL); + indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols); + ImmutableSegment segment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); + + segments.add(segment); + } + + _indexSegment = segments.get(ThreadLocalRandom.current().nextInt(2)); + _indexSegments = segments; } @AfterClass @@ -151,7 +157,7 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest { return rows; } - private void buildSegment() + private void buildSegment(FSTType fstType) throws Exception { List<GenericRow> rows = createTestData(NUM_ROWS); List<FieldConfig> fieldConfigs = new ArrayList<>(); @@ -171,6 +177,7 @@ public class FSTBasedRegexpLikeQueriesTest extends BaseQueriesTest { config.setOutDir(INDEX_DIR.getPath()); config.setTableName(TABLE_NAME); config.setSegmentName(SEGMENT_NAME); + config.setFSTIndexType(fstType); SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); try (RecordReader recordReader = new GenericRowRecordReader(rows)) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java index 8315d9a..476c4c1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java @@ -51,6 +51,7 @@ import org.apache.pinot.segment.local.segment.creator.impl.inv.text.LuceneFSTInd import org.apache.pinot.segment.local.segment.creator.impl.nullvalue.NullValueVectorCreator; import org.apache.pinot.segment.local.segment.creator.impl.text.LuceneTextIndexCreator; import org.apache.pinot.segment.local.utils.GeometrySerializer; +import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.compression.ChunkCompressionType; import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo; @@ -66,6 +67,7 @@ import org.apache.pinot.segment.spi.index.creator.TextIndexCreator; import org.apache.pinot.segment.spi.index.creator.TextIndexType; import org.apache.pinot.segment.spi.index.reader.H3IndexResolution; import org.apache.pinot.segment.spi.partition.PartitionFunction; +import org.apache.pinot.spi.config.table.FSTType; import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.data.DateTimeFieldSpec; import org.apache.pinot.spi.data.DateTimeFormatSpec; @@ -266,8 +268,15 @@ public class SegmentColumnarIndexCreator implements SegmentCreator { "FST index is currently only supported on STRING type columns"); Preconditions.checkState(dictEnabledColumn, "FST index is currently only supported on dictionary-encoded columns"); - _fstIndexCreatorMap.put(columnName, new LuceneFSTIndexCreator(_indexDir, columnName, - (String[]) indexCreationInfo.getSortedUniqueElementsArray())); + String[] sortedValues = (String[]) indexCreationInfo.getSortedUniqueElementsArray(); + TextIndexCreator textIndexCreator; + if (_config.getFSTIndexType() == FSTType.NATIVE) { + textIndexCreator = new NativeFSTIndexCreator(_indexDir, columnName, sortedValues); + } else { + textIndexCreator = new LuceneFSTIndexCreator(_indexDir, columnName, sortedValues); + } + + _fstIndexCreatorMap.put(columnName, textIndexCreator); } if (jsonIndexColumns.contains(columnName)) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java index 8dfd2c7..d7de202 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java @@ -55,6 +55,8 @@ import org.apache.pinot.segment.local.segment.index.readers.geospatial.Immutable import org.apache.pinot.segment.local.segment.index.readers.json.ImmutableJsonIndexReader; import org.apache.pinot.segment.local.segment.index.readers.sorted.SortedIndexReaderImpl; import org.apache.pinot.segment.local.segment.index.readers.text.LuceneTextIndexReader; +import org.apache.pinot.segment.local.utils.nativefst.FSTHeader; +import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexReader; import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer; import org.apache.pinot.segment.spi.index.reader.BloomFilterReader; @@ -176,7 +178,13 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer } if (loadFSTIndex) { - _fstIndex = new LuceneFSTIndexReader(segmentReader.getIndexFor(columnName, ColumnIndexType.FST_INDEX)); + PinotDataBuffer buffer = segmentReader.getIndexFor(columnName, ColumnIndexType.FST_INDEX); + int magicHeader = buffer.getInt(0); + if (magicHeader == FSTHeader.FST_MAGIC) { + _fstIndex = new NativeFSTIndexReader(buffer); + } else { + _fstIndex = new LuceneFSTIndexReader(buffer); + } } else { _fstIndex = null; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java index 7da4024..9dd9ecb 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexHandlerFactory.java @@ -20,10 +20,10 @@ package org.apache.pinot.segment.local.segment.index.loader; import java.io.File; import org.apache.pinot.segment.local.segment.index.loader.bloomfilter.BloomFilterHandler; +import org.apache.pinot.segment.local.segment.index.loader.invertedindex.FSTIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.invertedindex.H3IndexHandler; import org.apache.pinot.segment.local.segment.index.loader.invertedindex.InvertedIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.invertedindex.JsonIndexHandler; -import org.apache.pinot.segment.local.segment.index.loader.invertedindex.LuceneFSTIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.invertedindex.RangeIndexHandler; import org.apache.pinot.segment.local.segment.index.loader.invertedindex.TextIndexHandler; import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; @@ -48,7 +48,8 @@ public class IndexHandlerFactory { case TEXT_INDEX: return new TextIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter); case FST_INDEX: - return new LuceneFSTIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter); + return new FSTIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter, + indexLoadingConfig.getFSTIndexType()); case JSON_INDEX: return new JsonIndexHandler(indexDir, segmentMetadata, indexLoadingConfig, segmentWriter); case H3_INDEX: diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java index ef6d95d..0b04661 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/IndexLoadingConfig.java @@ -34,6 +34,7 @@ import org.apache.pinot.segment.spi.index.creator.H3IndexConfig; import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderRegistry; import org.apache.pinot.spi.config.instance.InstanceDataManagerConfig; import org.apache.pinot.spi.config.table.BloomFilterConfig; +import org.apache.pinot.spi.config.table.FSTType; import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.StarTreeIndexConfig; @@ -57,6 +58,7 @@ public class IndexLoadingConfig { private int _rangeIndexVersion = IndexingConfig.DEFAULT_RANGE_INDEX_VERSION; private Set<String> _textIndexColumns = new HashSet<>(); private Set<String> _fstIndexColumns = new HashSet<>(); + private FSTType _fstIndexType = FSTType.LUCENE; private Set<String> _jsonIndexColumns = new HashSet<>(); private Map<String, H3IndexConfig> _h3IndexConfigs = new HashMap<>(); private Set<String> _noDictionaryColumns = new HashSet<>(); // TODO: replace this by _noDictionaryConfig. @@ -107,7 +109,6 @@ public class IndexLoadingConfig { if (invertedIndexColumns != null) { _invertedIndexColumns.addAll(invertedIndexColumns); } - _rangeIndexVersion = indexingConfig.getRangeIndexVersion(); List<String> jsonIndexColumns = indexingConfig.getJsonIndexColumns(); if (jsonIndexColumns != null) { @@ -119,6 +120,10 @@ public class IndexLoadingConfig { _rangeIndexColumns.addAll(rangeIndexColumns); } + _rangeIndexVersion = indexingConfig.getRangeIndexVersion(); + + _fstIndexType = indexingConfig.getFSTIndexType(); + List<String> bloomFilterColumns = indexingConfig.getBloomFilterColumns(); if (bloomFilterColumns != null) { for (String bloomFilterColumn : bloomFilterColumns) { @@ -285,6 +290,10 @@ public class IndexLoadingConfig { return _rangeIndexVersion; } + public FSTType getFSTIndexType() { + return _fstIndexType; + } + /** * Used in two places: * (1) In {@link PhysicalColumnIndexContainer} to create the index loading info for immutable segments @@ -350,6 +359,11 @@ public class IndexLoadingConfig { } @VisibleForTesting + public void setFSTIndexType(FSTType fstType) { + _fstIndexType = fstType; + } + + @VisibleForTesting public void setJsonIndexColumns(Set<String> jsonIndexColumns) { _jsonIndexColumns = jsonIndexColumns; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java similarity index 87% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java index 24e6538..50f2b67 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java @@ -29,12 +29,15 @@ import org.apache.pinot.segment.local.segment.index.loader.IndexHandler; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.index.loader.LoaderUtils; import org.apache.pinot.segment.local.segment.index.loader.SegmentPreProcessor; +import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator; import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.SegmentMetadata; import org.apache.pinot.segment.spi.creator.SegmentVersion; +import org.apache.pinot.segment.spi.index.creator.TextIndexCreator; import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.segment.spi.store.ColumnIndexType; import org.apache.pinot.segment.spi.store.SegmentDirectory; +import org.apache.pinot.spi.config.table.FSTType; import org.apache.pinot.spi.data.FieldSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,20 +63,22 @@ import static org.apache.pinot.segment.spi.V1Constants.Indexes.FST_INDEX_FILE_EX * added column. In this case, the default column handler would have taken care of adding * dictionary for the new column. Read the dictionary to create FST index. */ -public class LuceneFSTIndexHandler implements IndexHandler { - private static final Logger LOGGER = LoggerFactory.getLogger(LuceneFSTIndexHandler.class); +public class FSTIndexHandler implements IndexHandler { + private static final Logger LOGGER = LoggerFactory.getLogger(FSTIndexHandler.class); private final File _indexDir; private final SegmentMetadata _segmentMetadata; private final SegmentDirectory.Writer _segmentWriter; private final Set<String> _columnsToAddIdx; + private final FSTType _fstType; - public LuceneFSTIndexHandler(File indexDir, SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig, - SegmentDirectory.Writer segmentWriter) { + public FSTIndexHandler(File indexDir, SegmentMetadata segmentMetadata, IndexLoadingConfig indexLoadingConfig, + SegmentDirectory.Writer segmentWriter, FSTType fstType) { _indexDir = indexDir; _segmentMetadata = segmentMetadata; _segmentWriter = segmentWriter; _columnsToAddIdx = new HashSet<>(indexLoadingConfig.getFSTIndexColumns()); + _fstType = fstType; } @Override @@ -130,13 +135,20 @@ public class LuceneFSTIndexHandler implements IndexHandler { LOGGER.info("Creating new FST index for column: {} in segment: {}, cardinality: {}", column, segmentName, columnMetadata.getCardinality()); - LuceneFSTIndexCreator luceneFSTIndexCreator = new LuceneFSTIndexCreator(_indexDir, column, null); + + TextIndexCreator fstIndexCreator; + if (_fstType == FSTType.LUCENE) { + fstIndexCreator = new LuceneFSTIndexCreator(_indexDir, column, null); + } else { + fstIndexCreator = new NativeFSTIndexCreator(_indexDir, column, null); + } + try (Dictionary dictionary = LoaderUtils.getDictionary(_segmentWriter, columnMetadata)) { for (int dictId = 0; dictId < dictionary.length(); dictId++) { - luceneFSTIndexCreator.add(dictionary.getStringValue(dictId)); + fstIndexCreator.add(dictionary.getStringValue(dictId)); } } - luceneFSTIndexCreator.seal(); + fstIndexCreator.seal(); // For v3, write the generated range index file into the single file and remove it. if (_segmentMetadata.getVersion() == SegmentVersion.v3) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java index a053c30..6f67fcb 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java @@ -31,7 +31,7 @@ public final class FSTHeader { /** * FST magic (4 bytes). */ - final static int FST_MAGIC = ('\\' << 24) | ('f' << 16) | ('s' << 8) | ('a'); + public static final int FST_MAGIC = ('\\' << 24) | ('f' << 16) | ('s' << 8) | 'a'; /** FST version number. */ final byte _version; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java index 678105a..99298b1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java @@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements TextIndexCreator { * @throws IOException */ public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.NATIVE_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.FST_INDEX_FILE_EXTENSION); _fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java new file mode 100644 index 0000000..cc4f153 --- /dev/null +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.creator; + +import java.io.File; +import java.io.IOException; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator; +import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexReader; +import org.apache.pinot.segment.spi.memory.PinotDataBuffer; +import org.testng.Assert; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import static org.apache.pinot.segment.spi.V1Constants.Indexes.FST_INDEX_FILE_EXTENSION; + + +public class NativeFSTIndexCreatorTest { + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "NativeFSTIndexCreatorTest"); + + @BeforeClass + public void setUp() + throws IOException { + FileUtils.forceMkdir(INDEX_DIR); + } + + @AfterClass + public void tearDown() + throws IOException { + FileUtils.deleteDirectory(INDEX_DIR); + } + + @Test + public void testIndexWriterReader() + throws IOException { + String[] uniqueValues = new String[3]; + uniqueValues[0] = "hello-world"; + uniqueValues[1] = "hello-world123"; + uniqueValues[2] = "still"; + + try (NativeFSTIndexCreator creator = new NativeFSTIndexCreator(INDEX_DIR, "testFSTColumn", uniqueValues)) { + creator.seal(); + } + + File fstFile = new File(INDEX_DIR, "testFSTColumn" + FST_INDEX_FILE_EXTENSION); + try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile); + NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) { + + int[] matchedDictIds = reader.getDictIds("hello.*").toArray(); + Assert.assertEquals(2, matchedDictIds.length); + Assert.assertEquals(0, matchedDictIds[0]); + Assert.assertEquals(1, matchedDictIds[1]); + + matchedDictIds = reader.getDictIds(".*llo").toArray(); + Assert.assertEquals(0, matchedDictIds.length); + } + } +} diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java index 744f0bc..e037544 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java @@ -44,7 +44,6 @@ public class V1Constants { public static final String BITMAP_INVERTED_INDEX_FILE_EXTENSION = ".bitmap.inv"; public static final String BITMAP_RANGE_INDEX_FILE_EXTENSION = ".bitmap.range"; public static final String FST_INDEX_FILE_EXTENSION = ".lucene.fst"; - public static final String NATIVE_FST_INDEX_FILE_EXTENSION = ".native.fst"; public static final String JSON_INDEX_FILE_EXTENSION = ".json.idx"; public static final String H3_INDEX_FILE_EXTENSION = ".h3.idx"; public static final String BLOOM_FILTER_FILE_EXTENSION = ".bloom"; diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index d1b16d7..0ed1d60 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -38,6 +38,7 @@ import org.apache.pinot.segment.spi.creator.name.FixedSegmentNameGenerator; import org.apache.pinot.segment.spi.creator.name.SegmentNameGenerator; import org.apache.pinot.segment.spi.creator.name.SimpleSegmentNameGenerator; import org.apache.pinot.segment.spi.index.creator.H3IndexConfig; +import org.apache.pinot.spi.config.table.FSTType; import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.SegmentPartitionConfig; @@ -91,6 +92,7 @@ public class SegmentGeneratorConfig implements Serializable { private String _segmentEndTime = null; private SegmentVersion _segmentVersion = SegmentVersion.v3; private Schema _schema = null; + private FSTType _fstTypeForFSTIndex = FSTType.LUCENE; private RecordReaderConfig _readerConfig = null; private List<StarTreeIndexConfig> _starTreeIndexConfigs = null; private boolean _enableDefaultStarTree = false; @@ -194,6 +196,8 @@ public class SegmentGeneratorConfig implements Serializable { extractH3IndexConfigsFromTableConfig(tableConfig); extractCompressionCodecConfigsFromTableConfig(tableConfig); + _fstTypeForFSTIndex = tableConfig.getIndexingConfig().getFSTIndexType(); + _nullHandlingEnabled = indexingConfig.isNullHandlingEnabled(); } } @@ -503,6 +507,14 @@ public class SegmentGeneratorConfig implements Serializable { return _sequenceId; } + public void setFSTIndexType(FSTType fstType) { + _fstTypeForFSTIndex = fstType; + } + + public FSTType getFSTIndexType() { + return _fstTypeForFSTIndex; + } + /** * This method should be used instead of setPostfix if you are adding a sequence number. */ diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java new file mode 100644 index 0000000..8f20390 --- /dev/null +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FSTType.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.config.table; + +/** + * Type of FST to be used + */ +public enum FSTType { + LUCENE, NATIVE +} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java index 07219d2..4d32b67 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java @@ -41,6 +41,7 @@ public class IndexingConfig extends BaseJsonConfig { @Deprecated // Moved to {@link IngestionConfig#getStreamIngestionConfig} private Map<String, String> _streamConfigs; private String _segmentFormatVersion; + private FSTType _fstTypeForFSTIndex; private String _columnMinMaxValueGeneratorMode; private List<String> _noDictionaryColumns; // TODO: replace this with noDictionaryConfig. private Map<String, String> _noDictionaryConfig; @@ -86,6 +87,14 @@ public class IndexingConfig extends BaseJsonConfig { return _rangeIndexVersion; } + public void setFSTIndexType(FSTType fstType) { + _fstTypeForFSTIndex = fstType; + } + + public FSTType getFSTIndexType() { + return _fstTypeForFSTIndex; + } + public void setRangeIndexVersion(int rangeIndexVersion) { _rangeIndexVersion = rangeIndexVersion; } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org