This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new c8b223f1fc Upgrade lucene to 9.10.0 and compatibility changes to code. (#12866) c8b223f1fc is described below commit c8b223f1fc2ab8df958175f8ae0c4b17902a2061 Author: Abhishek Sharma <abhishek.sha...@spothero.com> AuthorDate: Wed May 1 16:17:48 2024 -0400 Upgrade lucene to 9.10.0 and compatibility changes to code. (#12866) --- .../realtime/impl/vector/MutableVectorIndex.java | 2 +- .../impl/inv/text/LuceneFSTIndexCreator.java | 2 +- .../creator/impl/text/LuceneTextIndexCreator.java | 4 +-- .../impl/vector/HnswVectorIndexCreator.java | 3 +- .../vector/{lucene95 => lucene99}/HnswCodec.java | 14 ++++----- .../{lucene95 => lucene99}/HnswVectorsFormat.java | 10 +++--- .../converter/SegmentV1V2ToV3FormatConverter.java | 8 ++--- .../local/segment/index/fst/FstIndexType.java | 3 +- .../loader/invertedindex/FSTIndexHandler.java | 4 +-- .../loader/invertedindex/VectorIndexHandler.java | 5 +-- .../index/readers/LuceneFSTIndexReader.java | 3 +- .../local/segment/index/text/TextIndexType.java | 3 +- .../segment/index/vector/VectorIndexType.java | 3 +- .../local/segment/store/TextIndexUtils.java | 5 ++- .../local/segment/store/VectorIndexUtils.java | 24 +++++++++------ .../pinot/segment/local/utils/fst/FSTBuilder.java | 13 +++++--- .../utils/nativefst/NativeFSTIndexCreator.java | 2 +- .../index/creator/LuceneFSTIndexCreatorTest.java | 4 +-- .../index/creator/NativeFSTIndexCreatorTest.java | 4 +-- .../local/segment/index/loader/LoaderTest.java | 36 +++++++++++----------- .../index/loader/SegmentPreProcessorTest.java | 4 +-- .../segment/store/FilePerIndexDirectoryTest.java | 4 +-- .../store/SingleFileIndexDirectoryTest.java | 4 +-- .../segment/local/utils/fst/FSTBuilderTest.java | 2 -- .../org/apache/pinot/segment/spi/V1Constants.java | 4 +++ .../segment/spi/store/SegmentDirectoryPaths.java | 26 +++++++++++++--- pom.xml | 2 +- 27 files changed, 117 insertions(+), 81 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java index a591650be4..47329a1c2a 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java @@ -83,7 +83,7 @@ public class MutableVectorIndex implements VectorIndexReader, MutableIndex { // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. _indexDir = new File(FileUtils.getTempDirectory(), segmentName); _indexDirectory = FSDirectory.open( - new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).toPath()); + new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).toPath()); LOGGER.info("Creating mutable HNSW index for segment: {}, column: {} at path: {} with {}", segmentName, vectorColumn, _indexDir.getAbsolutePath(), vectorIndexConfig.getProperties()); _indexWriter = new IndexWriter(_indexDirectory, VectorIndexUtils.getIndexWriterConfig(vectorIndexConfig)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java index 60b903739b..2e51c19096 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java @@ -55,7 +55,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator { */ public LuceneFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) throws IOException { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); _fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java index 49306d9404..2cdbf13f6a 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java @@ -330,7 +330,7 @@ public class LuceneTextIndexCreator extends AbstractTextIndexCreator { } private File getV1TextIndexFile(File indexDir) { - String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(indexDir, luceneIndexDirectory); } @@ -339,7 +339,7 @@ public class LuceneTextIndexCreator extends AbstractTextIndexCreator { String tmpSegmentName = indexDir.getParentFile().getName(); String segmentName = tmpSegmentName.substring(tmpSegmentName.indexOf("tmp-") + 4, tmpSegmentName.lastIndexOf('-')); String mutableDir = indexDir.getParentFile().getParentFile().getParent() + "/consumers/" + segmentName + "/" - + _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + + _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(mutableDir); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java index d13b450397..c1f5cbb0b5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java @@ -61,7 +61,8 @@ public class HnswVectorIndexCreator implements VectorIndexCreator { try { // segment generation is always in V1 and later we convert (as part of post creation processing) // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. - File indexFile = new File(segmentIndexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + File indexFile = new File(segmentIndexDir, _vectorColumn + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); _indexDirectory = FSDirectory.open(indexFile.toPath()); LOGGER.info("Creating HNSW index for column: {} at path: {} with {} for segment: {}", column, indexFile.getAbsolutePath(), vectorIndexConfig.getProperties(), segmentIndexDir.getAbsolutePath()); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java index ee7cf560df..bfcfcff5ac 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java @@ -16,8 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; @@ -25,8 +26,7 @@ import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; @@ -36,7 +36,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; * Extend the Lucene 9.5 index format * The major change here is to allow custom: @link{org.apache.lucene.codecs.KnnVectorsFormat} * - * @see org.apache.lucene.codecs.lucene95 package documentation for file format details. + * @see org.apache.lucene.codecs.lucene99 package documentation for file format details. */ public class HnswCodec extends FilterCodec { @@ -73,8 +73,8 @@ public class HnswCodec extends FilterCodec { * * @param mode stored fields compression mode to use for newly flushed/merged segments. */ - public HnswCodec(Lucene95Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { - super("Lucene95", new Lucene95Codec(mode)); + public HnswCodec(Lucene99Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { + super("Lucene99", new Lucene99Codec(mode)); _defaultKnnVectorsFormat = defaultKnnVectorsFormat; _defaultPostingsFormat = new Lucene90PostingsFormat(); _defaultDVFormat = new Lucene90DocValuesFormat(); @@ -123,7 +123,7 @@ public class HnswCodec extends FilterCodec { /** * Returns the vectors format that should be used for writing new segments of <code>field</code> * - * <p>The default implementation always returns "Lucene95". + * <p>The default implementation always returns "Lucene99". * * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility: * future version of Lucene are only guaranteed to be able to read the default implementation. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java index b3f356c51e..2ba2781445 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java @@ -16,13 +16,13 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; import java.io.IOException; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.hnsw.HnswGraph; @@ -41,7 +41,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { public static final int DEFAULT_MAX_DIMENSIONS = 2048; private final int _maxDimensions; - private final Lucene95HnswVectorsFormat _delegate; + private final Lucene99HnswVectorsFormat _delegate; /** * Constructs a format using the given graph construction parameters. @@ -51,7 +51,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { * @param maxDimensions the maximum number of dimensions supported by this format */ public HnswVectorsFormat(int maxConn, int beamWidth, int maxDimensions) { - super("Lucene95HnswVectorsFormat"); + super("Lucene99HnswVectorsFormat"); if (maxDimensions <= 0 || maxDimensions > DEFAULT_MAX_DIMENSIONS) { throw new IllegalArgumentException( "maxDimensions must be postive and less than or equal to" @@ -59,7 +59,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { + "; maxDimensions=" + maxDimensions); } - _delegate = new Lucene95HnswVectorsFormat(maxConn, beamWidth); + _delegate = new Lucene99HnswVectorsFormat(maxConn, beamWidth); _maxDimensions = maxDimensions; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java index ece4daf604..0e84a4b6ad 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java @@ -110,10 +110,10 @@ public class SegmentV1V2ToV3FormatConverter implements SegmentFormatConverter { if (file.isFile() && file.exists()) { FileUtils.deleteQuietly(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } } @@ -226,7 +226,7 @@ public class SegmentV1V2ToV3FormatConverter implements SegmentFormatConverter { private void copyLuceneTextIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String suffix = V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; File[] textIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { @@ -263,7 +263,7 @@ public class SegmentV1V2ToV3FormatConverter implements SegmentFormatConverter { private void copyVectorIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; + String suffix = V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION; File[] vectorIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java index d04ce7bc97..83e755f734 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java @@ -65,7 +65,8 @@ public class FstIndexType extends AbstractIndexType<FstIndexConfig, TextIndexRea public static final String INDEX_DISPLAY_NAME = "fst"; private static final List<String> EXTENSIONS = ImmutableList.of(V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); protected FstIndexType() { super(StandardIndexes.FST_ID); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java index 778e92db0a..b9d9c5096d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java @@ -44,7 +44,7 @@ import org.apache.pinot.spi.data.FieldSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; /** @@ -157,7 +157,7 @@ public class FSTIndexHandler extends BaseIndexHandler { String segmentName = _segmentDirectory.getSegmentMetadata().getName(); String columnName = columnMetadata.getColumnName(); File inProgress = new File(indexDir, columnName + ".fst.inprogress"); - File fstIndexFile = new File(indexDir, columnName + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstIndexFile = new File(indexDir, columnName + LUCENE_V99_FST_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Create a marker file. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java index 584d4be1c3..b3e5d2dfc3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java @@ -115,9 +115,10 @@ public class VectorIndexHandler extends BaseIndexHandler { String columnName = columnMetadata.getColumnName(); File inProgress = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); + new File(segmentDirectory, columnName + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); File vectorIndexFile = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Marker file does not exist, which means last run ended normally. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java index 6e579562c4..3bf2c2a601 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java @@ -52,7 +52,8 @@ public class LuceneFSTIndexReader implements TextIndexReader { _dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size()); _readFST = - new FST(_dataBufferIndexInput, _dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore()); + new FST<>(FST.readMetadata(_dataBufferIndexInput, PositiveIntOutputs.getSingleton()), + _dataBufferIndexInput, new OffHeapFSTStore()); } @Override diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java index 596380d81b..cfbf6271f1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java @@ -75,7 +75,8 @@ public class TextIndexType extends AbstractIndexType<TextIndexConfig, TextIndexR private static final List<String> EXTENSIONS = Lists.newArrayList( V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION, V1Constants.Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION ); protected TextIndexType() { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java index 59faf4c880..cb228b81aa 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java @@ -129,7 +129,8 @@ public class VectorIndexType extends AbstractIndexType<VectorIndexConfig, Vector @Override public List<String> getFileExtensions(@Nullable ColumnMetadata columnMetadata) { - return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION); + return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION, + V1Constants.Indexes.VECTOR_V99_INDEX_FILE_EXTENSION); } private static class ReaderFactory implements IndexReaderFactory<VectorIndexReader> { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java index caa47adff7..0c2369bdd8 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java @@ -49,6 +49,8 @@ public class TextIndexUtils { FileUtils.deleteQuietly(luceneMappingFile); File luceneV9IndexFile = new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9IndexFile); + File luceneV99IndexFile = new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(luceneV99IndexFile); File luceneV9MappingFile = new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9MappingFile); @@ -61,7 +63,8 @@ public class TextIndexUtils { //@formatter:off return new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION).exists() || new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists() - || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists(); + || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists() + || new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists(); //@formatter:on } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java index 15de36cf5f..698adcb318 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java @@ -20,12 +20,12 @@ package org.apache.pinot.segment.local.segment.store; import java.io.File; import org.apache.commons.io.FileUtils; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswCodec; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswVectorsFormat; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswCodec; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswVectorsFormat; import org.apache.pinot.segment.spi.V1Constants.Indexes; import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig; @@ -38,17 +38,21 @@ public class VectorIndexUtils { // Remove the lucene index file and potentially the docId mapping file. File luceneIndexFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(luceneIndexFile); + File luceneV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(luceneV99IndexFile); File luceneMappingFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION); FileUtils.deleteQuietly(luceneMappingFile); // Remove the native index file File nativeIndexFile = new File(segDir, column + Indexes.VECTOR_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(nativeIndexFile); + File nativeV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(nativeV99IndexFile); } static boolean hasVectorIndex(File segDir, String column) { - return new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir, - column + Indexes.VECTOR_INDEX_FILE_EXTENSION).exists(); + return new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir, + column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION).exists(); } public static VectorSimilarityFunction toSimilarityFunction( @@ -81,17 +85,17 @@ public class VectorIndexUtils { indexWriterConfig.setUseCompoundFile(useCompoundFile); int maxCon = Integer.parseInt(vectorIndexConfig.getProperties() - .getOrDefault("maxCon", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN))); + .getOrDefault("maxCon", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN))); int beamWidth = Integer.parseInt(vectorIndexConfig.getProperties() - .getOrDefault("beamWidth", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_BEAM_WIDTH))); + .getOrDefault("beamWidth", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH))); int maxDimensions = Integer.parseInt(vectorIndexConfig.getProperties() .getOrDefault("maxDimensions", String.valueOf(HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS))); HnswVectorsFormat knnVectorsFormat = new HnswVectorsFormat(maxCon, beamWidth, maxDimensions); - Lucene95Codec.Mode mode = Lucene95Codec.Mode.valueOf(vectorIndexConfig.getProperties() - .getOrDefault("mode", Lucene95Codec.Mode.BEST_SPEED.name())); + Lucene99Codec.Mode mode = Lucene99Codec.Mode.valueOf(vectorIndexConfig.getProperties() + .getOrDefault("mode", Lucene99Codec.Mode.BEST_SPEED.name())); indexWriterConfig.setCodec(new HnswCodec(mode, knnVectorsFormat)); return indexWriterConfig; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java index 0a4596d173..a64f781758 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java @@ -36,28 +36,31 @@ import org.slf4j.LoggerFactory; */ public class FSTBuilder { public static final Logger LOGGER = LoggerFactory.getLogger(FSTBuilder.class); - private final FSTCompiler<Long> _builder = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton()); + private final FSTCompiler<Long> _fstCompiler = + (new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton())).build(); private final IntsRefBuilder _scratch = new IntsRefBuilder(); public static FST<Long> buildFST(SortedMap<String, Integer> input) throws IOException { PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(); - FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, fstOutput); + FSTCompiler.Builder<Long> fstCompilerBuilder = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutput); + FSTCompiler<Long> fstCompiler = fstCompilerBuilder.build(); IntsRefBuilder scratch = new IntsRefBuilder(); for (Map.Entry<String, Integer> entry : input.entrySet()) { fstCompiler.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue()); } - return fstCompiler.compile(); + + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } public void addEntry(String key, Integer value) throws IOException { - _builder.add(Util.toUTF16(key, _scratch), value.longValue()); + _fstCompiler.add(Util.toUTF16(key, _scratch), value.longValue()); } public FST<Long> done() throws IOException { - return _builder.compile(); + return FST.fromFSTReader(_fstCompiler.compile(), _fstCompiler.getFSTReader()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java index 1f69bc50f1..933106b4e8 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java @@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements FSTIndexCreator { * @param sortedEntries Sorted entries of the unique values of the column. */ public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); _fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java index b9c568000e..e0e6168c9a 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java @@ -32,7 +32,7 @@ import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; public class LuceneFSTIndexCreatorTest { @@ -62,7 +62,7 @@ public class LuceneFSTIndexCreatorTest { LuceneFSTIndexCreator creator = new LuceneFSTIndexCreator( INDEX_DIR, "testFSTColumn", uniqueValues); creator.seal(); - File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION); PinotDataBuffer pinotDataBuffer = PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "fstIndexFile"); LuceneFSTIndexReader reader = new LuceneFSTIndexReader(pinotDataBuffer); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java index d77268ef28..f98324af58 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java @@ -29,7 +29,7 @@ import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; public class NativeFSTIndexCreatorTest { @@ -59,7 +59,7 @@ public class NativeFSTIndexCreatorTest { creator.seal(); } - File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile); NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java index 87a1e5db25..98dff135d5 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java @@ -65,7 +65,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import org.testng.collections.Lists; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -341,7 +341,7 @@ public class LoaderTest { fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -360,7 +360,7 @@ public class LoaderTest { fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -377,7 +377,7 @@ public class LoaderTest { fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -590,7 +590,7 @@ public class LoaderTest { Assert.assertNotNull(textIndexFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -616,7 +616,7 @@ public class LoaderTest { Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -644,7 +644,7 @@ public class LoaderTest { Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -671,7 +671,7 @@ public class LoaderTest { Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -694,7 +694,7 @@ public class LoaderTest { Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -720,7 +720,7 @@ public class LoaderTest { Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -746,7 +746,7 @@ public class LoaderTest { Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -776,7 +776,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -801,7 +801,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); @@ -821,7 +821,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); @@ -843,7 +843,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -867,7 +867,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -886,7 +886,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -905,7 +905,7 @@ public class LoaderTest { Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index 3349821963..acdc679256 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -1441,8 +1441,8 @@ public class SegmentPreProcessorTest { // V1 use separate file for each column index. File iiFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION); File rgFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION); - File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); - File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); + File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); File bfFile = new File(_indexDir, strColumn + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION); assertFalse(iiFile.exists()); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java index 38eae8436e..a385a60b03 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java @@ -231,11 +231,11 @@ public class FilePerIndexDirectoryTest { // Both files for TextIndex should be removed. fpi.removeIndex("foo", StandardIndexes.text()); - assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertFalse( new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); } - assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); // Read indices back and check the content. diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java index 7f0dcebb05..3a94ceec11 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java @@ -264,11 +264,11 @@ public class SingleFileIndexDirectoryTest { // Both files for TextIndex should be removed. sfd.removeIndex("foo", StandardIndexes.text()); - assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertFalse( new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); } - assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertTrue( new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java index 493e7b3449..edee3ebef2 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java @@ -28,7 +28,6 @@ import java.util.TreeMap; import org.apache.commons.io.FileUtils; import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.OffHeapFSTStore; import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; @@ -78,7 +77,6 @@ public class FSTBuilderTest { PinotDataBuffer pinotDataBuffer = PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, ""); PinotBufferIndexInput indexInput = new PinotBufferIndexInput(pinotDataBuffer, 0L, fstFile.length()); - FST<Long> readFST = new FST(indexInput, indexInput, outputs, new OffHeapFSTStore()); List<Long> results = RegexpMatcher.regexMatch("hello.*123", fst); Assert.assertEquals(results.size(), 1); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java index 25ded5fa30..8827329a7b 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java @@ -54,8 +54,12 @@ public class V1Constants { public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.index"; public static final String LUCENE_V9_FST_INDEX_FILE_EXTENSION = ".lucene.v9.fst"; public static final String LUCENE_V9_TEXT_INDEX_FILE_EXTENSION = ".lucene.v9.index"; + public static final String LUCENE_V99_FST_INDEX_FILE_EXTENSION = ".lucene.v99.fst"; + public static final String LUCENE_V99_TEXT_INDEX_FILE_EXTENSION = ".lucene.v99.index"; public static final String VECTOR_INDEX_FILE_EXTENSION = ".vector.index"; public static final String VECTOR_HNSW_INDEX_FILE_EXTENSION = ".vector.hnsw.index"; + public static final String VECTOR_V99_INDEX_FILE_EXTENSION = ".vector.v99.index"; + public static final String VECTOR_V99_HNSW_INDEX_FILE_EXTENSION = ".vector.v99.hnsw.index"; public static final String VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".vector.hnsw.mapping"; } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java index b8c09a0329..c873ab7e03 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java @@ -79,8 +79,14 @@ public class SegmentDirectoryPaths { */ @Nullable public static File findTextIndexIndexFile(File indexDir, String column) { - String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; File indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); + // check for V9 version, if null + if (indexFormatFile == null) { + luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); + } + // check for old version, if null if (indexFormatFile == null) { luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION; indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); @@ -101,8 +107,14 @@ public class SegmentDirectoryPaths { } public static File findFSTIndexIndexFile(File indexDir, String column) { - String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; File formatFile = findFormatFile(indexDir, luceneIndexDirectory); + // check for V9 version, if null + if (formatFile == null) { + luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; + formatFile = findFormatFile(indexDir, luceneIndexDirectory); + } + // check for old version, if null if (formatFile == null) { luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION; formatFile = findFormatFile(indexDir, luceneIndexDirectory); @@ -120,8 +132,14 @@ public class SegmentDirectoryPaths { @Nullable @VisibleForTesting public static File findVectorIndexIndexFile(File segmentIndexDir, String column) { - String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; - return findFormatFile(segmentIndexDir, vectorIndexDirectory); + String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION; + File formatFile = findFormatFile(segmentIndexDir, vectorIndexDirectory); + + if (formatFile == null) { + vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; + formatFile = findFormatFile(segmentIndexDir, vectorIndexDirectory); + } + return formatFile; } /** diff --git a/pom.xml b/pom.xml index e3d1f26a9b..1d7ea32456 100644 --- a/pom.xml +++ b/pom.xml @@ -153,7 +153,7 @@ <jsonsmart.version>2.5.1</jsonsmart.version> <quartz.version>2.3.2</quartz.version> <calcite.version>1.36.0</calcite.version> - <lucene.version>9.8.0</lucene.version> + <lucene.version>9.10.0</lucene.version> <reflections.version>0.10.2</reflections.version> <dynatrace.hash4j.version>0.17.0</dynatrace.hash4j.version> <!-- helix-core, spark-core use libraries from io.dropwizard.metrics --> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org