[GitHub] [pinot] gortiz commented on a diff in pull request #10184: Complete index spi

via GitHub Wed, 29 Mar 2023 01:35:42 -0700


gortiz commented on code in PR #10184:
URL: https://github.com/apache/pinot/pull/10184#discussion_r1151581410



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java:
##########
@@ -19,74 +19,152 @@
 
 package org.apache.pinot.segment.local.segment.index.fst;
 
+import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
 import javax.annotation.Nullable;
+import 
org.apache.pinot.segment.local.segment.creator.impl.inv.text.LuceneFSTIndexCreator;
+import 
org.apache.pinot.segment.local.segment.index.loader.ConfigurableFromIndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import 
org.apache.pinot.segment.local.segment.index.loader.invertedindex.FSTIndexHandler;
+import 
org.apache.pinot.segment.local.segment.index.readers.LuceneFSTIndexReader;
+import org.apache.pinot.segment.local.utils.nativefst.FSTHeader;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexCreator;
+import org.apache.pinot.segment.local.utils.nativefst.NativeFSTIndexReader;
 import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.V1Constants;
 import org.apache.pinot.segment.spi.creator.IndexCreationContext;
+import org.apache.pinot.segment.spi.index.AbstractIndexType;
+import org.apache.pinot.segment.spi.index.ColumnConfigDeserializer;
 import org.apache.pinot.segment.spi.index.FieldIndexConfigs;
-import org.apache.pinot.segment.spi.index.IndexCreator;
+import org.apache.pinot.segment.spi.index.FstIndexConfig;
+import org.apache.pinot.segment.spi.index.IndexConfigDeserializer;
 import org.apache.pinot.segment.spi.index.IndexHandler;
-import org.apache.pinot.segment.spi.index.IndexReader;
+import org.apache.pinot.segment.spi.index.IndexReaderConstraintException;
 import org.apache.pinot.segment.spi.index.IndexReaderFactory;
 import org.apache.pinot.segment.spi.index.IndexType;
 import org.apache.pinot.segment.spi.index.StandardIndexes;
+import org.apache.pinot.segment.spi.index.creator.FSTIndexCreator;
+import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
 import org.apache.pinot.segment.spi.store.SegmentDirectory;
-import org.apache.pinot.spi.config.table.IndexConfig;
+import org.apache.pinot.spi.config.table.FSTType;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.IndexingConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 
 
-public class FstIndexType implements IndexType<IndexConfig, IndexReader, 
IndexCreator> {
-  public static final FstIndexType INSTANCE = new FstIndexType();
+public class FstIndexType extends AbstractIndexType<FstIndexConfig, 
TextIndexReader, FSTIndexCreator>
+    implements ConfigurableFromIndexLoadingConfig<FstIndexConfig> {
 
-  private FstIndexType() {
+  protected FstIndexType() {
+    super(StandardIndexes.FST_ID);
   }
 
   @Override
-  public String getId() {
-    return StandardIndexes.FST_ID;
+  public Class<FstIndexConfig> getIndexConfigClass() {
+    return FstIndexConfig.class;
   }
 
   @Override
-  public Class<IndexConfig> getIndexConfigClass() {
-    return IndexConfig.class;
+  public Map<String, FstIndexConfig> fromIndexLoadingConfig(IndexLoadingConfig 
indexLoadingConfig) {
+    Map<String, FstIndexConfig> result = new HashMap<>();
+    Set<String> fstIndexColumns = indexLoadingConfig.getFSTIndexColumns();
+    for (String column : indexLoadingConfig.getAllKnownColumns()) {
+      if (fstIndexColumns.contains(column)) {
+        FstIndexConfig conf = new 
FstIndexConfig(indexLoadingConfig.getFSTIndexType());
+        result.put(column, conf);
+      } else {
+        result.put(column, FstIndexConfig.DISABLED);
+      }
+    }
+    return result;
   }
 
   @Override
-  public IndexConfig getDefaultConfig() {
-    return IndexConfig.DISABLED;
+  public FstIndexConfig getDefaultConfig() {
+    return FstIndexConfig.DISABLED;
   }
 
   @Override
-  public IndexConfig getConfig(TableConfig tableConfig, Schema schema) {
-    throw new UnsupportedOperationException();
+  public ColumnConfigDeserializer<FstIndexConfig> getDeserializer() {
+    return IndexConfigDeserializer.fromIndexes("fst", getIndexConfigClass())
+        .withExclusiveAlternative(IndexConfigDeserializer.fromIndexTypes(
+            FieldConfig.IndexType.FST,
+            (tableConfig, fieldConfig) -> {
+              IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
+              FSTType fstIndexType = indexingConfig != null ? 
indexingConfig.getFSTIndexType() : null;
+              return new FstIndexConfig(fstIndexType);
+            }));
   }
 
   @Override
-  public IndexCreator createIndexCreator(IndexCreationContext context, 
IndexConfig indexConfig)
-      throws Exception {
-    throw new UnsupportedOperationException();
+  public FSTIndexCreator createIndexCreator(IndexCreationContext context, 
FstIndexConfig indexConfig)
+      throws IOException {
+    Preconditions.checkState(context.getFieldSpec().isSingleValueField(),
+        "FST index is currently only supported on single-value columns");
+    
Preconditions.checkState(context.getFieldSpec().getDataType().getStoredType() 
== FieldSpec.DataType.STRING,
+        "FST index is currently only supported on STRING type columns");
+    Preconditions.checkState(context.hasDictionary(),
+        "FST index is currently only supported on dictionary-encoded columns");
+    if (indexConfig.getFstType() == FSTType.NATIVE) {
+      return new NativeFSTIndexCreator(context);
+    } else {
+      return new LuceneFSTIndexCreator(context);
+    }
   }
 
   @Override
-  public IndexReaderFactory<IndexReader> getReaderFactory() {
-    throw new UnsupportedOperationException();
+  public ReaderFactory getReaderFactory() {
+    return ReaderFactory.INSTANCE;
+  }
+
+  public static TextIndexReader read(PinotDataBuffer dataBuffer, 
ColumnMetadata metadata)
+      throws IndexReaderConstraintException, IOException {
+    return ReaderFactory.INSTANCE.createIndexReader(dataBuffer, metadata);
   }
 
   @Override
   public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory, 
Map<String, FieldIndexConfigs> configsByCol,
       @Nullable Schema schema, @Nullable TableConfig tableConfig) {
-    throw new UnsupportedOperationException();
+    return new FSTIndexHandler(segmentDirectory, configsByCol, tableConfig);
   }
 
   @Override
   public String getFileExtension(ColumnMetadata columnMetadata) {
     return V1Constants.Indexes.FST_INDEX_FILE_EXTENSION;
   }
 
-  @Override
-  public String toString() {
-    return getId();
+  public static class ReaderFactory extends 
IndexReaderFactory.Default<FstIndexConfig, TextIndexReader> {
+    public static final ReaderFactory INSTANCE = new ReaderFactory();
+    @Override
+    protected IndexType<FstIndexConfig, TextIndexReader, ?> getIndexType() {
+      return StandardIndexes.fst();
+    }
+
+    protected TextIndexReader createIndexReader(PinotDataBuffer dataBuffer, 
ColumnMetadata metadata)
+        throws IndexReaderConstraintException, IOException {
+      if (!metadata.hasDictionary()) {
+        throw new IndexReaderConstraintException(metadata.getColumnName(), 
StandardIndexes.fst(),
+            "This index requires a dictionary");

Review Comment:
   FST index types only work with dictionary encoded columns. I've added this 
check as something we should be checking even if we didn't.
   
   By doing so, if for some reason the code is trying to get a fst index reader 
from a raw column, we are going to have an exception earlier (when the reader 
is created instead the first time `TextIndexReader.getDocIds` is called), which 
is better.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

[GitHub] [pinot] gortiz commented on a diff in pull request #10184: Complete index spi

Reply via email to