xiangfu0 commented on code in PR #17032:
URL: https://github.com/apache/pinot/pull/17032#discussion_r2449873127
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java:
##########
@@ -75,15 +77,26 @@ public SegmentLocalFSDirectory(File directory) {
_segmentDirectory = null;
_segmentLock = new SegmentLock();
_readMode = null;
+ _segmentDirectoryLoaderContext = null;
}
-
public SegmentLocalFSDirectory(File directory, ReadMode readMode)
throws IOException, ConfigurationException {
- this(directory, new SegmentMetadataImpl(directory), readMode);
+ this(directory, new SegmentMetadataImpl(directory), null, readMode);
+ }
+ public SegmentLocalFSDirectory(File directory, SegmentDirectoryLoaderContext
segmentDirectoryLoaderContext,
+ ReadMode readMode)
+ throws IOException, ConfigurationException {
+ this(directory, new SegmentMetadataImpl(directory),
segmentDirectoryLoaderContext, readMode);
+ }
Review Comment:
nit: newline
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java:
##########
@@ -75,15 +77,26 @@ public SegmentLocalFSDirectory(File directory) {
_segmentDirectory = null;
_segmentLock = new SegmentLock();
_readMode = null;
+ _segmentDirectoryLoaderContext = null;
}
-
public SegmentLocalFSDirectory(File directory, ReadMode readMode)
throws IOException, ConfigurationException {
- this(directory, new SegmentMetadataImpl(directory), readMode);
+ this(directory, new SegmentMetadataImpl(directory), null, readMode);
+ }
+ public SegmentLocalFSDirectory(File directory, SegmentDirectoryLoaderContext
segmentDirectoryLoaderContext,
+ ReadMode readMode)
+ throws IOException, ConfigurationException {
+ this(directory, new SegmentMetadataImpl(directory),
segmentDirectoryLoaderContext, readMode);
+ }
+ public SegmentLocalFSDirectory(File directory, SegmentMetadataImpl metadata,
ReadMode readMode)
+ throws IOException, ConfigurationException {
+ this(directory, metadata, null, readMode);
}
@VisibleForTesting
- public SegmentLocalFSDirectory(File directoryFile, SegmentMetadataImpl
metadata, ReadMode readMode) {
+ public SegmentLocalFSDirectory(File directoryFile, SegmentMetadataImpl
metadata,
+ SegmentDirectoryLoaderContext segmentDirectoryLoaderContext, ReadMode
readMode) {
Review Comment:
```suggestion
@nullable SegmentDirectoryLoaderContext segmentDirectoryLoaderContext,
ReadMode readMode) {
```
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java:
##########
@@ -92,12 +96,18 @@ class SingleFileIndexDirectory extends ColumnIndexDirectory
{
// re-arranges the content in index file to keep it compact.
private boolean _shouldCleanupRemovedIndices;
+ public SingleFileIndexDirectory(File segmentDirectory, SegmentMetadataImpl
segmentMetadata, ReadMode readMode)
+ throws IOException, ConfigurationException {
+ this(segmentDirectory, segmentMetadata, null, readMode);
+ }
+
/**
* @param segmentDirectory File pointing to segment directory
* @param segmentMetadata segment metadata. Metadata must be fully
initialized
* @param readMode mmap vs heap mode
*/
- public SingleFileIndexDirectory(File segmentDirectory, SegmentMetadataImpl
segmentMetadata, ReadMode readMode)
+ public SingleFileIndexDirectory(File segmentDirectory, SegmentMetadataImpl
segmentMetadata,
+ SegmentDirectoryLoaderContext segmentDirectoryLoaderContext, ReadMode
readMode)
Review Comment:
```suggestion
@nullable SegmentDirectoryLoaderContext segmentDirectoryLoaderContext,
ReadMode readMode)
```
##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/RawValueBasedInvertedIndexCreator.java:
##########
@@ -59,4 +59,13 @@ public interface RawValueBasedInvertedIndexCreator extends
InvertedIndexCreator
* For multi-value column, adds the double values for the next document.
*/
void add(double[] values, int length);
+
+ default void add(String value) {
Review Comment:
Why put default implementation here?
##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/memory/EmptyIndexBuffer.java:
##########
@@ -0,0 +1,304 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.memory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Properties;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+
+/**
+ * A specialized PinotDataBuffer implementation for zero-size index entries
that contains S3 segment path information.
+ * This buffer is useful for debugging and tracking purposes when dealing with
empty index entries.
+ */
+public class EmptyIndexBuffer extends PinotDataBuffer {
+ private final Properties _properties;
+ private final String _segmentName;
+ private final String _tableNameWithType;
+ private final String _segmentPath;
+
+ /**
+ * Creates a new S3EmptyIndexBuffer for a zero-size index entry
Review Comment:
```suggestion
* Creates a new EmptyIndexBuffer for a zero-size index entry
```
##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/memory/EmptyIndexBuffer.java:
##########
@@ -0,0 +1,304 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.memory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Properties;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+
+/**
+ * A specialized PinotDataBuffer implementation for zero-size index entries
that contains S3 segment path information.
Review Comment:
```suggestion
* A specialized PinotDataBuffer implementation for zero-size index entries
that contains remote (S3, GCS, etc) segment path information.
```
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java:
##########
@@ -285,27 +299,30 @@ private static void convertSegmentFormat(File indexDir,
IndexLoadingConfig index
return;
}
String segmentName = indexDir.getName();
- LOGGER.info("Segment: {} needs to be converted from version: {} to {}",
segmentName, segmentVersionOnDisk,
- segmentVersionToLoad);
+ LOGGER.info("Segment: {} needs to be converted from version: {} to {}",
segmentName,
+ segmentVersionOnDisk, segmentVersionToLoad);
SegmentFormatConverter converter =
SegmentFormatConverterFactory.getConverter(segmentVersionOnDisk,
segmentVersionToLoad);
LOGGER.info("Using converter: {} to up-convert segment: {}",
converter.getClass().getSimpleName(), segmentName);
converter.convert(indexDir);
- LOGGER.info("Successfully up-converted segment: {} from version: {} to
{}", segmentName, segmentVersionOnDisk,
- segmentVersionToLoad);
+ LOGGER.info("Successfully up-converted segment: {} from version: {} to
{}", segmentName,
+ segmentVersionOnDisk, segmentVersionToLoad);
}
private static void preprocessSegment(File indexDir, String segmentName,
String segmentCrc,
- IndexLoadingConfig indexLoadingConfig, @Nullable
SegmentOperationsThrottler segmentOperationsThrottler)
+ IndexLoadingConfig indexLoadingConfig, @Nullable
SegmentOperationsThrottler segmentOperationsThrottler,
+ SegmentZKMetadata zkMetadata)
Review Comment:
```suggestion
@Nullable SegmentZKMetadata zkMetadata)
```
##########
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RawValueInvertedIndexFilterOperator.java:
##########
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import com.google.common.base.CaseFormat;
+import java.util.Collections;
+import java.util.List;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.InPredicate;
+import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
+import org.apache.pinot.common.request.context.predicate.NotInPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.BlockDocIdSet;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.ExplainAttributeBuilder;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet;
+import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import
org.apache.pinot.segment.local.segment.index.readers.RawValueBitmapInvertedIndexReader;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.spi.data.FieldSpec.DataType;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+/**
+ * Filter operator that uses raw value bitmap inverted index to handle
predicates on raw encoded columns.
+ */
+public class RawValueInvertedIndexFilterOperator extends
BaseColumnFilterOperator {
+ private static final String EXPLAIN_NAME = "FILTER_RAW_INVERTED_INDEX";
+
+ private final PredicateEvaluator _predicateEvaluator;
+ private final RawValueBitmapInvertedIndexReader _invertedIndexReader;
+ private final DataType _dataType;
+ private final boolean _exclusive;
+
+ public RawValueInvertedIndexFilterOperator(QueryContext queryContext,
PredicateEvaluator predicateEvaluator,
+ DataSource dataSource, int numDocs) {
+ super(queryContext, dataSource, numDocs);
+ _predicateEvaluator = predicateEvaluator;
+ _invertedIndexReader = (RawValueBitmapInvertedIndexReader)
dataSource.getInvertedIndex();
+ _dataType = dataSource.getDataSourceMetadata().getDataType();
+ _exclusive = predicateEvaluator.isExclusive();
+ }
+
+ @Override
+ protected BlockDocIdSet getTrues() {
+ // Handle null predicate
+ if (_predicateEvaluator.isAlwaysFalse()) {
+ return EmptyDocIdSet.getInstance();
+ }
+ if (_predicateEvaluator.isAlwaysTrue()) {
+ return new BitmapDocIdSet(new MutableRoaringBitmap(), _numDocs);
+ }
+
+ // Get bitmap for each matching value and OR them together
+ MutableRoaringBitmap result = computeMatchingBitmap();
+
+
+ return new BitmapDocIdSet(result, _numDocs);
+ }
+
+ private MutableRoaringBitmap computeMatchingBitmap() {
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ Predicate predicate = _predicateEvaluator.getPredicate();
+ Predicate.Type predicateType = predicate.getType();
+ switch (predicateType) {
+ case EQ:
+ addMatchingValueBitmap(bitmap, ((EqPredicate) predicate).getValue());
+ break;
+ case NOT_EQ:
+ addMatchingValueBitmap(bitmap, ((NotEqPredicate)
predicate).getValue());
+ break;
+ case IN:
+ for (String value : ((InPredicate) predicate).getValues()) {
+ addMatchingValueBitmap(bitmap, value);
+ }
+ break;
+ case NOT_IN:
+ for (String value : ((NotInPredicate) predicate).getValues()) {
+ addMatchingValueBitmap(bitmap, value);
+ }
+ break;
+ case RANGE:
+ // For range queries, we need to scan through all values and apply the
predicate
+ // This is not efficient, but it's the only way to handle range
queries with raw encoding
+ // TODO: Add support for range index with raw encoding
+ throw new UnsupportedOperationException("Range predicates not
supported for raw encoded columns");
Review Comment:
Since dictionary is within the inverted index, I think we can add a method
in `RawValueBitmapInvertedIndexReader` directly to fetch the bitmap by passing
the range.
##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java:
##########
@@ -1608,7 +1612,7 @@ StaleSegment isSegmentStale(IndexLoadingConfig
indexLoadingConfig, SegmentDataMa
}
private SegmentDirectory initSegmentDirectory(String segmentName, String
segmentCrc,
- IndexLoadingConfig indexLoadingConfig)
+ IndexLoadingConfig indexLoadingConfig, SegmentZKMetadata zkMetadata)
Review Comment:
```suggestion
IndexLoadingConfig indexLoadingConfig, @nullable SegmentZKMetadata
zkMetadata)
```
##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/memory/EmptyIndexBuffer.java:
##########
@@ -0,0 +1,304 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.memory;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Properties;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+
+/**
+ * A specialized PinotDataBuffer implementation for zero-size index entries
that contains S3 segment path information.
+ * This buffer is useful for debugging and tracking purposes when dealing with
empty index entries.
+ */
+public class EmptyIndexBuffer extends PinotDataBuffer {
+ private final Properties _properties;
+ private final String _segmentName;
+ private final String _tableNameWithType;
+ private final String _segmentPath;
+
+ /**
+ * Creates a new S3EmptyIndexBuffer for a zero-size index entry
+ *
+ * @param properties Properties containing S3 configuration (bucket, key,
etc.)
+ * @param segmentName The name of the segment
+ * @param tableNameWithType The table name with type
+ */
+ public EmptyIndexBuffer(Properties properties, String segmentName, String
tableNameWithType) {
+ super(false); // Not closeable since it's just metadata
+ _properties = properties;
+ _segmentName = segmentName;
+ _tableNameWithType = tableNameWithType;
+ _segmentPath = constructSegmentPath();
+ }
+
+ /**
+ * Constructs the S3 segment path from properties
+ * @return The constructed S3 URI as a string
+ */
+ private String constructSegmentPath() {
Review Comment:
make it abstract and have another `S3EmptyIndexBuffer` extends this
`EmptyIndexBuffer`?
##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/RawValueBasedInvertedIndexCreator.java:
##########
@@ -59,4 +59,13 @@ public interface RawValueBasedInvertedIndexCreator extends
InvertedIndexCreator
* For multi-value column, adds the double values for the next document.
*/
void add(double[] values, int length);
+
+ default void add(String value) {
+ }
+ default void add(String[] value, int length) {
Review Comment:
add comments
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java:
##########
@@ -89,7 +90,18 @@ public static ImmutableSegment load(File indexDir,
IndexLoadingConfig indexLoadi
public static ImmutableSegment load(File indexDir, IndexLoadingConfig
indexLoadingConfig,
@Nullable SegmentOperationsThrottler segmentOperationsThrottler)
throws Exception {
- return load(indexDir, indexLoadingConfig, true,
segmentOperationsThrottler);
+ return load(indexDir, indexLoadingConfig, true,
segmentOperationsThrottler, null);
+ }
+
+ /**
+ * Loads the segment with specified IndexLoadingConfig.
+ * This method modifies the segment like to convert segment format, add or
remove indices.
+ * Mostly used by UT cases to add some specific index for testing purpose.
+ */
+ public static ImmutableSegment load(File indexDir, IndexLoadingConfig
indexLoadingConfig,
+ @Nullable SegmentOperationsThrottler segmentOperationsThrottler,
SegmentZKMetadata zkMetadata)
Review Comment:
```suggestion
@Nullable SegmentOperationsThrottler segmentOperationsThrottler,
@Nullable SegmentZKMetadata zkMetadata)
```
##########
pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java:
##########
@@ -1293,9 +1297,9 @@ public boolean tryLoadExistingSegment(SegmentZKMetadata
zkMetadata, IndexLoading
@Nullable
private SegmentDirectory tryInitSegmentDirectory(String segmentName, String
segmentCrc,
- IndexLoadingConfig indexLoadingConfig) {
+ IndexLoadingConfig indexLoadingConfig, SegmentZKMetadata zkMetadata) {
Review Comment:
```suggestion
@nullable IndexLoadingConfig indexLoadingConfig, SegmentZKMetadata
zkMetadata) {
```
##########
pinot-tools/src/main/resources/examples/minions/batch/baseballStats/baseballStats_offline_table_config.json:
##########
@@ -39,5 +38,11 @@
"SegmentGenerationAndPushTask": {
}
}
- }
+ },
+ "fieldConfigList": [
Review Comment:
You can just amend existing table by adding a new column `playerIDRaw` using
ingestion transformer.
Then `playerId` will be using dictionary based inverted index and
`playerIDRaw` using raw inverted index
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java:
##########
@@ -98,15 +110,15 @@ public static ImmutableSegment load(File indexDir,
IndexLoadingConfig indexLoadi
*/
public static ImmutableSegment load(File indexDir, IndexLoadingConfig
indexLoadingConfig, boolean needPreprocess)
throws Exception {
- return load(indexDir, indexLoadingConfig, needPreprocess, null);
+ return load(indexDir, indexLoadingConfig, needPreprocess, null, null);
}
/**
* Loads the segment with specified schema and IndexLoadingConfig, and
allows to control whether to
* modify the segment like to convert segment format, add or remove indices.
*/
public static ImmutableSegment load(File indexDir, IndexLoadingConfig
indexLoadingConfig, boolean needPreprocess,
- @Nullable SegmentOperationsThrottler segmentOperationsThrottler)
+ @Nullable SegmentOperationsThrottler segmentOperationsThrottler,
SegmentZKMetadata zkMetadata)
Review Comment:
```suggestion
@Nullable SegmentOperationsThrottler segmentOperationsThrottler,
@Nullable SegmentZKMetadata zkMetadata)
```
##########
pinot-tools/src/main/resources/examples/minions/batch/baseballStats/baseballStats_offline_table_config_raw_inverted_index.json:
##########
@@ -0,0 +1,49 @@
+{
+ "tableName": "baseballStats",
Review Comment:
I think we don't need this
##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/immutable/ImmutableSegmentLoader.java:
##########
@@ -143,7 +157,7 @@ public static ImmutableSegment load(File indexDir,
IndexLoadingConfig indexLoadi
}
public static void preprocess(File indexDir, IndexLoadingConfig
indexLoadingConfig,
- @Nullable SegmentOperationsThrottler segmentOperationsThrottler)
+ @Nullable SegmentOperationsThrottler segmentOperationsThrottler,
SegmentZKMetadata zkMetadata)
Review Comment:
```suggestion
@Nullable SegmentOperationsThrottler segmentOperationsThrottler,
@Nullable SegmentZKMetadata zkMetadata)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]