Jackie-Jiang commented on a change in pull request #5667: URL: https://github.com/apache/incubator-pinot/pull/5667#discussion_r456002900
########## File path: pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/TextIndexHandler.java ########## @@ -151,30 +143,76 @@ private void createTextIndexForColumn(ColumnMetadata columnMetadata) return; } int numDocs = columnMetadata.getTotalDocs(); - LOGGER.info("Creating new text index for column: {} in segment: {}", column, _segmentName); + boolean hasDictionary = columnMetadata.hasDictionary(); + LOGGER.info("Creating new text index for column: {} in segment: {}, hasDictionary: {}", column, _segmentName, hasDictionary); File segmentDirectory = SegmentDirectoryPaths.segmentDirectoryFor(_indexDir, _segmentVersion); // The handlers are always invoked by the preprocessor. Before this ImmutableSegmentLoader would have already // up-converted the segment from v1/v2 -> v3 (if needed). So based on the segmentVersion, whatever segment // segmentDirectory is indicated to us by SegmentDirectoryPaths, we create lucene index there. There is no // further need to move around the lucene index directory since it is created with correct directory structure // based on segmentVersion. - try (LuceneTextIndexCreator textIndexCreator = new LuceneTextIndexCreator(column, segmentDirectory, true); - VarByteChunkSVForwardIndexReader forwardIndexReader = getForwardIndexReader(columnMetadata); - ChunkReaderContext readerContext = forwardIndexReader.createContext()) { - for (int docId = 0; docId < numDocs; docId++) { - textIndexCreator.addDoc(forwardIndexReader.getString(docId, readerContext), docId); + try (ForwardIndexReader forwardIndexReader = getForwardIndexReader(columnMetadata); + ForwardIndexReaderContext readerContext = forwardIndexReader.createContext(); + LuceneTextIndexCreator textIndexCreator = new LuceneTextIndexCreator(column, segmentDirectory, true)) { + if (!hasDictionary) { + // text index on raw column, just read the raw forward index + VarByteChunkSVForwardIndexReader rawIndexReader = (VarByteChunkSVForwardIndexReader)forwardIndexReader; + ChunkReaderContext chunkReaderContext = (ChunkReaderContext)readerContext; + for (int docId = 0; docId < numDocs; docId++) { + textIndexCreator.addDoc(rawIndexReader.getString(docId, chunkReaderContext), docId); + } + } else { + // text index on dictionary encoded SV column + // read forward index to get dictId + // read the raw value from dictionary using dictId + try (BaseImmutableDictionary dictionary = getDictionaryReader(columnMetadata)) { + if (columnMetadata.isSingleValue()) { Review comment: Remove this check (or replace with checkState) ########## File path: pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/defaultcolumn/BaseDefaultColumnHandler.java ########## @@ -321,88 +321,33 @@ protected void removeColumnV1Indices(String column) } /** - * Right now the text index is supported on RAW (non-dictionary encoded) - * single-value STRING columns. Eventually we will relax the constraints - * step by step. - * For example, later on user should be able to create text index on - * a dictionary encoded STRING column that also has native Pinot's inverted - * index. We can also support it on BYTE columns later. + * Right now the text index is supported on RAW and dictionary encoded + * single-value STRING columns. Later we can add support for text index + * on multi-value columns and BYTE type columns * @param column column name - * @param indexLoadingConfig index loading config * @param fieldSpec field spec */ - private void checkUnsupportedOperationsForTextIndex(String column, IndexLoadingConfig indexLoadingConfig, - FieldSpec fieldSpec) { - if (!indexLoadingConfig.getNoDictionaryColumns().contains(column)) { - throw new UnsupportedOperationException( - "Text index is currently not supported on dictionary encoded column: " + column); - } - - Set<String> sortedColumns = new HashSet<>(indexLoadingConfig.getSortedColumns()); - if (sortedColumns.contains(column)) { - // since Pinot's current implementation doesn't support raw sorted columns, - // we need to check for this too - throw new UnsupportedOperationException("Text index is currently not supported on sorted column: " + column); - } - + private void checkUnsupportedOperationsForTextIndex(String column, FieldSpec fieldSpec) { Review comment: Let's remove this check as it will be checked in `TextIndexHandler`. Try not to couple different modules together. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org