siddharthteotia commented on code in PR #8961: URL: https://github.com/apache/pinot/pull/8961#discussion_r919464331
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/bloomfilter/BloomFilterHandler.java: ########## @@ -136,17 +138,132 @@ private void createBloomFilterForColumn(SegmentDirectory.Writer segmentWriter, C BloomFilterConfig bloomFilterConfig = _bloomFilterConfigs.get(columnName); LOGGER.info("Creating new bloom filter for segment: {}, column: {} with config: {}", segmentName, columnName, bloomFilterConfig); - try (BloomFilterCreator bloomFilterCreator = indexCreatorProvider.newBloomFilterCreator( - IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(columnMetadata) - .build().forBloomFilter(bloomFilterConfig)); - Dictionary dictionary = getDictionaryReader(columnMetadata, segmentWriter)) { - int length = dictionary.length(); - for (int i = 0; i < length; i++) { - bloomFilterCreator.add(dictionary.getStringValue(i)); + + if (columnMetadata.hasDictionary()) { + try (BloomFilterCreator bloomFilterCreator = indexCreatorProvider.newBloomFilterCreator( + IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(columnMetadata) + .build().forBloomFilter(bloomFilterConfig)); + Dictionary dictionary = getDictionaryReader(columnMetadata, segmentWriter)) { + int length = dictionary.length(); + for (int i = 0; i < length; i++) { + bloomFilterCreator.add(dictionary.getStringValue(i)); + } + bloomFilterCreator.seal(); + } + } else { + int numDocs = columnMetadata.getTotalDocs(); + try (BloomFilterCreator bloomFilterCreator = indexCreatorProvider.newBloomFilterCreator( + IndexCreationContext.builder().withIndexDir(indexDir).withColumnMetadata(columnMetadata) + .build().forBloomFilter(bloomFilterConfig)); + ForwardIndexReader forwardIndexReader = LoaderUtils.getForwardIndexReader(segmentWriter, columnMetadata); + ForwardIndexReaderContext readerContext = forwardIndexReader.createContext()) { + if (columnMetadata.isSingleValue()) { + // SV + switch (columnMetadata.getDataType()) { + case INT: + for (int 
i = 0; i < numDocs; i++) { + bloomFilterCreator.add(Integer.toString(forwardIndexReader.getInt(i, readerContext))); + } + break; + case LONG: + for (int i = 0; i < numDocs; i++) { + bloomFilterCreator.add(Long.toString(forwardIndexReader.getLong(i, readerContext))); + } + break; + case FLOAT: + for (int i = 0; i < numDocs; i++) { + bloomFilterCreator.add(Float.toString(forwardIndexReader.getFloat(i, readerContext))); + } + break; + case DOUBLE: + for (int i = 0; i < numDocs; i++) { + bloomFilterCreator.add(Double.toString(forwardIndexReader.getDouble(i, readerContext))); + } + break; + case STRING: + for (int i = 0; i < numDocs; i++) { + bloomFilterCreator.add(forwardIndexReader.getString(i, readerContext)); + } + break; + case BYTES: + for (int i = 0; i < numDocs; i++) { + bloomFilterCreator.add( + BytesUtils.toHexString(forwardIndexReader.getBytes(i, readerContext)) + ); + } + break; + default: + throw new IllegalStateException("Unsupported data type: " + columnMetadata.getDataType() + " for column: " + + columnMetadata.getColumnName()); + } + bloomFilterCreator.seal(); Review Comment: `bloomFilterCreator.seal()` can be called exactly once, after the if/else branch for the dictionary or no-dictionary case. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org