This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 736f70f059 Add CaseSensitiveAnalyzer and support for case-sensitive text indexing (#15803) 736f70f059 is described below commit 736f70f059b53eb5be5a8aedf61c4974b711aa3e Author: Xiang Fu <xiangfu.1...@gmail.com> AuthorDate: Sat May 17 15:12:48 2025 +0800 Add CaseSensitiveAnalyzer and support for case-sensitive text indexing (#15803) --- .../integration/tests/custom/TextIndicesTest.java | 88 +++++++++++++++++++--- .../segment/index/text/CaseSensitiveAnalyzer.java | 85 +++++++++++++++++++++ .../local/segment/store/TextIndexUtils.java | 10 ++- .../pinot/segment/spi/index/TextIndexConfig.java | 46 +++++++++-- .../fineFoodReviews_offline_table_config.json | 15 ++-- .../fineFoodReviews_realtime_table_config.json | 12 +-- 6 files changed, 223 insertions(+), 33 deletions(-) diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TextIndicesTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TextIndicesTest.java index 353cd00396..bf520e28cf 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TextIndicesTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TextIndicesTest.java @@ -18,6 +18,9 @@ */ package org.apache.pinot.integration.tests.custom; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.BufferedReader; import java.io.File; import java.io.InputStream; @@ -25,9 +28,7 @@ import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import javax.annotation.Nullable; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; @@ -49,10 +50,11 @@ import static org.testng.AssertJUnit.fail; @Test(suiteName = "CustomClusterIntegrationTest") public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { - + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String DEFAULT_TABLE_NAME = "TextIndicesTest"; private static final String TEXT_COLUMN_NAME = "skills"; + private static final String TEXT_COLUMN_NAME_CASE_SENSITIVE = "skills_case_sensitive"; private static final String TEXT_COLUMN_NAME_NATIVE = "skills_native"; private static final String TIME_COLUMN_NAME = "millisSinceEpoch"; private static final int NUM_SKILLS = 28; @@ -87,7 +89,7 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { @Override protected List<String> getNoDictionaryColumns() { - return Collections.singletonList(TEXT_COLUMN_NAME); + return List.of(TEXT_COLUMN_NAME, TEXT_COLUMN_NAME_CASE_SENSITIVE); } @Nullable @@ -104,13 +106,50 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { @Override protected List<FieldConfig> getFieldConfigs() { - Map<String, String> propertiesMap = new HashMap<>(); - propertiesMap.put(FieldConfig.TEXT_FST_TYPE, FieldConfig.TEXT_NATIVE_FST_LITERAL); + ObjectNode textColumnIndexes; + try { + textColumnIndexes = (ObjectNode) OBJECT_MAPPER.readTree("{\"text\": {}}"); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + FieldConfig textColumnFieldConfig = + new FieldConfig(TEXT_COLUMN_NAME, FieldConfig.EncodingType.RAW, null, null, null, null, textColumnIndexes, null, + null); - return Arrays.asList( - new FieldConfig(TEXT_COLUMN_NAME, FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null, null), - new FieldConfig(TEXT_COLUMN_NAME_NATIVE, FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null, - propertiesMap)); + ObjectNode textColumnCaseSensitiveIndexes; + try { + textColumnCaseSensitiveIndexes = (ObjectNode) OBJECT_MAPPER.readTree( + "{" + + " \"text\": " + + " {" + + " \"caseSensitive\": \"true\"" + + " }" + + "}" + ); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + FieldConfig textColumnCaseSensitiveFieldConfig = + new FieldConfig(TEXT_COLUMN_NAME_CASE_SENSITIVE, FieldConfig.EncodingType.RAW, null, null, null, null, + textColumnCaseSensitiveIndexes, null, null); + + ObjectNode textColumnNativeIndexes; + try { + textColumnNativeIndexes = (ObjectNode) OBJECT_MAPPER.readTree( + "{" + + " \"text\": " + + " {" + + " \"fst\": \"NATIVE\"" + + " }" + + "}" + ); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + FieldConfig textColumnNativeFieldConfig = + new FieldConfig(TEXT_COLUMN_NAME_NATIVE, FieldConfig.EncodingType.RAW, null, null, null, null, + textColumnNativeIndexes, null, null); + return Arrays.asList(textColumnFieldConfig, textColumnCaseSensitiveFieldConfig, textColumnNativeFieldConfig); } @Override @@ -122,6 +161,7 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { public Schema createSchema() { return new Schema.SchemaBuilder().setSchemaName(getTableName()) .addSingleValueDimension(TEXT_COLUMN_NAME, FieldSpec.DataType.STRING) + .addSingleValueDimension(TEXT_COLUMN_NAME_CASE_SENSITIVE, FieldSpec.DataType.STRING) .addSingleValueDimension(TEXT_COLUMN_NAME_NATIVE, FieldSpec.DataType.STRING) .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); } @@ -150,6 +190,8 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false); avroSchema.setFields(Arrays.asList(new org.apache.avro.Schema.Field(TEXT_COLUMN_NAME, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null), + new org.apache.avro.Schema.Field(TEXT_COLUMN_NAME_CASE_SENSITIVE, + org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null), new org.apache.avro.Schema.Field(TEXT_COLUMN_NAME_NATIVE, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null), new org.apache.avro.Schema.Field(TIME_COLUMN_NAME, @@ -159,6 +201,7 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { for (int i = 0; i < NUM_RECORDS; i++) { GenericData.Record record = new GenericData.Record(avroSchema); record.put(TEXT_COLUMN_NAME, skills.get(i % NUM_SKILLS)); + record.put(TEXT_COLUMN_NAME_CASE_SENSITIVE, skills.get(i % NUM_SKILLS)); record.put(TEXT_COLUMN_NAME_NATIVE, skills.get(i % NUM_SKILLS)); record.put(TIME_COLUMN_NAME, System.currentTimeMillis()); fileWriter.append(record); @@ -215,8 +258,29 @@ public class TextIndicesTest extends CustomDataQueryClusterIntegrationTest { Thread.sleep(100); } - assertTrue(getTextColumnQueryResult(String.format(TEST_TEXT_COLUMN_QUERY_NATIVE, getTableName())) - == NUM_MATCHING_RECORDS_NATIVE); + assertEquals(getTextColumnQueryResult(String.format(TEST_TEXT_COLUMN_QUERY_NATIVE, getTableName())), + NUM_MATCHING_RECORDS_NATIVE); + } + + @Test(dataProvider = "useBothQueryEngines") + public void testTextSearchCountQueryCaseSensitive(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + // Keep posting queries until all records are consumed + long previousResult = 0; + + String queryWithMatch = "SELECT COUNT(*) FROM %s WHERE TEXT_MATCH(skills_case_sensitive, 'Java')"; + String queryWithoutMatch = "SELECT COUNT(*) FROM %s WHERE TEXT_MATCH(skills_case_sensitive, 'java')"; + while (getCurrentCountStarResult() < NUM_RECORDS) { + long result = getTextColumnQueryResult(String.format(queryWithMatch, getTableName())); + assertTrue(result >= previousResult); + previousResult = result; + Thread.sleep(100); + } + + assertEquals(getTextColumnQueryResult(String.format(queryWithMatch, getTableName())), 12000); + // Test case sensitive match, all skills are 'Java' not 'java' + assertEquals(getTextColumnQueryResult(String.format(queryWithoutMatch, getTableName())), 0); } private long getTextColumnQueryResult(String query) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/CaseSensitiveAnalyzer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/CaseSensitiveAnalyzer.java new file mode 100644 index 0000000000..d8b003f7ae --- /dev/null +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/CaseSensitiveAnalyzer.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.text; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.StopwordAnalyzerBase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardTokenizer; + + +/** + * A {@link org.apache.lucene.analysis.Analyzer} for case-sensitive text. + * It's directly copied from {@link org.apache.lucene.analysis.standard.StandardAnalyzer} but + * removes the lowercase filter. + */ +public class CaseSensitiveAnalyzer extends StopwordAnalyzerBase { + + /** Default maximum allowed token length */ + public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; + + private int _maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; + + /** + * Builds an analyzer with the given stop words. + * + * @param stopWords stop words + */ + public CaseSensitiveAnalyzer(CharArraySet stopWords) { + super(stopWords); + } + + /** Builds an analyzer with no stop words. */ + public CaseSensitiveAnalyzer() { + this(CharArraySet.EMPTY_SET); + } + + /** + * Set the max allowed token length. Tokens larger than this will be chopped up at this token + * length and emitted as multiple tokens. If you need to skip such large tokens, you could + * increase this max length, and then use {@code LengthFilter} to remove long tokens. The default + * is {@link org.apache.pinot.segment.local.segment.index.text.CaseSensitiveAnalyzer#DEFAULT_MAX_TOKEN_LENGTH}. + */ + public void setMaxTokenLength(int length) { + _maxTokenLength = length; + } + + /** + * Returns the current maximum token length + * + * @see #setMaxTokenLength + */ + public int getMaxTokenLength() { + return _maxTokenLength; + } + + @Override + protected TokenStreamComponents createComponents(final String fieldName) { + final StandardTokenizer tokenizer = new StandardTokenizer(); + tokenizer.setMaxTokenLength(_maxTokenLength); + TokenStream tok = new StopFilter(tokenizer, stopwords); + return new TokenStreamComponents( + r -> { + tokenizer.setMaxTokenLength(_maxTokenLength); + tokenizer.setReader(r); + }, + tok); + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java index 63383aebbb..8d982af4ee 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java @@ -36,6 +36,7 @@ import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.queryparser.classic.QueryParserBase; import org.apache.pinot.segment.local.segment.creator.impl.text.LuceneTextIndexCreator; +import org.apache.pinot.segment.local.segment.index.text.CaseSensitiveAnalyzer; import org.apache.pinot.segment.local.segment.index.text.TextIndexConfigBuilder; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.V1Constants.Indexes; @@ -145,7 +146,7 @@ public class TextIndexUtils { // When there is no analyzer defined, or when StandardAnalyzer (default) is used without arguments, // use existing logic to obtain an instance of StandardAnalyzer with customized stop words return TextIndexUtils.getStandardAnalyzerWithCustomizedStopWords( - config.getStopWordsInclude(), config.getStopWordsExclude()); + config.getStopWordsInclude(), config.getStopWordsExclude(), config.isCaseSensitive()); } // Custom analyzer + custom configs via reflection @@ -270,8 +271,8 @@ public class TextIndexUtils { } } - public static StandardAnalyzer getStandardAnalyzerWithCustomizedStopWords(@Nullable List<String> stopWordsInclude, - @Nullable List<String> stopWordsExclude) { + public static Analyzer getStandardAnalyzerWithCustomizedStopWords(@Nullable List<String> stopWordsInclude, + @Nullable List<String> stopWordsExclude, boolean isCaseSensitive) { HashSet<String> stopWordSet = LuceneTextIndexCreator.getDefaultEnglishStopWordsSet(); if (stopWordsInclude != null) { stopWordSet.addAll(stopWordsInclude); @@ -279,6 +280,9 @@ public class TextIndexUtils { if (stopWordsExclude != null) { stopWordsExclude.forEach(stopWordSet::remove); } + if (isCaseSensitive) { + return new CaseSensitiveAnalyzer(new CharArraySet(stopWordSet, false)); + } return new StandardAnalyzer(new CharArraySet(stopWordSet, true)); } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java index b32704c179..fc1de28337 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java @@ -42,6 +42,7 @@ public class TextIndexConfig extends IndexConfig { private static final boolean LUCENE_INDEX_DEFAULT_USE_AND_FOR_MULTI_TERM_QUERIES = false; private static final boolean LUCENE_USE_LOG_BYTE_SIZE_MERGE_POLICY = false; private static final DocIdTranslatorMode LUCENE_TRANSLATOR_MODE = null; + private static final boolean LUCENE_INDEX_DEFAULT_CASE_SENSITIVE_INDEX = false; // keep in sync with constructor! private static final List<String> PROPERTY_NAMES = List.of( @@ -49,13 +50,13 @@ public class TextIndexConfig extends IndexConfig { "luceneUseCompoundFile", "luceneMaxBufferSizeMB", "luceneAnalyzerClass", "luceneAnalyzerClassArgs", "luceneAnalyzerClassArgTypes", "luceneQueryParserClass", "enablePrefixSuffixMatchingInPhraseQueries", "reuseMutableIndex", "luceneNRTCachingDirectoryMaxBufferSizeMB", "useLogByteSizeMergePolicy", - "docIdTranslatorMode" + "docIdTranslatorMode", "caseSensitive" ); public static final TextIndexConfig DISABLED = new TextIndexConfig(true, null, null, false, false, Collections.emptyList(), Collections.emptyList(), false, LUCENE_INDEX_DEFAULT_MAX_BUFFER_SIZE_MB, null, null, null, null, false, false, 0, false, - null); + null, LUCENE_INDEX_DEFAULT_CASE_SENSITIVE_INDEX); private final FSTType _fstType; @Nullable @@ -75,6 +76,7 @@ public class TextIndexConfig extends IndexConfig { private final int _luceneNRTCachingDirectoryMaxBufferSizeMB; private final boolean _useLogByteSizeMergePolicy; private final DocIdTranslatorMode _docIdTranslatorMode; + private final boolean _caseSensitive; public enum DocIdTranslatorMode { // build and keep mapping @@ -98,6 +100,21 @@ public class TextIndexConfig extends IndexConfig { } } + public TextIndexConfig(Boolean disabled, FSTType fstType, Object rawValueForTextIndex, boolean enableQueryCache, + boolean useANDForMultiTermQueries, List<String> stopWordsInclude, List<String> stopWordsExclude, + Boolean luceneUseCompoundFile, Integer luceneMaxBufferSizeMB, String luceneAnalyzerClass, + String luceneAnalyzerClassArgs, String luceneAnalyzerClassArgTypes, String luceneQueryParserClass, + Boolean enablePrefixSuffixMatchingInPhraseQueries, Boolean reuseMutableIndex, + Integer luceneNRTCachingDirectoryMaxBufferSizeMB, Boolean useLogByteSizeMergePolicy, + DocIdTranslatorMode docIdTranslatorMode) { + this(disabled, fstType, rawValueForTextIndex, enableQueryCache, useANDForMultiTermQueries, + stopWordsInclude, stopWordsExclude, luceneUseCompoundFile, luceneMaxBufferSizeMB, luceneAnalyzerClass, + luceneAnalyzerClassArgs, luceneAnalyzerClassArgTypes, luceneQueryParserClass, + enablePrefixSuffixMatchingInPhraseQueries, reuseMutableIndex, + luceneNRTCachingDirectoryMaxBufferSizeMB, useLogByteSizeMergePolicy, docIdTranslatorMode, + LUCENE_INDEX_DEFAULT_CASE_SENSITIVE_INDEX); + } + @JsonCreator public TextIndexConfig(@JsonProperty("disabled") Boolean disabled, @JsonProperty("fst") FSTType fstType, @@ -116,7 +133,8 @@ public class TextIndexConfig extends IndexConfig { @JsonProperty("reuseMutableIndex") Boolean reuseMutableIndex, @JsonProperty("luceneNRTCachingDirectoryMaxBufferSizeMB") Integer luceneNRTCachingDirectoryMaxBufferSizeMB, @JsonProperty("useLogByteSizeMergePolicy") Boolean useLogByteSizeMergePolicy, - @JsonProperty("docIdTranslatorMode") DocIdTranslatorMode docIdTranslatorMode) { + @JsonProperty("docIdTranslatorMode") DocIdTranslatorMode docIdTranslatorMode, + @JsonProperty("caseSensitive") Boolean caseSensitive) { super(disabled); _fstType = fstType; _rawValueForTextIndex = rawValueForTextIndex; @@ -137,7 +155,7 @@ public class TextIndexConfig extends IndexConfig { _luceneAnalyzerClassArgs = CsvParser.parse(luceneAnalyzerClassArgs, true, false); _luceneAnalyzerClassArgTypes = CsvParser.parse(luceneAnalyzerClassArgTypes, false, true); _luceneQueryParserClass = luceneQueryParserClass == null - ? FieldConfig.TEXT_INDEX_DEFAULT_LUCENE_QUERY_PARSER_CLASS : luceneQueryParserClass; + ? FieldConfig.TEXT_INDEX_DEFAULT_LUCENE_QUERY_PARSER_CLASS : luceneQueryParserClass; _enablePrefixSuffixMatchingInPhraseQueries = enablePrefixSuffixMatchingInPhraseQueries == null ? LUCENE_INDEX_ENABLE_PREFIX_SUFFIX_MATCH_IN_PHRASE_SEARCH : enablePrefixSuffixMatchingInPhraseQueries; @@ -148,6 +166,7 @@ public class TextIndexConfig extends IndexConfig { _useLogByteSizeMergePolicy = useLogByteSizeMergePolicy == null ? LUCENE_USE_LOG_BYTE_SIZE_MERGE_POLICY : useLogByteSizeMergePolicy; _docIdTranslatorMode = docIdTranslatorMode == null ? LUCENE_TRANSLATOR_MODE : docIdTranslatorMode; + _caseSensitive = caseSensitive == null ? LUCENE_INDEX_DEFAULT_CASE_SENSITIVE_INDEX : caseSensitive; } public FSTType getFstType() { @@ -250,6 +269,10 @@ public class TextIndexConfig extends IndexConfig { return _luceneNRTCachingDirectoryMaxBufferSizeMB; } + public boolean isCaseSensitive() { + return _caseSensitive; + } + public static abstract class AbstractBuilder { @Nullable protected FSTType _fstType; @@ -272,6 +295,7 @@ public class TextIndexConfig extends IndexConfig { protected boolean _useLogByteSizeMergePolicy = LUCENE_USE_LOG_BYTE_SIZE_MERGE_POLICY; @Nullable protected DocIdTranslatorMode _docIdTranslatorMode = LUCENE_TRANSLATOR_MODE; + protected boolean _caseSensitive = LUCENE_INDEX_DEFAULT_CASE_SENSITIVE_INDEX; public AbstractBuilder(@Nullable FSTType fstType) { _fstType = fstType; @@ -296,6 +320,7 @@ public class TextIndexConfig extends IndexConfig { _luceneNRTCachingDirectoryMaxBufferSizeMB = other._luceneNRTCachingDirectoryMaxBufferSizeMB; _useLogByteSizeMergePolicy = other._useLogByteSizeMergePolicy; _docIdTranslatorMode = other._docIdTranslatorMode; + _caseSensitive = other._caseSensitive; } public TextIndexConfig build() { @@ -305,7 +330,7 @@ public class TextIndexConfig extends IndexConfig { CsvParser.serialize(_luceneAnalyzerClassArgTypes, true, false), _luceneQueryParserClass, _enablePrefixSuffixMatchingInPhraseQueries, _reuseMutableIndex, _luceneNRTCachingDirectoryMaxBufferSizeMB, _useLogByteSizeMergePolicy, - _docIdTranslatorMode); + _docIdTranslatorMode, _caseSensitive); } public abstract AbstractBuilder withProperties(@Nullable Map<String, String> textIndexProperties); @@ -395,6 +420,11 @@ public class TextIndexConfig extends IndexConfig { _docIdTranslatorMode = DocIdTranslatorMode.of(mode); return this; } + + public AbstractBuilder withCaseSensitive(boolean caseSensitive) { + _caseSensitive = caseSensitive; + return this; + } } @Override @@ -425,7 +455,8 @@ public class TextIndexConfig extends IndexConfig { && Objects.equals(_luceneAnalyzerClass, that._luceneAnalyzerClass) && Objects.equals(_luceneAnalyzerClassArgs, that._luceneAnalyzerClassArgs) && Objects.equals(_luceneAnalyzerClassArgTypes, that._luceneAnalyzerClassArgTypes) - && Objects.equals(_luceneQueryParserClass, that._luceneQueryParserClass); + && Objects.equals(_luceneQueryParserClass, that._luceneQueryParserClass) + && _caseSensitive == that._caseSensitive; } @Override @@ -434,7 +465,8 @@ public class TextIndexConfig extends IndexConfig { _useANDForMultiTermQueries, _stopWordsInclude, _stopWordsExclude, _luceneUseCompoundFile, _luceneMaxBufferSizeMB, _luceneAnalyzerClass, _luceneAnalyzerClassArgs, _luceneAnalyzerClassArgTypes, _luceneQueryParserClass, _enablePrefixSuffixMatchingInPhraseQueries, _reuseMutableIndex, - _luceneNRTCachingDirectoryMaxBufferSizeMB, _useLogByteSizeMergePolicy, _docIdTranslatorMode); + _luceneNRTCachingDirectoryMaxBufferSizeMB, _useLogByteSizeMergePolicy, _docIdTranslatorMode, + _caseSensitive); } public static boolean isProperty(String prop) { diff --git a/pinot-tools/src/main/resources/examples/batch/fineFoodReviews/fineFoodReviews_offline_table_config.json b/pinot-tools/src/main/resources/examples/batch/fineFoodReviews/fineFoodReviews_offline_table_config.json index ad082e3612..750d18a3d3 100644 --- a/pinot-tools/src/main/resources/examples/batch/fineFoodReviews/fineFoodReviews_offline_table_config.json +++ b/pinot-tools/src/main/resources/examples/batch/fineFoodReviews/fineFoodReviews_offline_table_config.json @@ -28,16 +28,19 @@ "vectorIndexType": "HNSW", "vectorDimension": 1536, "vectorDistanceFunction": "COSINE", - "version": 1 + "version": 1, + "commitDocs": "1" } }, { - "encodingType": "RAW", - "indexType": "TEXT", "name": "Text", - "properties": { - "deriveNumDocsPerChunkForRawIndex": "true", - "rawIndexWriterVersion": "3" + "encodingType": "RAW", + "indexes": { + "text": { + "deriveNumDocsPerChunkForRawIndex": "true", + "rawIndexWriterVersion": "3", + "caseSensitive": "true" + } } } ] diff --git a/pinot-tools/src/main/resources/examples/stream/fineFoodReviews/fineFoodReviews_realtime_table_config.json b/pinot-tools/src/main/resources/examples/stream/fineFoodReviews/fineFoodReviews_realtime_table_config.json index dd9d551e19..665f762569 100644 --- a/pinot-tools/src/main/resources/examples/stream/fineFoodReviews/fineFoodReviews_realtime_table_config.json +++ b/pinot-tools/src/main/resources/examples/stream/fineFoodReviews/fineFoodReviews_realtime_table_config.json @@ -61,12 +61,14 @@ } }, { - "encodingType": "RAW", - "indexType": "TEXT", "name": "Text", - "properties": { - "deriveNumDocsPerChunkForRawIndex": "true", - "rawIndexWriterVersion": "3" + "encodingType": "RAW", + "indexes": { + "text": { + "deriveNumDocsPerChunkForRawIndex": "true", + "rawIndexWriterVersion": "3", + "caseSensitive": "false" + } } } ] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org