Jackie-Jiang commented on code in PR #12532: URL: https://github.com/apache/pinot/pull/12532#discussion_r1533254346
########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java: ########## @@ -90,15 +90,12 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c } String resultsType = ((LiteralTransformFunction) thirdArgument).getStringLiteral().toUpperCase(); boolean isSingleValue = !resultsType.endsWith("_ARRAY"); - // TODO: will support array type; the underlying jsonIndexReader.getMatchingDocsMap supports the json path [*] - if (!isSingleValue) { - throw new IllegalArgumentException("jsonExtractIndex only supports single value type"); - } if (isSingleValue && inputJsonPath.contains("[*]")) { Review Comment: Should we consider returning the first matching value when a SV matches multiple values? ########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java: ########## @@ -90,15 +90,12 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c } String resultsType = ((LiteralTransformFunction) thirdArgument).getStringLiteral().toUpperCase(); boolean isSingleValue = !resultsType.endsWith("_ARRAY"); - // TODO: will support array type; the underlying jsonIndexReader.getMatchingDocsMap supports the json path [*] - if (!isSingleValue) { - throw new IllegalArgumentException("jsonExtractIndex only supports single value type"); - } if (isSingleValue && inputJsonPath.contains("[*]")) { - throw new IllegalArgumentException("[*] syntax in json path is unsupported as json_extract_index" - + "currently does not support returning array types"); + throw new IllegalArgumentException( + "[*] syntax in json path is unsupported for singleValue field json_extract_index"); } - DataType dataType = DataType.valueOf(resultsType); + DataType dataType = isSingleValue ? DataType.valueOf(resultsType) + : DataType.valueOf(resultsType.substring(0, resultsType.length() - 6)); if (arguments.size() == 4) { Review Comment: Should this be `>= 4`? Or we will miss the default value. Does default value apply to MV? ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java: ########## @@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader { MutableRoaringBitmap getMatchingDocIds(String filterString); /** - * For an array of docIds and context specific to a JSON key, returns the corresponding values for each docId. The - * context should be created from the getMatchingDocsMap method. - * - * @return String[] where String[i] is the value for docIds[i] + * For an array of docIds and context specific to a JSON key, returns the corresponding mv array for each docId. + * @param docIds array of docIds + * @param length length of the array + * @param matchingValueToFlattenedDocs Map from each unique value for the jsonPathKey value to the flattened docId + * posting list + * @return String[][] where String[i] is the mv array for docIds[i] */ - String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> context); + String[][] getValuesForMv(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); Review Comment: ```suggestion String[][] getValuesMV(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -62,6 +66,7 @@ public class ImmutableJsonIndexReader implements JsonIndexReader { private final int _version; private final StringDictionary _dictionary; private final BitmapInvertedIndexReader _invertedIndex; + private final long _numFlattenedDocs; Review Comment: (minor) Is this used? ########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java: ########## @@ -108,8 +105,9 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c _defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral()); } - _resultMetadata = new TransformResultMetadata(dataType, true, false); - _matchingDocsMap = _jsonIndexReader.getMatchingDocsMap(_jsonPathString); + _resultMetadata = new TransformResultMetadata(dataType, isSingleValue, false); + _valueToMatchingFlattenedDocIdsMap = + _jsonIndexReader.getMatchingFlattenedDocsMap(inputJsonPath.substring(1)); Review Comment: (minor) Suggest passing in the full path, and JSON index reader should validate the path ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java: ########## @@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader { MutableRoaringBitmap getMatchingDocIds(String filterString); /** - * For an array of docIds and context specific to a JSON key, returns the corresponding values for each docId. The - * context should be created from the getMatchingDocsMap method. - * - * @return String[] where String[i] is the value for docIds[i] + * For an array of docIds and context specific to a JSON key, returns the corresponding mv array for each docId. + * @param docIds array of docIds + * @param length length of the array + * @param matchingValueToFlattenedDocs Map from each unique value for the jsonPathKey value to the flattened docId + * posting list + * @return String[][] where String[i] is the mv array for docIds[i] */ - String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> context); + String[][] getValuesForMv(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); /** - * For a JSON key, returns a Map from each value to the docId posting list. This map should be used to avoid reading - * and converting the posting list of flattened docIds to real docIds + * For an array of docIds and context specific to a JSON key, returns the corresponding sv value for each docId. + * @param docIds array of docIds + * @param length length of the array + * @param matchingValueToFlattenedDocs Map from each unique value for the jsonPathKey value to the flattened docId + * posting list + * @return String[] where String[i] is the sv value for docIds[i] */ - Map<String, RoaringBitmap> getMatchingDocsMap(String key); + String[] getValuesForSv(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); Review Comment: ```suggestion String[] getValuesSV(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -129,7 +135,8 @@ private MutableRoaringBitmap getMatchingFlattenedDocIds(FilterContext filter) { case AND: { List<FilterContext> children = filter.getChildren(); int numChildren = children.size(); - MutableRoaringBitmap matchingDocIds = getMatchingFlattenedDocIds(children.get(0)); + MutableRoaringBitmap matchingDocIds = + getMatchingFlattenedDocIds(children.get(0)); Review Comment: (format) Please reformat all the changes ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java: ########## @@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader { MutableRoaringBitmap getMatchingDocIds(String filterString); /** - * For an array of docIds and context specific to a JSON key, returns the corresponding values for each docId. The - * context should be created from the getMatchingDocsMap method. - * - * @return String[] where String[i] is the value for docIds[i] + * For an array of docIds and context specific to a JSON key, returns the corresponding mv array for each docId. + * @param docIds array of docIds + * @param length length of the array + * @param matchingValueToFlattenedDocs Map from each unique value for the jsonPathKey value to the flattened docId + * posting list + * @return String[][] where String[i] is the mv array for docIds[i] */ - String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> context); + String[][] getValuesForMv(int[] docIds, int length, Map<String, RoaringBitmap> matchingValueToFlattenedDocs); Review Comment: (nit) We usually put API for SV in front of MV ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java: ########## @@ -350,58 +354,48 @@ private RoaringBitmap getMatchingFlattenedDocIds(Predicate predicate) { } } - @Override - public Map<String, RoaringBitmap> getMatchingDocsMap(String key) { - Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>(); + @VisibleForTesting + public Map<String, RoaringBitmap> convertFlattenedDocIdsToDocIds(Map<String, RoaringBitmap> valueToFlattenedDocIds) { _readLock.lock(); try { - Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(key); - key = result.getLeft(); - RoaringBitmap arrayIndexFlattenDocIds = result.getRight(); - if (arrayIndexFlattenDocIds != null && arrayIndexFlattenDocIds.isEmpty()) { - return matchingDocsMap; - } - Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key); - for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) { - RoaringBitmap kvPairFlattenedDocIds = entry.getValue(); - PeekableIntIterator it = arrayIndexFlattenDocIds == null ? kvPairFlattenedDocIds.getIntIterator() - : RoaringBitmap.and(arrayIndexFlattenDocIds, kvPairFlattenedDocIds).getIntIterator(); - if (!it.hasNext()) { - continue; - } - MutableRoaringBitmap postingList = new MutableRoaringBitmap(); - while (it.hasNext()) { - postingList.add(_docIdMapping.getInt(it.next())); - } - String val = entry.getKey().substring(key.length() + 1); - matchingDocsMap.put(val, postingList.toRoaringBitmap()); + Map<String, RoaringBitmap> valueToDocIds = new HashMap<>(); + for (Map.Entry<String, RoaringBitmap> entry : valueToFlattenedDocIds.entrySet()) { + RoaringBitmap docIds = new RoaringBitmap(); + entry.getValue().forEach((IntConsumer) flattenedDocId -> docIds.add(_docIdMapping.getInt(flattenedDocId))); + valueToDocIds.put(entry.getKey(), docIds); } + return valueToDocIds; } finally { _readLock.unlock(); } - return matchingDocsMap; } @Override - public String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> matchingDocsMap) { - Int2ObjectOpenHashMap<String> docIdToValues = new Int2ObjectOpenHashMap<>(docIds.length); - RoaringBitmap docIdMask = RoaringBitmap.bitmapOf(docIds); - - for (Map.Entry<String, RoaringBitmap> entry : matchingDocsMap.entrySet()) { - RoaringBitmap intersection = RoaringBitmap.and(entry.getValue(), docIdMask); - if (intersection.isEmpty()) { - continue; + public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey) { + Map<String, RoaringBitmap> valueToMatchingFlattenedDocIdsMap = new HashMap<>(); + _readLock.lock(); + try { + Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(jsonPathKey); Review Comment: (minor, not introduced here) Rename the method to `getKeyAndFlattenedDocId` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org