itschrispeck commented on code in PR #12466: URL: https://github.com/apache/pinot/pull/12466#discussion_r1508495654
########## pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java: ########## @@ -90,9 +90,13 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c } String resultsType = ((LiteralTransformFunction) thirdArgument).getStringLiteral().toUpperCase(); boolean isSingleValue = !resultsType.endsWith("_ARRAY"); + // TODO: will support mv; the underlying jsonIndexReader.getMatchingDocsMap already works for the json path [*] if (!isSingleValue) { throw new IllegalArgumentException("jsonExtractIndex only supports single value type"); } + if (isSingleValue && inputJsonPath.contains("[*]")) { + throw new IllegalArgumentException("json path should not use [*] if the return type is single value"); Review Comment: nit: something like `[*] syntax is unsupported as json_extract_index does not support returning array types` is clearer to a user ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -370,6 +312,90 @@ private int[] getDictIdRangeForKey(String key) { return new int[]{minDictId, maxDictId}; } + // If key doesn't contain the array index, return <original key, null bitmap> + // Elif the key, i.e. the json path provided by user doesn't match any data, return <null, empty bitmap> + // Else, return the json path that is generated by replacing array index with . on the original key + // and the associated flattenDocId bitmap Review Comment: nit: javadoc formatting ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java: ########## @@ -301,12 +272,22 @@ public Map<String, RoaringBitmap> getMatchingDocsMap(String key) { Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>(); _readLock.lock(); try { + Pair<String, RoaringBitmap> result = processArrayIndex(key); + key = result.getLeft(); + RoaringBitmap arrayIndexFlattenDocIds = result.getRight(); + if (arrayIndexFlattenDocIds != null && arrayIndexFlattenDocIds.isEmpty()) { + return matchingDocsMap; + } for (Map.Entry<String, RoaringBitmap> entry : _postingListMap.entrySet()) { if (!entry.getKey().startsWith(key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR)) { continue; } - MutableRoaringBitmap flattenedDocIds = entry.getValue().toMutableRoaringBitmap(); - PeekableIntIterator it = flattenedDocIds.getIntIterator(); + RoaringBitmap kvPairFlattenedDocIds = entry.getValue(); + PeekableIntIterator it = arrayIndexFlattenDocIds == null ? kvPairFlattenedDocIds.getIntIterator() + : intersect(arrayIndexFlattenDocIds.clone(), kvPairFlattenedDocIds).getIntIterator(); Review Comment: `RoaringBitmap.and(arrayIndexFlattenDocIds, flattenedDocIds).getIntIterator()` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java: ########## @@ -342,6 +323,51 @@ public String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> return values; } + private Pair<String, RoaringBitmap> processArrayIndex(String key) { Review Comment: `processArrayIndex` -> `getKeyAndFlattenedDocIds`? Can you add a comment here about input/output (already done in the immutable reader) ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -309,12 +242,21 @@ private int getDocId(int flattenedDocId) { @Override public Map<String, RoaringBitmap> getMatchingDocsMap(String key) { Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>(); + Pair<String, MutableRoaringBitmap> result = processArrayIndex(key); + key = result.getLeft(); + MutableRoaringBitmap arrayIndexFlattenDocIds = result.getRight(); + if (arrayIndexFlattenDocIds != null && arrayIndexFlattenDocIds.isEmpty()) { + return matchingDocsMap; + } int[] dictIds = getDictIdRangeForKey(key); - for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) { // get docIds from posting list, convert these to the actual docIds ImmutableRoaringBitmap flattenedDocIds = _invertedIndex.getDocIds(dictId); - PeekableIntIterator it = flattenedDocIds.getIntIterator(); + PeekableIntIterator it = arrayIndexFlattenDocIds == null ? flattenedDocIds.getIntIterator() + : intersect(arrayIndexFlattenDocIds.clone(), flattenedDocIds); Review Comment: `RoaringBitmap.and(arrayIndexFlattenDocIds, flattenedDocIds).getIntIterator()` ########## pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java: ########## @@ -158,6 +159,11 @@ public void setUp() + "\"stringVal\":\"%s\"}", RANDOM.nextInt(), RANDOM.nextLong(), RANDOM.nextFloat(), RANDOM.nextDouble(), BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(RANDOM.nextInt())), df.format(RANDOM.nextInt() * RANDOM.nextDouble())); + _jsonArrayValues[i] = String.format( + "{\"intVals\":[%s,%s], \"longVals\":[%s,%s], \"floatVals\":[%s,%s], \"doubleVals\":[%s,%s], " + + "\"bigDecimalVals\":[%s,%s], \"stringVals\":[\"%s\",\"%s\"]}", + 0, 1, 0L, 1L, 0.0f, 1.0f, 0.0, 1.0, BigDecimal.valueOf(0.0), BigDecimal.valueOf(1.0), + df.format(0.0), df.format(1.0)); Review Comment: I think it's clearer to combine this w/ `_jsonSVValues`, the column type is still SV (not MV). The duplicated test could be removed and that existing data provider can be extended to cover all cases -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org