Jackie-Jiang commented on code in PR #12532:
URL: https://github.com/apache/pinot/pull/12532#discussion_r1533254346


##########
pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java:
##########
@@ -90,15 +90,12 @@ public void init(List<TransformFunction> arguments, 
Map<String, ColumnContext> c
     }
     String resultsType = ((LiteralTransformFunction) 
thirdArgument).getStringLiteral().toUpperCase();
     boolean isSingleValue = !resultsType.endsWith("_ARRAY");
-    // TODO: will support array type; the underlying 
jsonIndexReader.getMatchingDocsMap supports the json path [*]
-    if (!isSingleValue) {
-      throw new IllegalArgumentException("jsonExtractIndex only supports 
single value type");
-    }
     if (isSingleValue && inputJsonPath.contains("[*]")) {

Review Comment:
   Should we consider returning the first matching value when a SV matches 
multiple values?



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java:
##########
@@ -90,15 +90,12 @@ public void init(List<TransformFunction> arguments, 
Map<String, ColumnContext> c
     }
     String resultsType = ((LiteralTransformFunction) 
thirdArgument).getStringLiteral().toUpperCase();
     boolean isSingleValue = !resultsType.endsWith("_ARRAY");
-    // TODO: will support array type; the underlying 
jsonIndexReader.getMatchingDocsMap supports the json path [*]
-    if (!isSingleValue) {
-      throw new IllegalArgumentException("jsonExtractIndex only supports 
single value type");
-    }
     if (isSingleValue && inputJsonPath.contains("[*]")) {
-      throw new IllegalArgumentException("[*] syntax in json path is 
unsupported as json_extract_index"
-          + "currently does not support returning array types");
+      throw new IllegalArgumentException(
+          "[*] syntax in json path is unsupported for singleValue field 
json_extract_index");
     }
-    DataType dataType = DataType.valueOf(resultsType);
+    DataType dataType = isSingleValue ? DataType.valueOf(resultsType)
+        : DataType.valueOf(resultsType.substring(0, resultsType.length() - 6));
 
     if (arguments.size() == 4) {

Review Comment:
   Should this be `>= 4`? Or we will miss the default value. Does default value 
apply to MV?



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java:
##########
@@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader {
   MutableRoaringBitmap getMatchingDocIds(String filterString);
 
   /**
-   * For an array of docIds and context specific to a JSON key, returns the 
corresponding values for each docId. The
-   * context should be created from the getMatchingDocsMap method.
-   *
-   * @return String[] where String[i] is the value for docIds[i]
+   * For an array of docIds and context specific to a JSON key, returns the 
corresponding mv array for each docId.
+   * @param docIds array of docIds
+   * @param length length of the array
+   * @param matchingValueToFlattenedDocs Map from each unique value for the 
jsonPathKey value to the flattened docId
+   *                                     posting list
+   * @return String[][] where String[i] is the mv array for docIds[i]
    */
-  String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> 
context);
+  String[][] getValuesForMv(int[] docIds, int length, Map<String, 
RoaringBitmap> matchingValueToFlattenedDocs);

Review Comment:
   ```suggestion
     String[][] getValuesMV(int[] docIds, int length, Map<String, 
RoaringBitmap> matchingValueToFlattenedDocs);
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java:
##########
@@ -62,6 +66,7 @@ public class ImmutableJsonIndexReader implements 
JsonIndexReader {
   private final int _version;
   private final StringDictionary _dictionary;
   private final BitmapInvertedIndexReader _invertedIndex;
+  private final long _numFlattenedDocs;

Review Comment:
   (minor) Is this used?



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java:
##########
@@ -108,8 +105,9 @@ public void init(List<TransformFunction> arguments, 
Map<String, ColumnContext> c
       _defaultValue = dataType.convert(((LiteralTransformFunction) 
fourthArgument).getStringLiteral());
     }
 
-    _resultMetadata = new TransformResultMetadata(dataType, true, false);
-    _matchingDocsMap = _jsonIndexReader.getMatchingDocsMap(_jsonPathString);
+    _resultMetadata = new TransformResultMetadata(dataType, isSingleValue, 
false);
+    _valueToMatchingFlattenedDocIdsMap =
+        
_jsonIndexReader.getMatchingFlattenedDocsMap(inputJsonPath.substring(1));

Review Comment:
   (minor) Suggest passing in the full path, and JSON index reader should 
validate the path



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java:
##########
@@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader {
   MutableRoaringBitmap getMatchingDocIds(String filterString);
 
   /**
-   * For an array of docIds and context specific to a JSON key, returns the 
corresponding values for each docId. The
-   * context should be created from the getMatchingDocsMap method.
-   *
-   * @return String[] where String[i] is the value for docIds[i]
+   * For an array of docIds and context specific to a JSON key, returns the 
corresponding mv array for each docId.
+   * @param docIds array of docIds
+   * @param length length of the array
+   * @param matchingValueToFlattenedDocs Map from each unique value for the 
jsonPathKey value to the flattened docId
+   *                                     posting list
+   * @return String[][] where String[i] is the mv array for docIds[i]
    */
-  String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> 
context);
+  String[][] getValuesForMv(int[] docIds, int length, Map<String, 
RoaringBitmap> matchingValueToFlattenedDocs);
 
   /**
-   * For a JSON key, returns a Map from each value to the docId posting list. 
This map should be  used to avoid reading
-   * and converting the posting list of flattened docIds to real docIds
+   * For an array of docIds and context specific to a JSON key, returns the 
corresponding sv value for each docId.
+   * @param docIds array of docIds
+   * @param length length of the array
+   * @param matchingValueToFlattenedDocs Map from each unique value for the 
jsonPathKey value to the flattened docId
+   *                                     posting list
+   * @return String[] where String[i] is the sv value for docIds[i]
    */
-  Map<String, RoaringBitmap> getMatchingDocsMap(String key);
+  String[] getValuesForSv(int[] docIds, int length, Map<String, RoaringBitmap> 
matchingValueToFlattenedDocs);

Review Comment:
   ```suggestion
     String[] getValuesSV(int[] docIds, int length, Map<String, RoaringBitmap> 
matchingValueToFlattenedDocs);
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java:
##########
@@ -129,7 +135,8 @@ private MutableRoaringBitmap 
getMatchingFlattenedDocIds(FilterContext filter) {
       case AND: {
         List<FilterContext> children = filter.getChildren();
         int numChildren = children.size();
-        MutableRoaringBitmap matchingDocIds = 
getMatchingFlattenedDocIds(children.get(0));
+        MutableRoaringBitmap matchingDocIds =
+            getMatchingFlattenedDocIds(children.get(0));

Review Comment:
   (format) Please reformat all the changes



##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/JsonIndexReader.java:
##########
@@ -35,16 +35,28 @@ public interface JsonIndexReader extends IndexReader {
   MutableRoaringBitmap getMatchingDocIds(String filterString);
 
   /**
-   * For an array of docIds and context specific to a JSON key, returns the 
corresponding values for each docId. The
-   * context should be created from the getMatchingDocsMap method.
-   *
-   * @return String[] where String[i] is the value for docIds[i]
+   * For an array of docIds and context specific to a JSON key, returns the 
corresponding mv array for each docId.
+   * @param docIds array of docIds
+   * @param length length of the array
+   * @param matchingValueToFlattenedDocs Map from each unique value for the 
jsonPathKey value to the flattened docId
+   *                                     posting list
+   * @return String[][] where String[i] is the mv array for docIds[i]
    */
-  String[] getValuesForKeyAndDocs(int[] docIds, Map<String, RoaringBitmap> 
context);
+  String[][] getValuesForMv(int[] docIds, int length, Map<String, 
RoaringBitmap> matchingValueToFlattenedDocs);

Review Comment:
   (nit) We usually put API for SV in front of MV



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java:
##########
@@ -350,58 +354,48 @@ private RoaringBitmap 
getMatchingFlattenedDocIds(Predicate predicate) {
     }
   }
 
-  @Override
-  public Map<String, RoaringBitmap> getMatchingDocsMap(String key) {
-    Map<String, RoaringBitmap> matchingDocsMap = new HashMap<>();
+  @VisibleForTesting
+  public Map<String, RoaringBitmap> convertFlattenedDocIdsToDocIds(Map<String, 
RoaringBitmap> valueToFlattenedDocIds) {
     _readLock.lock();
     try {
-      Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(key);
-      key = result.getLeft();
-      RoaringBitmap arrayIndexFlattenDocIds = result.getRight();
-      if (arrayIndexFlattenDocIds != null && 
arrayIndexFlattenDocIds.isEmpty()) {
-        return matchingDocsMap;
-      }
-      Map<String, RoaringBitmap> subMap = getMatchingKeysMap(key);
-      for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
-        RoaringBitmap kvPairFlattenedDocIds = entry.getValue();
-        PeekableIntIterator it = arrayIndexFlattenDocIds == null ? 
kvPairFlattenedDocIds.getIntIterator()
-                : RoaringBitmap.and(arrayIndexFlattenDocIds, 
kvPairFlattenedDocIds).getIntIterator();
-        if (!it.hasNext()) {
-          continue;
-        }
-        MutableRoaringBitmap postingList = new MutableRoaringBitmap();
-        while (it.hasNext()) {
-          postingList.add(_docIdMapping.getInt(it.next()));
-        }
-        String val = entry.getKey().substring(key.length() + 1);
-        matchingDocsMap.put(val, postingList.toRoaringBitmap());
+      Map<String, RoaringBitmap> valueToDocIds = new HashMap<>();
+      for (Map.Entry<String, RoaringBitmap> entry : 
valueToFlattenedDocIds.entrySet()) {
+        RoaringBitmap docIds = new RoaringBitmap();
+        entry.getValue().forEach((IntConsumer) flattenedDocId -> 
docIds.add(_docIdMapping.getInt(flattenedDocId)));
+        valueToDocIds.put(entry.getKey(), docIds);
       }
+      return valueToDocIds;
     } finally {
       _readLock.unlock();
     }
-    return matchingDocsMap;
   }
 
   @Override
-  public String[] getValuesForKeyAndDocs(int[] docIds, Map<String, 
RoaringBitmap> matchingDocsMap) {
-    Int2ObjectOpenHashMap<String> docIdToValues = new 
Int2ObjectOpenHashMap<>(docIds.length);
-    RoaringBitmap docIdMask = RoaringBitmap.bitmapOf(docIds);
-
-    for (Map.Entry<String, RoaringBitmap> entry : matchingDocsMap.entrySet()) {
-      RoaringBitmap intersection = RoaringBitmap.and(entry.getValue(), 
docIdMask);
-      if (intersection.isEmpty()) {
-        continue;
+  public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String 
jsonPathKey) {
+    Map<String, RoaringBitmap> valueToMatchingFlattenedDocIdsMap = new 
HashMap<>();
+    _readLock.lock();
+    try {
+      Pair<String, RoaringBitmap> result = getKeyAndFlattenDocId(jsonPathKey);

Review Comment:
   (minor, not introduced here) Rename the method to `getKeyAndFlattenedDocId`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to