Jackie-Jiang commented on code in PR #11739: URL: https://github.com/apache/pinot/pull/11739#discussion_r1367566522
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java: ########## @@ -294,6 +297,73 @@ private RoaringBitmap getMatchingFlattenedDocIds(Predicate predicate) { } } + /** + * MutableJsonIndexReaderContext holds a cache that is used to accelerate the following reads. The cache is specific + * to a key, and therefore should NOT be reused for other keys. For each key, MutableJsonIndexImpl.createContext() + * should be invoked to create a fresh context. + */ + public static class MutableJsonIndexReaderContext implements JsonIndexReaderContext { + private final Map<String, RoaringBitmap> _cache; + + public MutableJsonIndexReaderContext() { + _cache = new HashMap<>(); + } + + public Map<String, RoaringBitmap> getCache() { + return _cache; + } + } + + @Override + public MutableJsonIndexReaderContext createContext() { + return new MutableJsonIndexReaderContext(); + } + + @Override + public String[] getValuesForKeyAndDocs(String key, int[] docIds, JsonIndexReaderContext context) { + MutableJsonIndexReaderContext mutableContext = (MutableJsonIndexReaderContext) context; + Map<String, RoaringBitmap> cache = mutableContext.getCache(); + + Int2ObjectOpenHashMap<String> docIdToValues = new Int2ObjectOpenHashMap<>(docIds.length); + RoaringBitmap docIdMask = RoaringBitmap.bitmapOf(docIds); + + if (cache.isEmpty()) { + _readLock.lock(); + try { + for (Map.Entry<String, RoaringBitmap> entry : _postingListMap.entrySet()) { + if (!entry.getKey().startsWith(key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR)) { + continue; + } + MutableRoaringBitmap flattenedDocIds = entry.getValue().toMutableRoaringBitmap(); + PeekableIntIterator it = flattenedDocIds.getIntIterator(); + MutableRoaringBitmap postingList = new MutableRoaringBitmap(); Review Comment: Directly create `RoaringBitmap` here ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -300,6 +306,92 @@ private int getDocId(int flattenedDocId) { return _docIdMapping.getInt((long) flattenedDocId << 2); } + /** + * ImmutableJsonIndexReaderContext holds a cache that is used to accelerate following reads. The cache is specific + * to a key, and therefore should NOT be reused for other keys. For each key, ImmutableJsonIndexReader.createContext() + * should be invoked to create a fresh context. + */ + public static class ImmutableJsonIndexReaderContext implements JsonIndexReaderContext { + private final List<RoaringBitmap> _cache; + + public ImmutableJsonIndexReaderContext() { + _cache = new ArrayList<>(); + } + + public List<RoaringBitmap> getCache() { + return _cache; + } + } + + @Override + public ImmutableJsonIndexReaderContext createContext() { + return new ImmutableJsonIndexReaderContext(); + } + + @Override + public String[] getValuesForKeyAndDocs(String key, int[] docIds, JsonIndexReaderContext context) { + ImmutableJsonIndexReaderContext immutableContext = (ImmutableJsonIndexReaderContext) context; + List<RoaringBitmap> cache = immutableContext.getCache(); + + RoaringBitmap docIdMask = RoaringBitmap.bitmapOf(docIds); + int[] dictIds = getDictIdRangeForKey(key); Review Comment: `dictIds` can also be part of the context ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/json/MutableJsonIndexImpl.java: ########## @@ -294,6 +297,73 @@ private RoaringBitmap getMatchingFlattenedDocIds(Predicate predicate) { } } + /** + * MutableJsonIndexReaderContext holds a cache that is used to accelerate the following reads. The cache is specific + * to a key, and therefore should NOT be reused for other keys. For each key, MutableJsonIndexImpl.createContext() + * should be invoked to create a fresh context. + */ + public static class MutableJsonIndexReaderContext implements JsonIndexReaderContext { + private final Map<String, RoaringBitmap> _cache; + + public MutableJsonIndexReaderContext() { + _cache = new HashMap<>(); + } + + public Map<String, RoaringBitmap> getCache() { + return _cache; + } + } + + @Override + public MutableJsonIndexReaderContext createContext() { + return new MutableJsonIndexReaderContext(); + } + + @Override + public String[] getValuesForKeyAndDocs(String key, int[] docIds, JsonIndexReaderContext context) { + MutableJsonIndexReaderContext mutableContext = (MutableJsonIndexReaderContext) context; + Map<String, RoaringBitmap> cache = mutableContext.getCache(); + + Int2ObjectOpenHashMap<String> docIdToValues = new Int2ObjectOpenHashMap<>(docIds.length); + RoaringBitmap docIdMask = RoaringBitmap.bitmapOf(docIds); + + if (cache.isEmpty()) { + _readLock.lock(); + try { + for (Map.Entry<String, RoaringBitmap> entry : _postingListMap.entrySet()) { + if (!entry.getKey().startsWith(key + BaseJsonIndexCreator.KEY_VALUE_SEPARATOR)) { + continue; + } + MutableRoaringBitmap flattenedDocIds = entry.getValue().toMutableRoaringBitmap(); + PeekableIntIterator it = flattenedDocIds.getIntIterator(); + MutableRoaringBitmap postingList = new MutableRoaringBitmap(); + while (it.hasNext()) { + postingList.add(_docIdMapping.getInt(it.next())); + } + String val = entry.getKey().substring(key.length() + 1); + cache.put(val, postingList.toRoaringBitmap()); + } + } finally { + _readLock.unlock(); + } + } + + for (Map.Entry<String, RoaringBitmap> entry : cache.entrySet()) { + RoaringBitmap intersection = RoaringBitmap.and(entry.getValue(), docIdMask); Review Comment: (code format) Reformat ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/json/ImmutableJsonIndexReader.java: ########## @@ -300,6 +305,92 @@ private int getDocId(int flattenedDocId) { return _docIdMapping.getInt((long) flattenedDocId << 2); } + /** + * ImmutableJsonIndexReaderContext holds a cache that is used to accelerate following reads. The cache is specific + * to a key, and therefore should NOT be reused for other keys. For each key, ImmutableJsonIndexReader.createContext() + * should be invoked to create a fresh context. + */ + public static class ImmutableJsonIndexReaderContext implements JsonIndexReaderContext { + private final Int2ObjectOpenHashMap<RoaringBitmap> _cache; + + public ImmutableJsonIndexReaderContext() { + _cache = new Int2ObjectOpenHashMap<>(); + } + + public Int2ObjectOpenHashMap<RoaringBitmap> getCache() { + return _cache; + } + } + + @Override + public ImmutableJsonIndexReaderContext createContext() { + return new ImmutableJsonIndexReaderContext(); + } + + @Override + public String[] getValuesForKeyAndDocs(String key, int[] docIds, JsonIndexReaderContext context) { Review Comment: The main concern is that the context here is associated with a key instead of the index. This can cause confusion because it is not used the same way as the forward index context, and creating context doesn't even take the key as the argument. Taking another look, seems we can always use `Map<String, RoaringBitmap>` as the context and avoid extra dictionary lookup. Basically using `public Map<String, RoaringBitmap> getMatchingDocsMap(String key)` for both immutable and mutable index -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org