Jackie-Jiang commented on code in PR #8674: URL: https://github.com/apache/pinot/pull/8674#discussion_r869591842
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); + GenericRow record = segment.getRecord(docId, _reuse); Review Comment: Let's add an API `PrimaryKey getPrimaryKey(int docId, @Nullable PrimaryKey reuse)` to the `IndexSegment`. 
We don't want to read all columns, especially when the table is wide. ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); + GenericRow record = segment.getRecord(docId, _reuse); + PrimaryKey primaryKey = record.getPrimaryKey(_primaryKeyColumns); + if (_primaryKeyToRecordLocationMap.containsKey(primaryKey) + && _primaryKeyToRecordLocationMap.get(primaryKey).getSegment() == segment + && _primaryKeyToRecordLocationMap.get(primaryKey).getDocId() == docId) { Review Comment: No need to check `docId` because we want to remove the entire segment, so as long as the segment matches, we want to remove the entry ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - 
// Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); + GenericRow record = segment.getRecord(docId, _reuse); + PrimaryKey primaryKey = record.getPrimaryKey(_primaryKeyColumns); + if (_primaryKeyToRecordLocationMap.containsKey(primaryKey) + && _primaryKeyToRecordLocationMap.get(primaryKey).getSegment() == segment + && _primaryKeyToRecordLocationMap.get(primaryKey).getDocId() == docId) { Review Comment: No need to check `docId` because we want to remove the entire segment, so as long as the segment matches, we want to remove the entry ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); Review Comment: Avoid boxing ```suggestion int docId = iterator.nextInt(); ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert 
metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); + GenericRow record = segment.getRecord(docId, _reuse); + PrimaryKey primaryKey = record.getPrimaryKey(_primaryKeyColumns); + if (_primaryKeyToRecordLocationMap.containsKey(primaryKey) Review Comment: We need to do `computeIfPresent` to avoid the race condition of concurrent updates to the map ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); Review Comment: Avoid boxing ```suggestion int docId = iterator.nextInt(); ``` ########## 
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartitionUpsertMetadataManager.java: ########## @@ -246,13 +251,18 @@ public void removeSegment(IndexSegment segment) { LOGGER.info("Removing upsert metadata for segment: {}", segmentName); if (!Objects.requireNonNull(segment.getValidDocIds()).getMutableRoaringBitmap().isEmpty()) { - // Remove all the record locations that point to the removed segment - _primaryKeyToRecordLocationMap.forEach((primaryKey, recordLocation) -> { - if (recordLocation.getSegment() == segment) { - // Check and remove to prevent removing the key that is just updated - _primaryKeyToRecordLocationMap.remove(primaryKey, recordLocation); + PeekableIntIterator iterator = segment.getValidDocIds().getMutableRoaringBitmap().getIntIterator(); + while (iterator.hasNext()) { + _reuse.clear(); + int docId = iterator.next(); + GenericRow record = segment.getRecord(docId, _reuse); + PrimaryKey primaryKey = record.getPrimaryKey(_primaryKeyColumns); + if (_primaryKeyToRecordLocationMap.containsKey(primaryKey) Review Comment: We need to do `computeIfPresent` to avoid the race condition of concurrent updates to the map -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org