zhtaoxiang commented on code in PR #13636:
URL: https://github.com/apache/pinot/pull/13636#discussion_r1719322780


##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/dedup/ConcurrentMapPartitionDedupMetadataManager.java:
##########
@@ -19,106 +19,124 @@
 package org.apache.pinot.segment.local.dedup;
 
 import com.google.common.annotations.VisibleForTesting;
-import java.util.HashMap;
+import com.google.common.util.concurrent.AtomicDouble;
+import java.io.IOException;
 import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.pinot.common.metrics.ServerGauge;
-import org.apache.pinot.common.metrics.ServerMetrics;
-import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader;
 import org.apache.pinot.segment.local.utils.HashUtils;
 import org.apache.pinot.segment.spi.IndexSegment;
-import org.apache.pinot.spi.config.table.HashFunction;
-import org.apache.pinot.spi.data.readers.PrimaryKey;
-import org.apache.pinot.spi.utils.ByteArray;
 
-class ConcurrentMapPartitionDedupMetadataManager implements 
PartitionDedupMetadataManager {
-  private final String _tableNameWithType;
-  private final List<String> _primaryKeyColumns;
-  private final int _partitionId;
-  private final ServerMetrics _serverMetrics;
-  private final HashFunction _hashFunction;
 
+class ConcurrentMapPartitionDedupMetadataManager extends 
BasePartitionDedupMetadataManager {
   @VisibleForTesting
-  final ConcurrentHashMap<Object, IndexSegment> _primaryKeyToSegmentMap = new 
ConcurrentHashMap<>();
+  final AtomicDouble _largestSeenTime = new AtomicDouble(0);
+  @VisibleForTesting
+  final ConcurrentHashMap<Object, Pair<IndexSegment, Double>> 
_primaryKeyToSegmentAndTimeMap =
+      new ConcurrentHashMap<>();
 
-  public ConcurrentMapPartitionDedupMetadataManager(String tableNameWithType, 
List<String> primaryKeyColumns,
-      int partitionId, ServerMetrics serverMetrics, HashFunction hashFunction) 
{
-    _tableNameWithType = tableNameWithType;
-    _primaryKeyColumns = primaryKeyColumns;
-    _partitionId = partitionId;
-    _serverMetrics = serverMetrics;
-    _hashFunction = hashFunction;
+  protected ConcurrentMapPartitionDedupMetadataManager(String 
tableNameWithType, int partitionId,
+      DedupContext dedupContext) {
+    super(tableNameWithType, partitionId, dedupContext);
   }
 
-  public void addSegment(IndexSegment segment) {
-    // Add all PKs to _primaryKeyToSegmentMap
-    Iterator<PrimaryKey> primaryKeyIterator = getPrimaryKeyIterator(segment);
-    while (primaryKeyIterator.hasNext()) {
-      PrimaryKey pk = primaryKeyIterator.next();
-      _primaryKeyToSegmentMap.put(HashUtils.hashPrimaryKey(pk, _hashFunction), 
segment);
+  @Override
+  protected void doAddOrReplaceSegment(IndexSegment oldSegment, IndexSegment 
newSegment,
+      Iterator<DedupRecordInfo> dedupRecordInfoIteratorOfNewSegment) {
+    String segmentName = newSegment.getSegmentName();
+    while (dedupRecordInfoIteratorOfNewSegment.hasNext()) {
+      DedupRecordInfo dedupRecordInfo = 
dedupRecordInfoIteratorOfNewSegment.next();
+      double dedupTime = dedupRecordInfo.getDedupTime();
+      _largestSeenTime.getAndUpdate(time -> Math.max(time, dedupTime));
+      
_primaryKeyToSegmentAndTimeMap.compute(HashUtils.hashPrimaryKey(dedupRecordInfo.getPrimaryKey(),
 _hashFunction),
+          (primaryKey, segmentAndTime) -> {
+            if (segmentAndTime == null) {
+              return Pair.of(newSegment, dedupTime);
+            } else {
+              // when oldSegment is null, it means we are adding a new segment
+              // when oldSegment is not null, it means we are replacing an 
existing segment
+              if (oldSegment == null) {
+                _logger.warn("When adding a new segment: dedup record in 
segment: {} with primary key: {} and dedup "
+                        + "time: {} already exists in segment: {} with dedup 
time: {}", segmentName,
+                    dedupRecordInfo.getPrimaryKey(), dedupTime, 
segmentAndTime.getLeft().getSegmentName(),
+                    segmentAndTime.getRight());
+              } else {
+                if (segmentAndTime.getLeft() != oldSegment) {
+                  _logger.warn("When replacing a segment: dedup record in 
segment: {} with primary key: {} and dedup "

Review Comment:
   It may not happen, just log here in case there are corner cases that we are 
not aware of. For example, for some reason, a primary key is overriden by 
another record before the segment replacement logic takes place.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to