zhtaoxiang commented on code in PR #13636: URL: https://github.com/apache/pinot/pull/13636#discussion_r1717998687
########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/dedup/ConcurrentMapPartitionDedupMetadataManager.java: ########## @@ -19,106 +19,99 @@ package org.apache.pinot.segment.local.dedup; import com.google.common.annotations.VisibleForTesting; -import java.util.HashMap; +import com.google.common.util.concurrent.AtomicDouble; +import java.io.IOException; import java.util.Iterator; -import java.util.List; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.common.metrics.ServerGauge; -import org.apache.pinot.common.metrics.ServerMetrics; -import org.apache.pinot.segment.local.segment.readers.PinotSegmentColumnReader; import org.apache.pinot.segment.local.utils.HashUtils; import org.apache.pinot.segment.spi.IndexSegment; -import org.apache.pinot.spi.config.table.HashFunction; -import org.apache.pinot.spi.data.readers.PrimaryKey; -import org.apache.pinot.spi.utils.ByteArray; -class ConcurrentMapPartitionDedupMetadataManager implements PartitionDedupMetadataManager { - private final String _tableNameWithType; - private final List<String> _primaryKeyColumns; - private final int _partitionId; - private final ServerMetrics _serverMetrics; - private final HashFunction _hashFunction; +class ConcurrentMapPartitionDedupMetadataManager extends BasePartitionDedupMetadataManager { @VisibleForTesting - final ConcurrentHashMap<Object, IndexSegment> _primaryKeyToSegmentMap = new ConcurrentHashMap<>(); + final AtomicDouble _largestSeenTime = new AtomicDouble(0); + @VisibleForTesting + final ConcurrentHashMap<Object, Pair<IndexSegment, Double>> _primaryKeyToSegmentAndTimeMap = + new ConcurrentHashMap<>(); - public ConcurrentMapPartitionDedupMetadataManager(String tableNameWithType, List<String> primaryKeyColumns, - int partitionId, ServerMetrics serverMetrics, HashFunction hashFunction) { - _tableNameWithType = tableNameWithType; - _primaryKeyColumns = primaryKeyColumns; - 
_partitionId = partitionId; - _serverMetrics = serverMetrics; - _hashFunction = hashFunction; + protected ConcurrentMapPartitionDedupMetadataManager(String tableNameWithType, int partitionId, + DedupContext dedupContext) { + super(tableNameWithType, partitionId, dedupContext); } - public void addSegment(IndexSegment segment) { - // Add all PKs to _primaryKeyToSegmentMap - Iterator<PrimaryKey> primaryKeyIterator = getPrimaryKeyIterator(segment); - while (primaryKeyIterator.hasNext()) { - PrimaryKey pk = primaryKeyIterator.next(); - _primaryKeyToSegmentMap.put(HashUtils.hashPrimaryKey(pk, _hashFunction), segment); + @Override + protected void doAddSegment(IndexSegment segment, Iterator<DedupRecordInfo> dedupRecordInfoIterator) { + while (dedupRecordInfoIterator.hasNext()) { + DedupRecordInfo dedupRecordInfo = dedupRecordInfoIterator.next(); + double dedupTime = dedupRecordInfo.getDedupTime(); + _largestSeenTime.getAndUpdate(time -> Math.max(time, dedupTime)); + _primaryKeyToSegmentAndTimeMap.compute(HashUtils.hashPrimaryKey(dedupRecordInfo.getPrimaryKey(), _hashFunction), Review Comment: This is actually tricky. The addSegment method is used in 2 different cases: adding a new segment or replacing an existing segment. In the former case, we should log a WARN message; in the latter case, however, it is legitimate, so we should not log a WARN message. To resolve this issue, we should add a replaceSegment method, which can differentiate those 2 cases. Let me see if it's easy to add such a method. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org