9aman commented on code in PR #16492:
URL: https://github.com/apache/pinot/pull/16492#discussion_r2262085804


##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java:
##########
@@ -962,6 +962,78 @@ private void createNewSegmentZKMetadata(TableConfig 
tableConfig, StreamConfig st
     persistSegmentZKMetadata(realtimeTableName, newSegmentZKMetadata, -1);
   }
 
+  /**
+   * Creates and persists segment ZK metadata for the new CONSUMING segment.
+   */
+  private void createNewSegmentZKMetadataWithOffsetAutoReset(TableConfig 
tableConfig, StreamConfig streamConfig,
+      LLCSegmentName newLLCSegmentName, long creationTimeMs, 
CommittingSegmentDescriptor committingSegmentDescriptor,
+      @Nullable SegmentZKMetadata committingSegmentZKMetadata, 
InstancePartitions instancePartitions, int numPartitions,
+      int numReplicas, boolean skipAutoReset) {
+    String oldStartOffset = committingSegmentDescriptor.getNextOffset();
+    String startOffset = skipAutoReset ? oldStartOffset : computeStartOffset(
+        oldStartOffset, streamConfig, newLLCSegmentName.getPartitionGroupId());
+    createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, 
creationTimeMs,
+        committingSegmentDescriptor, committingSegmentZKMetadata, 
instancePartitions, numPartitions, numReplicas,
+        startOffset);
+  }
+
+  private String computeStartOffset(String nextOffset, StreamConfig 
streamConfig, int partitionId) {
+    if (!streamConfig.isEnableOffsetAutoReset()) {
+      return nextOffset;
+    }
+    long timeThreshold = streamConfig.getOffsetAutoResetTimeSecThreshold();
+    int offsetThreshold = streamConfig.getOffsetAutoResetOffsetThreshold();
+    if (timeThreshold <= 0 && offsetThreshold <= 0) {
+      LOGGER.warn("Invalid offset auto reset configuration for table: {}, 
topic: {}. "
+              + "timeThreshold: {}, offsetThreshold: {}",
+          streamConfig.getTableNameWithType(), streamConfig.getTopicName(), 
timeThreshold, offsetThreshold);
+      return nextOffset;
+    }
+    String clientId =
+        PinotLLCRealtimeSegmentManager.class.getSimpleName() + "-" + 
streamConfig.getTableNameWithType() + "-"
+            + streamConfig.getTopicName();
+    StreamConsumerFactory consumerFactory = 
StreamConsumerFactoryProvider.create(streamConfig);
+    StreamPartitionMsgOffsetFactory offsetFactory = 
consumerFactory.createStreamMsgOffsetFactory();
+    StreamPartitionMsgOffset nextOffsetWithType = 
offsetFactory.create(nextOffset);
+    StreamPartitionMsgOffset offsetAtSLA;
+    StreamPartitionMsgOffset latestOffset;
+    try (StreamMetadataProvider metadataProvider = 
consumerFactory.createPartitionMetadataProvider(clientId,
+        partitionId)) {
+      // Fetching timestamp from an offset is an expensive operation which 
requires reading the data,
+      // while fetching offset from timestamp is lightweight and only needs to 
read metadata.
+      // Hence, instead of checking if latestOffset's time - nextOffset's time 
< SLA, we would check
+      // (CurrentTime - SLA)'s offset > nextOffset.
+      // TODO: it is relying on System.currentTimeMillis() which might be 
affected by time drift. If we are able to
+      // get nextOffset's time, we should instead check (nextOffset's time + 
SLA)'s offset < latestOffset
+      latestOffset = 
metadataProvider.fetchStreamPartitionOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA,
 5000);

Review Comment:
   ```suggestion
         latestOffset = metadataProvider.fetchStreamPartitionOffset(OffsetCriteria.LARGEST_OFFSET_CRITERIA, STREAM_FETCH_TIMEOUT_MS);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to