Jackie-Jiang commented on code in PR #12697: URL: https://github.com/apache/pinot/pull/12697#discussion_r1552473668
########## pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java: ########## @@ -52,114 +53,66 @@ */ public class KinesisConsumer extends KinesisConnectionHandler implements PartitionGroupConsumer { private static final Logger LOGGER = LoggerFactory.getLogger(KinesisConsumer.class); - public static final long SLEEP_TIME_BETWEEN_REQUESTS = 1000L; - private final String _streamTopicName; - private final int _numMaxRecordsToFetch; - private final ExecutorService _executorService; - private final ShardIteratorType _shardIteratorType; - private final int _rpsLimit; + private static final long SLEEP_TIME_BETWEEN_REQUESTS = 1000L; - public KinesisConsumer(KinesisConfig kinesisConfig) { - super(kinesisConfig); - _streamTopicName = kinesisConfig.getStreamTopicName(); - _numMaxRecordsToFetch = kinesisConfig.getNumMaxRecordsToFetch(); - _shardIteratorType = kinesisConfig.getShardIteratorType(); - _rpsLimit = kinesisConfig.getRpsLimit(); - _executorService = Executors.newSingleThreadExecutor(); + // TODO: Revisit the logic of using a separate executor to manage the request timeout. 
Currently it is not thread safe + private final ExecutorService _executorService = Executors.newSingleThreadExecutor(); + + public KinesisConsumer(KinesisConfig config) { + super(config); + LOGGER.info("Created Kinesis consumer with topic: {}, RPS limit: {}, max records per fetch: {}", + config.getStreamTopicName(), config.getRpsLimit(), config.getNumMaxRecordsToFetch()); } @VisibleForTesting - public KinesisConsumer(KinesisConfig kinesisConfig, KinesisClient kinesisClient) { - super(kinesisConfig, kinesisClient); - _kinesisClient = kinesisClient; - _streamTopicName = kinesisConfig.getStreamTopicName(); - _numMaxRecordsToFetch = kinesisConfig.getNumMaxRecordsToFetch(); - _shardIteratorType = kinesisConfig.getShardIteratorType(); - _rpsLimit = kinesisConfig.getRpsLimit(); - _executorService = Executors.newSingleThreadExecutor(); + public KinesisConsumer(KinesisConfig config, KinesisClient kinesisClient) { + super(config, kinesisClient); } /** * Fetch records from the Kinesis stream between the start and end KinesisCheckpoint */ @Override - public KinesisRecordsBatch fetchMessages(StreamPartitionMsgOffset startCheckpoint, - StreamPartitionMsgOffset endCheckpoint, int timeoutMs) { - List<KinesisStreamMessage> recordList = new ArrayList<>(); - Future<KinesisRecordsBatch> kinesisFetchResultFuture = - _executorService.submit(() -> getResult(startCheckpoint, endCheckpoint, recordList)); - + public KinesisMessageBatch fetchMessages(StreamPartitionMsgOffset startMsgOffset, int timeoutMs) { + KinesisPartitionGroupOffset startOffset = (KinesisPartitionGroupOffset) startMsgOffset; + List<BytesStreamMessage> messages = new ArrayList<>(); + Future<KinesisMessageBatch> kinesisFetchResultFuture = + _executorService.submit(() -> getResult(startOffset, messages)); try { return kinesisFetchResultFuture.get(timeoutMs, TimeUnit.MILLISECONDS); } catch (TimeoutException e) { kinesisFetchResultFuture.cancel(true); - return handleException((KinesisPartitionGroupOffset) startCheckpoint, 
recordList); } catch (Exception e) { - return handleException((KinesisPartitionGroupOffset) startCheckpoint, recordList); + // Ignored } + return buildKinesisMessageBatch(startOffset, messages, false); Review Comment: Yes. This race condition exists in the current code as well. Will try to address it separately. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org