chia7712 commented on code in PR #19699:
URL: https://github.com/apache/kafka/pull/19699#discussion_r2265363560
##########
transaction-coordinator/src/main/java/org/apache/kafka/coordinator/transaction/TxnTransitMetadata.java:
##########
@@ -32,7 +32,9 @@ public record TxnTransitMetadata(
short lastProducerEpoch,
int txnTimeoutMs,
TransactionState txnState,
- Set<TopicPartition> topicPartitions,
+ // The TransactionMetadata#topicPartitions field is mutable.
+ // To avoid deepcopy when assigning value from TxnTransitMetadata to
TransactionMetadata, use HashSet here.
Review Comment:
Typo: `deepcopy` should be written as two words, `deep copy`.
##########
transaction-coordinator/src/main/java/org/apache/kafka/coordinator/transaction/TransactionMetadata.java:
##########
@@ -0,0 +1,689 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.coordinator.transaction;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.config.LogLevelConfig;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.record.RecordBatch;
+import org.apache.kafka.server.common.TransactionVersion;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MarkerFactory;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Supplier;
+
+public class TransactionMetadata {
+ private static final Logger LOGGER =
LoggerFactory.getLogger(TransactionMetadata.class);
+ private final String transactionalId;
+ private long producerId;
+ private long prevProducerId;
+ private long nextProducerId;
+ private short producerEpoch;
+ private short lastProducerEpoch;
+ private int txnTimeoutMs;
+ private TransactionState state;
+ private Set<TopicPartition> topicPartitions;
+ private volatile long txnStartTimestamp;
+ private volatile long txnLastUpdateTimestamp;
+ private TransactionVersion clientTransactionVersion;
+
+ // pending state is used to indicate the state that this transaction is
going to
+ // transit to, and for blocking future attempts to transit it again if it
is not legal;
+ // initialized as the same as the current state
+ private Optional<TransactionState> pendingState;
+
+ // Indicates that during a previous attempt to fence a producer, the
bumped epoch may not have been
+ // successfully written to the log. If this is true, we will not bump the
epoch again when fencing
+ private boolean hasFailedEpochFence;
+
+ private final ReentrantLock lock;
+
+ public static boolean isEpochExhausted(short producerEpoch) {
+ return producerEpoch >= Short.MAX_VALUE - 1;
+ }
+
+ /**
+ * @param transactionalId transactional id
+ * @param producerId producer id
+ * @param prevProducerId producer id for the last committed
transaction with this transactional ID
+ * @param nextProducerId Latest producer ID sent to the producer
for the given transactional ID
+ * @param producerEpoch current epoch of the producer
+ * @param lastProducerEpoch last epoch of the producer
+ * @param txnTimeoutMs timeout to be used to abort long
running transactions
+ * @param state current state of the transaction
+ * @param topicPartitions current set of partitions that are part
of this transaction
+ * @param txnStartTimestamp time the transaction was started, i.e.,
when first partition is added
+ * @param txnLastUpdateTimestamp updated when any operation updates the
TransactionMetadata. To be used for expiration
+ * @param clientTransactionVersion TransactionVersion used by the client
when the state was transitioned
+ */
+ public TransactionMetadata(String transactionalId,
+ long producerId,
+ long prevProducerId,
+ long nextProducerId,
+ short producerEpoch,
+ short lastProducerEpoch,
+ int txnTimeoutMs,
+ TransactionState state,
+ Set<TopicPartition> topicPartitions,
+ long txnStartTimestamp,
+ long txnLastUpdateTimestamp,
+ TransactionVersion clientTransactionVersion) {
+ this.transactionalId = transactionalId;
+ this.producerId = producerId;
+ this.prevProducerId = prevProducerId;
+ this.nextProducerId = nextProducerId;
+ this.producerEpoch = producerEpoch;
+ this.lastProducerEpoch = lastProducerEpoch;
+ this.txnTimeoutMs = txnTimeoutMs;
+ this.state = state;
+ this.topicPartitions = new HashSet<>(topicPartitions);
+ this.txnStartTimestamp = txnStartTimestamp;
+ this.txnLastUpdateTimestamp = txnLastUpdateTimestamp;
+ this.clientTransactionVersion = clientTransactionVersion;
+ this.pendingState = Optional.empty();
+ this.hasFailedEpochFence = false;
+ this.lock = new ReentrantLock();
+ }
+
+ public <T> T inLock(Supplier<T> function) {
+ lock.lock();
+ try {
+ return function.get();
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ public void addPartitions(Collection<TopicPartition> partitions) {
+ topicPartitions.addAll(partitions);
+ }
+
+ public void removePartition(TopicPartition topicPartition) {
+ if (state != TransactionState.PREPARE_COMMIT && state !=
TransactionState.PREPARE_ABORT)
+ throw new IllegalStateException("Transaction metadata's current
state is " + state + ", and its pending state is " +
+ pendingState + " while trying to remove partitions whose txn
marker has been sent, this is not expected");
+
+ topicPartitions.remove(topicPartition);
+ }
+
+ // this is visible for test only
+ public TxnTransitMetadata prepareNoTransit() {
+ // do not call transitTo as it will set the pending state, a follow-up
call to abort the transaction will set its pending state
+ return new TxnTransitMetadata(producerId, prevProducerId,
nextProducerId, producerEpoch, lastProducerEpoch, txnTimeoutMs,
+ state, new HashSet<>(topicPartitions), txnStartTimestamp,
txnLastUpdateTimestamp, clientTransactionVersion);
+ }
+
+ public TxnTransitMetadata prepareFenceProducerEpoch() {
+ if (producerEpoch == Short.MAX_VALUE)
+ throw new IllegalStateException("Cannot fence producer with epoch
equal to Short.MaxValue since this would overflow");
+
+ // If we've already failed to fence an epoch (because the write to the
log failed), we don't increase it again.
+ // This is safe because we never return the epoch to client if we fail
to fence the epoch
+ short bumpedEpoch = hasFailedEpochFence ? producerEpoch : (short)
(producerEpoch + 1);
+
+ TransitionData data = new
TransitionData(TransactionState.PREPARE_EPOCH_FENCE);
+ data.producerEpoch = bumpedEpoch;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareIncrementProducerEpoch(
+ int newTxnTimeoutMs,
+ Optional<Short> expectedProducerEpoch,
+ long updateTimestamp) {
+ if (isProducerEpochExhausted())
+ throw new IllegalStateException("Cannot allocate any more producer
epochs for producerId " + producerId);
+
+ short bumpedEpoch = (short) (producerEpoch + 1);
+ short produceEpochResult;
+ short lastProducerEpochResult;
+
+ if (expectedProducerEpoch.isEmpty()) {
+ // If no expected epoch was provided by the producer, bump the
current epoch and set the last epoch to -1
+ // In the case of a new producer, producerEpoch will be -1 and
bumpedEpoch will be 0
+ produceEpochResult = bumpedEpoch;
+ lastProducerEpochResult = RecordBatch.NO_PRODUCER_EPOCH;
+ } else {
+ short expectedEpoch = expectedProducerEpoch.get();
+ if (producerEpoch == RecordBatch.NO_PRODUCER_EPOCH ||
expectedEpoch == producerEpoch) {
+ // If the expected epoch matches the current epoch, or if
there is no current epoch, the producer is attempting
+ // to continue after an error and no other producer has been
initialized. Bump the current and last epochs.
+ // The no current epoch case means this is a new producer;
producerEpoch will be -1 and bumpedEpoch will be 0
+ produceEpochResult = bumpedEpoch;
+ lastProducerEpochResult = producerEpoch;
+ } else if (expectedEpoch == lastProducerEpoch) {
+ // If the expected epoch matches the previous epoch, it is a
retry of a successful call, so just return the
+ // current epoch without bumping. There is no danger of this
producer being fenced, because a new producer
+ // calling InitProducerId would have caused the last epoch to
be set to -1.
+ // Note that if the IBP is prior to 2.4.IV1, the
lastProducerId and lastProducerEpoch will not be written to
+ // the transaction log, so a retry that spans a coordinator
change will fail. We expect this to be a rare case.
+ produceEpochResult = producerEpoch;
+ lastProducerEpochResult = lastProducerEpoch;
+ } else {
+ // Otherwise, the producer has a fenced epoch and should
receive an PRODUCER_FENCED error
+ LOGGER.info("Expected producer epoch {} does not match current
producer epoch {} or previous producer epoch {}",
+ expectedEpoch, producerEpoch, lastProducerEpoch);
+ throw Errors.PRODUCER_FENCED.exception();
+ }
+ }
+
+ TransitionData data = new TransitionData(TransactionState.EMPTY);
+ data.producerEpoch = produceEpochResult;
+ data.lastProducerEpoch = lastProducerEpochResult;
+ data.txnTimeoutMs = newTxnTimeoutMs;
+ data.topicPartitions = Set.of();
+ data.txnStartTimestamp = -1L;
+ data.txnLastUpdateTimestamp = updateTimestamp;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareProducerIdRotation(long newProducerId,
+ int newTxnTimeoutMs,
+ long updateTimestamp,
+ boolean
recordLastEpoch) {
+ if (hasPendingTransaction())
+ throw new IllegalStateException("Cannot rotate producer ids while
a transaction is still pending");
+
+ TransitionData data = new TransitionData(TransactionState.EMPTY);
+ data.producerId = newProducerId;
+ data.producerEpoch = 0;
+ data.lastProducerEpoch = recordLastEpoch ? producerEpoch :
RecordBatch.NO_PRODUCER_EPOCH;
+ data.txnTimeoutMs = newTxnTimeoutMs;
+ data.topicPartitions = Set.of();
+ data.txnStartTimestamp = -1L;
+ data.txnLastUpdateTimestamp = updateTimestamp;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareAddPartitions(Set<TopicPartition>
addedTopicPartitions,
+ long updateTimestamp,
+ TransactionVersion
clientTransactionVersion) {
+ long newTxnStartTimestamp;
+ if (state == TransactionState.EMPTY || state ==
TransactionState.COMPLETE_ABORT || state == TransactionState.COMPLETE_COMMIT) {
+ newTxnStartTimestamp = updateTimestamp;
+ } else {
+ newTxnStartTimestamp = txnStartTimestamp;
+ }
+
+ Set<TopicPartition> newTopicPartitions = new
HashSet<>(topicPartitions);
+ newTopicPartitions.addAll(addedTopicPartitions);
+
+ TransitionData data = new TransitionData(TransactionState.ONGOING);
+ data.topicPartitions = newTopicPartitions;
+ data.txnStartTimestamp = newTxnStartTimestamp;
+ data.txnLastUpdateTimestamp = updateTimestamp;
+ data.clientTransactionVersion = clientTransactionVersion;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareAbortOrCommit(TransactionState newState,
+ TransactionVersion
clientTransactionVersion,
+ long nextProducerId,
+ long updateTimestamp,
+ boolean noPartitionAdded) {
+ short updatedProducerEpoch;
+ short updatedLastProducerEpoch;
+
+ if (clientTransactionVersion.supportsEpochBump()) {
+ // We already ensured that we do not overflow here. MAX_SHORT is
the highest possible value.
+ updatedProducerEpoch = (short) (producerEpoch + 1);
+ updatedLastProducerEpoch = producerEpoch;
+ } else {
+ updatedProducerEpoch = producerEpoch;
+ updatedLastProducerEpoch = lastProducerEpoch;
+ }
+
+ // With transaction V2, it is allowed to abort the transaction without
adding any partitions. Then, the transaction
+ // start time is uncertain but it is still required. So we can use the
update time as the transaction start time.
+ long newTxnStartTimestamp = noPartitionAdded ? updateTimestamp :
txnStartTimestamp;
+
+ TransitionData data = new TransitionData(newState);
+ data.nextProducerId = nextProducerId;
+ data.producerEpoch = updatedProducerEpoch;
+ data.lastProducerEpoch = updatedLastProducerEpoch;
+ data.txnStartTimestamp = newTxnStartTimestamp;
+ data.txnLastUpdateTimestamp = updateTimestamp;
+ data.clientTransactionVersion = clientTransactionVersion;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareComplete(long updateTimestamp) {
+ TransactionState newState = state == TransactionState.PREPARE_COMMIT ?
+ TransactionState.COMPLETE_COMMIT : TransactionState.COMPLETE_ABORT;
+
+ // Since the state change was successfully written to the log, unset
the flag for a failed epoch fence
+ hasFailedEpochFence = false;
+
+ long updatedProducerId;
+ short updatedProducerEpoch;
+
+ // In the prepareComplete transition for the overflow case, the
lastProducerEpoch is kept at MAX-1,
+ // which is the last epoch visible to the client.
+ // Internally, however, during the transition between
prepareAbort/prepareCommit and prepareComplete, the producer epoch
+ // reaches MAX but the client only sees the transition as MAX-1
followed by 0.
+ // When an epoch overflow occurs, we set the producerId to
nextProducerId and reset the epoch to 0,
+ // but lastProducerEpoch remains MAX-1 to maintain consistency with
what the client last saw.
+ if (clientTransactionVersion.supportsEpochBump() && nextProducerId !=
RecordBatch.NO_PRODUCER_ID) {
+ updatedProducerId = nextProducerId;
+ updatedProducerEpoch = 0;
+ } else {
+ updatedProducerId = producerId;
+ updatedProducerEpoch = producerEpoch;
+ }
+
+ TransitionData data = new TransitionData(newState);
+ data.producerId = updatedProducerId;
+ data.nextProducerId = RecordBatch.NO_PRODUCER_ID;
+ data.producerEpoch = updatedProducerEpoch;
+ data.topicPartitions = Set.of();
+ data.txnLastUpdateTimestamp = updateTimestamp;
+ return prepareTransitionTo(data);
+ }
+
+ public TxnTransitMetadata prepareDead() {
+ TransitionData data = new TransitionData(TransactionState.DEAD);
+ data.topicPartitions = Set.of();
+ return prepareTransitionTo(data);
+ }
+
+ /**
+ * Check if the epochs have been exhausted for the current producerId. We
do not allow the client to use an
+ * epoch equal to Short.MaxValue to ensure that the coordinator will
always be able to fence an existing producer.
+ */
+ public boolean isProducerEpochExhausted() {
+ return isEpochExhausted(producerEpoch);
+ }
+
+ /**
+ * Check if this is a distributed two phase commit transaction.
+ * Such transactions have no timeout (identified by maximum value for
timeout).
+ */
+ public boolean isDistributedTwoPhaseCommitTxn() {
+ return txnTimeoutMs == Integer.MAX_VALUE;
+ }
+
+ private boolean hasPendingTransaction() {
+ return state == TransactionState.ONGOING ||
+ state == TransactionState.PREPARE_ABORT ||
+ state == TransactionState.PREPARE_COMMIT;
+ }
+
+ private TxnTransitMetadata prepareTransitionTo(TransitionData data) {
+ if (pendingState.isPresent())
+ throw new IllegalStateException("Preparing transaction state
transition to " + state +
+ " while it already a pending state " + pendingState.get());
+
+ if (data.producerId < 0)
+ throw new IllegalArgumentException("Illegal new producer id " +
producerId);
+
+ // The epoch is initialized to NO_PRODUCER_EPOCH when the
TransactionMetadata
+ // is created for the first time and it could stay like this until
transitioning
+ // to Dead.
+ if (data.state != TransactionState.DEAD && data.producerEpoch < 0)
+ throw new IllegalArgumentException("Illegal new producer epoch " +
producerEpoch);
+
+ // check that the new state transition is valid and update the pending
state if necessary
+ if (data.state.validPreviousStates().contains(this.state)) {
+ TxnTransitMetadata transitMetadata = new TxnTransitMetadata(
+ data.producerId, this.producerId, data.nextProducerId,
data.producerEpoch, data.lastProducerEpoch,
+ data.txnTimeoutMs, data.state, new
HashSet<>(data.topicPartitions),
Review Comment:
> are we not sure if this set should be mutable or not?

We discussed whether `TxnTransitMetadata` should hold a reference to
`topicPartitions` or perform a deep copy. I prefer to keep the original
behavior of holding the reference:
```scala
val transitMetadata = new TxnTransitMetadata(producerId,
this.producerId, nextProducerId, producerEpoch, lastProducerEpoch,
txnTimeoutMs, state,
// no deep copy
topicPartitions.asJava, txnStartTimestamp, txnLastUpdateTimestamp,
clientTransactionVersion)
```
@FrankYang0529 WDYT?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]