This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e54ae4519d2967b6cd93d371fb35093de98c19cc
Author: walter <w41te...@gmail.com>
AuthorDate: Wed Apr 24 11:25:07 2024 +0800

    [fix](bdb) Write OP_TIMESTAMP operation until it succeeds (#33967)
    
    Currently, if writing the OP_TIMESTAMP operation fails, the next journal id
    is reset and the method returns. Because BDBJE will replicate the committed
    txns (those only persisted in the BDB log, but not yet replicated to other
    members) to the FOLLOWERs after the connection resumes, directly resetting
    the next journal id and returning will cause a subsequent txn written to
    the same journal ID not to be replayed by the FOLLOWERs. So for the
    OP_TIMESTAMP operation, try to write until it succeeds.
---
 .../apache/doris/journal/bdbje/BDBJEJournal.java   | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java 
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index 13fa926c06d..603a9def72c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -244,9 +244,18 @@ public class BDBJEJournal implements Journal { // 
CHECKSTYLE IGNORE THIS LINE: B
         if (LOG.isDebugEnabled()) {
             LOG.debug("opCode = {}, journal size = {}", op, theData.getSize());
         }
+
         // Write the key value pair to bdb.
         boolean writeSucceed = false;
-        for (int i = 0; i < RETRY_TIME; i++) {
+        // ATTN: If all the followers exit except master, master should 
continue provide
+        // query service, so do not exit if the write operation is 
OP_TIMESTAMP.
+        //
+        // Because BDBJE will replicate the committed txns to FOLLOWERs after 
the connection
+        // resumed, directly reseting the next journal id and returning will 
cause subsequent
+        // txn written to the same journal ID not to be replayed by the 
FOLLOWERS. So for
+        // OP_TIMESTAMP operation, try to write until it succeeds here.
+        int retryTimes = op == OperationType.OP_TIMESTAMP ? Integer.MAX_VALUE 
: RETRY_TIME;
+        for (int i = 0; i < retryTimes; i++) {
             try {
                 // Parameter null means auto commit
                 if (currentJournalDB.put(null, theKey, theData) == 
OperationStatus.SUCCESS) {
@@ -288,17 +297,6 @@ public class BDBJEJournal implements Journal { // 
CHECKSTYLE IGNORE THIS LINE: B
         }
 
         if (!writeSucceed) {
-            if (op == OperationType.OP_TIMESTAMP) {
-                /*
-                 * Do not exit if the write operation is OP_TIMESTAMP.
-                 * If all the followers exit except master, master should 
continue provide query
-                 * service.
-                 * To prevent master exit, we should exempt OP_TIMESTAMP write
-                 */
-                nextJournalId.set(id);
-                LOG.warn("master can not achieve quorum. write timestamp fail. 
but will not exit.");
-                return -1;
-            }
             String msg = "write bdb failed. will exit. journalId: " + id + ", 
bdb database Name: "
                     + currentJournalDB.getDatabaseName();
             LOG.error(msg);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to