This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 2748d4ed8b7 [branch-1.2](recover) support skipping missing version in 
select by session variable (#27075)
2748d4ed8b7 is described below

commit 2748d4ed8b75f3e8d12270438cfbb640f78d1581
Author: xy720 <22125576+xy...@users.noreply.github.com>
AuthorDate: Thu Nov 23 14:46:51 2023 +0800

    [branch-1.2](recover) support skipping missing version in select by session 
variable (#27075)
    
    This commit adds the session variable `skip_missing_version` to control the 
query behavior.
    
    If `skip_missing_version` is set to true, the query will always try to 
select the replica with the highest lastSuccessVersion among all surviving BE 
replicas.
    
    If `skip_missing_version` is set to true, the query will always skip the 
missing rowsets in BE and only return the data from existing rowsets.
---
 be/src/exec/olap_scanner.cpp                       |  3 +-
 be/src/olap/schema_change.cpp                      |  2 +-
 be/src/olap/tablet.cpp                             | 19 +++++++---
 be/src/olap/tablet.h                               |  7 +++-
 be/src/runtime/runtime_state.h                     |  4 ++
 be/src/vec/exec/scan/new_olap_scanner.cpp          |  4 +-
 be/test/olap/tablet_test.cpp                       |  4 +-
 docs/en/docs/admin-manual/config/fe-config.md      | 14 -------
 docs/en/docs/advanced/variables.md                 |  4 ++
 docs/zh-CN/docs/admin-manual/config/fe-config.md   | 18 ---------
 docs/zh-CN/docs/advanced/variables.md              |  4 ++
 .../main/java/org/apache/doris/common/Config.java  | 14 -------
 .../java/org/apache/doris/catalog/Replica.java     | 18 +++++++++
 .../main/java/org/apache/doris/catalog/Tablet.java |  6 +--
 .../org/apache/doris/planner/OlapScanNode.java     | 33 ++++++++++-------
 .../java/org/apache/doris/qe/SessionVariable.java  | 17 +++++++++
 gensrc/thrift/PaloInternalService.thrift           |  3 ++
 .../session_variable/test_skip_missing_version.out |  5 +++
 .../test_skip_missing_version.groovy               | 43 ++++++++++++++++++++++
 19 files changed, 146 insertions(+), 76 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 72769f77694..1ab514194c8 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -104,7 +104,8 @@ Status OlapScanner::prepare(
             // the rowsets maybe compacted when the last olap scanner starts
             Version rd_version(0, _version);
             Status acquire_reader_st =
-                    _tablet->capture_rs_readers(rd_version, 
&_tablet_reader_params.rs_readers);
+                    _tablet->capture_rs_readers(rd_version, 
&_tablet_reader_params.rs_readers,
+                                                
_runtime_state->skip_missing_version());
             if (!acquire_reader_st.ok()) {
                 LOG(WARNING) << "fail to init reader.res=" << 
acquire_reader_st;
                 std::stringstream ss;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index b22ba4f32db..1cc71662f24 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -2138,7 +2138,7 @@ Status SchemaChangeHandler::_get_versions_to_be_changed(
     *max_rowset = rowset;
 
     RETURN_NOT_OK(base_tablet->capture_consistent_versions(Version(0, 
rowset->version().second),
-                                                           
versions_to_be_changed));
+                                                           
versions_to_be_changed, false, false));
 
     return Status::OK();
 }
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 98ef536d9f4..2f4a9c1d6ad 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -581,7 +581,7 @@ void Tablet::delete_expired_stale_rowset() {
         Version test_version = Version(0, lastest_delta->end_version());
         stale_version_path_map[*path_id_iter] = version_path;
 
-        Status status = capture_consistent_versions(test_version, nullptr);
+        Status status = capture_consistent_versions(test_version, nullptr, 
false, false);
         // 1. When there is no consistent versions, we must reconstruct the 
tracker.
         if (!status.ok()) {
             // 2. fetch missing version after delete
@@ -696,7 +696,8 @@ bool Tablet::_reconstruct_version_tracker_if_necessary() {
 }
 
 Status Tablet::capture_consistent_versions(const Version& spec_version,
-                                           std::vector<Version>* version_path, 
bool quiet) const {
+                                           std::vector<Version>* version_path,
+                                           bool skip_missing_version, bool 
quiet) const {
     Status status =
             
_timestamped_version_tracker.capture_consistent_versions(spec_version, 
version_path);
     if (!status.ok() && !quiet) {
@@ -715,6 +716,10 @@ Status Tablet::capture_consistent_versions(const Version& 
spec_version,
                 LOG(WARNING) << "status:" << status << ", tablet:" << 
full_name()
                              << ", missed version for version:" << 
spec_version;
                 _print_missed_versions(missed_versions);
+                if (skip_missing_version) {
+                    LOG(WARNING) << "force skipping missing version for 
tablet:" << full_name();
+                    return Status::OK();
+                }
             }
         }
     }
@@ -723,7 +728,7 @@ Status Tablet::capture_consistent_versions(const Version& 
spec_version,
 
 Status Tablet::check_version_integrity(const Version& version, bool quiet) {
     std::shared_lock rdlock(_meta_lock);
-    return capture_consistent_versions(version, nullptr, quiet);
+    return capture_consistent_versions(version, nullptr, false, quiet);
 }
 
 // If any rowset contains the specific version, it means the version already 
exist
@@ -747,7 +752,7 @@ void Tablet::acquire_version_and_rowsets(
 Status Tablet::capture_consistent_rowsets(const Version& spec_version,
                                           std::vector<RowsetSharedPtr>* 
rowsets) const {
     std::vector<Version> version_path;
-    RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path));
+    RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path, 
false, false));
     RETURN_NOT_OK(_capture_consistent_rowsets_unlocked(version_path, rowsets));
     return Status::OK();
 }
@@ -784,9 +789,11 @@ Status Tablet::_capture_consistent_rowsets_unlocked(const 
std::vector<Version>&
 }
 
 Status Tablet::capture_rs_readers(const Version& spec_version,
-                                  std::vector<RowsetReaderSharedPtr>* 
rs_readers) const {
+                                  std::vector<RowsetReaderSharedPtr>* 
rs_readers,
+                                  bool skip_missing_version) const {
     std::vector<Version> version_path;
-    RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path));
+    RETURN_NOT_OK(
+            capture_consistent_versions(spec_version, &version_path, 
skip_missing_version, false));
     RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers));
     return Status::OK();
 }
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index ce7bb1cc44b..816b50c188d 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -148,9 +148,10 @@ public:
 
     // Given spec_version, find a continuous version path and store it in 
version_path.
     // If quiet is true, then only "does this path exist" is returned.
+    // If skip_missing_version is true, return ok even there are missing 
versions.
     Status capture_consistent_versions(const Version& spec_version,
                                        std::vector<Version>* version_path,
-                                       bool quiet = false) const;
+                                       bool skip_missing_version, bool quiet) 
const;
     // if quiet is true, no error log will be printed if there are missing 
versions
     Status check_version_integrity(const Version& version, bool quiet = false);
     bool check_version_exist(const Version& version) const;
@@ -159,8 +160,10 @@ public:
 
     Status capture_consistent_rowsets(const Version& spec_version,
                                       std::vector<RowsetSharedPtr>* rowsets) 
const;
+    // If skip_missing_version is true, skip versions if they are missing.
     Status capture_rs_readers(const Version& spec_version,
-                              std::vector<RowsetReaderSharedPtr>* rs_readers) 
const;
+                              std::vector<RowsetReaderSharedPtr>* rs_readers,
+                              bool skip_missing_version) const;
 
     Status capture_rs_readers(const std::vector<Version>& version_path,
                               std::vector<RowsetReaderSharedPtr>* rs_readers) 
const;
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index dc4c5f97d1d..21cb70e73b3 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -362,6 +362,10 @@ public:
         return _query_options.__isset.skip_delete_bitmap && 
_query_options.skip_delete_bitmap;
     }
 
+    bool skip_missing_version() const {
+        return _query_options.__isset.skip_missing_version && 
_query_options.skip_missing_version;
+    }
+
     int partitioned_hash_join_rows_threshold() const {
         if (!_query_options.__isset.partitioned_hash_join_rows_threshold) {
             return 0;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp 
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index a702b2b6e56..1b961af6da1 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -90,8 +90,8 @@ Status NewOlapScanner::prepare(const TPaloScanRange& 
scan_range,
             // to prevent this case: when there are lots of olap scanners to 
run for example 10000
             // the rowsets maybe compacted when the last olap scanner starts
             Version rd_version(0, _version);
-            Status acquire_reader_st =
-                    _tablet->capture_rs_readers(rd_version, 
&_tablet_reader_params.rs_readers);
+            Status acquire_reader_st = _tablet->capture_rs_readers(
+                    rd_version, &_tablet_reader_params.rs_readers, 
_state->skip_missing_version());
             if (!acquire_reader_st.ok()) {
                 LOG(WARNING) << "fail to init reader.res=" << 
acquire_reader_st;
                 std::stringstream ss;
diff --git a/be/test/olap/tablet_test.cpp b/be/test/olap/tablet_test.cpp
index f97fd6833c0..496d67a9e52 100644
--- a/be/test/olap/tablet_test.cpp
+++ b/be/test/olap/tablet_test.cpp
@@ -300,12 +300,12 @@ TEST_F(TestTablet, pad_rowset) {
 
     Version version(5, 5);
     std::vector<RowsetReaderSharedPtr> readers;
-    ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers).ok());
+    ASSERT_FALSE(_tablet->capture_rs_readers(version, &readers, false).ok());
     readers.clear();
 
     PadRowsetAction action;
     action._pad_rowset(_tablet, version);
-    ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers).ok());
+    ASSERT_TRUE(_tablet->capture_rs_readers(version, &readers, false).ok());
 }
 
 TEST_F(TestTablet, cooldown_policy) {
diff --git a/docs/en/docs/admin-manual/config/fe-config.md 
b/docs/en/docs/admin-manual/config/fe-config.md
index 1e69299a1df..9d557772b64 100644
--- a/docs/en/docs/admin-manual/config/fe-config.md
+++ b/docs/en/docs/admin-manual/config/fe-config.md
@@ -1805,20 +1805,6 @@ In some very special circumstances, such as code bugs, 
or human misoperation, et
 
 Set to true so that Doris will automatically use blank replicas to fill 
tablets which all replicas have been damaged or missing
 
-#### `recover_with_skip_missing_version`
-
-Default:disable
-
-IsMutable:true
-
-MasterOnly:true
-
-In some scenarios, there is an unrecoverable metadata problem in the cluster, 
and the visibleVersion of the data does not match be. In this case, it is still 
necessary to restore the remaining data (which may cause problems with the 
correctness of the data). This configuration is the same as` 
recover_with_empty_tablet` should only be used in emergency situations
-This configuration has three values:
-* disable : If an exception occurs, an error will be reported normally.
-* ignore_version: ignore the visibleVersion information recorded in fe 
partition, use replica version
-* ignore_all: In addition to ignore_version, when encountering no queryable 
replica, skip it directly instead of throwing an exception
-
 #### `min_clone_task_timeout_sec` `And max_clone_task_timeout_sec`
 
 Default:Minimum 3 minutes, maximum two hours
diff --git a/docs/en/docs/advanced/variables.md 
b/docs/en/docs/advanced/variables.md
index 7152fa49bb7..23880635e6e 100644
--- a/docs/en/docs/advanced/variables.md
+++ b/docs/en/docs/advanced/variables.md
@@ -574,6 +574,10 @@ Translated with www.DeepL.com/Translator (free version)
 
     For debugging purpose. In Unique Key MoW table, in case of problems of 
reading data, setting value to `true` will also read deleted data.
 
+* `skip_missing_version`
+
+     In some scenarios, all replicas of tablet are having missing versions, 
and the tablet is unable to recover. This config can control the behavior of 
query. When it is opened, the query will ignore the visible version recorded in 
FE partition, use the replica version. If the replica on be has missing 
versions, the query will directly skip this missing version, and only return 
the data of the existing version, In addition, the query will always try to 
select the one with the highest la [...]
+
 * `default_password_lifetime`
 
        Default password expiration time. The default value is 0, which means 
no expiration. The unit is days. This parameter is only enabled if the user's 
password expiration property has a value of DEFAULT. like:
diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md 
b/docs/zh-CN/docs/admin-manual/config/fe-config.md
index d78ae3804c1..ec4d759be35 100644
--- a/docs/zh-CN/docs/admin-manual/config/fe-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md
@@ -1805,24 +1805,6 @@ show data (其他用法:HELP SHOW DATA)
 
 在这种情况下,您可以将此配置设置为 true。 系统会将损坏的 tablet 替换为空 tablet,以确保查询可以执行。 
(但此时数据已经丢失,所以查询结果可能不准确)
 
-#### `recover_with_skip_missing_version`
-
-默认值:disable
-
-是否可以动态配置:true
-
-是否为 Master FE 节点独有的配置项:true
-
-有些场景下集群出现了不可恢复的元数据问题,数据已的visibleversion 已经和be 不匹配,
-
-这种情况下仍然需要恢复剩余的数据(可能能会导致数据的正确性有问题),这个配置同`recover_with_empty_tablet` 一样只能在紧急情况下使用
-
-这个配置有三个值:
-
-   * disable :出现异常会正常报错。
-   * ignore_version: 忽略 fe partition 中记录的visibleVersion 信息, 使用replica version 
-   * ignore_all: 除了ignore_version, 在遇到找不到可查询的replica 时,直接跳过而不是抛出异常
-
 #### `min_clone_task_timeout_sec`  和 `max_clone_task_timeout_sec`
 
 默认值:最小3分钟,最大两小时
diff --git a/docs/zh-CN/docs/advanced/variables.md 
b/docs/zh-CN/docs/advanced/variables.md
index 2309389367a..a002749107a 100644
--- a/docs/zh-CN/docs/advanced/variables.md
+++ b/docs/zh-CN/docs/advanced/variables.md
@@ -561,6 +561,10 @@ try (Connection conn = 
DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/
 
     用于调试目的。在Unique Key MoW表中,当发现读取表的数据结果有误的时候,把此变量的值设置为`true`,将会把被delete 
bitmap标记删除的数据当成正常数据读取。
 
+* `skip_missing_version`
+
+     有些极端场景下,表的 Tablet 下的所有的所有副本都有版本缺失,使得这些 Tablet 
没有办法被恢复,导致整张表都不能查询。这个变量可以用来控制查询的行为,打设置为`true`时,查询会忽略 FE partition 中记录的 
visibleVersion,使用 replica version。如果 Be 上的 Replica 
有缺失的版本,则查询会直接跳过这些缺失的版本,只返回仍存在版本的数据。此外,查询将会总是选择所有存活的 BE 中所有 Replica 里 
lastSuccessVersion 
最大的那一个,这样可以尽可能的恢复更多的数据。这个变量应该只在上述紧急情况下才被设置为`true`,仅用于临时让表恢复查询。注意,此变量与 
use_fix_replica 变量冲突,当 use_fix_replica 变量不等于 -1 时,此变量会不起作用
+
 * `default_password_lifetime`
 
        默认的密码过期时间。默认值为 0,即表示不过期。单位为天。该参数只有当用户的密码过期属性为 DEFAULT 值时,才启用。如:
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index f0953957f23..722c2303636 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1406,20 +1406,6 @@ public class Config extends ConfigBase {
     @ConfField(mutable = true, masterOnly = true)
     public static boolean recover_with_empty_tablet = false;
 
-    /**
-     * In some scenarios, there is an unrecoverable metadata problem in the 
cluster,
-     * and the visibleVersion of the data does not match be. In this case, it 
is still
-     * necessary to restore the remaining data (which may cause problems with 
the correctness of the data).
-     * This configuration is the same as` recover_with_empty_tablet` should 
only be used in emergency situations
-     * This configuration has three values:
-     *   disable : If an exception occurs, an error will be reported normally.
-     *   ignore_version: ignore the visibleVersion information recorded in fe 
partition, use replica version
-     *   ignore_all: In addition to ignore_version, when encountering no 
queryable replica,
-     *   skip it directly instead of throwing an exception
-     */
-    @ConfField(mutable = true, masterOnly = true)
-    public static String recover_with_skip_missing_version = "disable";
-
     /**
      * Whether to add a delete sign column when create unique table
      */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java
index da845d2cd67..39ea83214ab 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Replica.java
@@ -35,6 +35,8 @@ import java.util.Comparator;
 public class Replica implements Writable {
     private static final Logger LOG = LogManager.getLogger(Replica.class);
     public static final VersionComparator<Replica> VERSION_DESC_COMPARATOR = 
new VersionComparator<Replica>();
+    public static final LastSuccessVersionComparator<Replica> 
LAST_SUCCESS_VERSION_COMPARATOR =
+             new LastSuccessVersionComparator<Replica>();
     public static final IdComparator<Replica> ID_COMPARATOR = new 
IdComparator<Replica>();
 
     public enum ReplicaState {
@@ -528,6 +530,22 @@ public class Replica implements Writable {
         }
     }
 
+    private static class LastSuccessVersionComparator<T extends Replica> 
implements Comparator<T> {
+        public LastSuccessVersionComparator() {
+        }
+
+        @Override
+        public int compare(T replica1, T replica2) {
+            if (replica1.getLastSuccessVersion() < 
replica2.getLastSuccessVersion()) {
+                return 1;
+            } else if (replica1.getLastSuccessVersion() == 
replica2.getLastSuccessVersion()) {
+                return 0;
+            } else {
+                return -1;
+            }
+        }
+    }
+
     private static class IdComparator<T extends Replica> implements 
Comparator<T> {
         public IdComparator() {
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index 1ba726cffc9..0e60a951974 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -214,7 +214,7 @@ public class Tablet extends MetaObject implements Writable {
     }
 
     // for query
-    public List<Replica> getQueryableReplicas(long visibleVersion) {
+    public List<Replica> getQueryableReplicas(long visibleVersion, boolean 
allowFailedVersion) {
         List<Replica> allQueryableReplica = 
Lists.newArrayListWithCapacity(replicas.size());
         List<Replica> auxiliaryReplica = 
Lists.newArrayListWithCapacity(replicas.size());
         for (Replica replica : replicas) {
@@ -222,8 +222,8 @@ public class Tablet extends MetaObject implements Writable {
                 continue;
             }
 
-            // Skip the missing version replica
-            if (replica.getLastFailedVersion() > 0) {
+            // Skip the missing version replica.
+            if (replica.getLastFailedVersion() > 0 && !allowFailedVersion) {
                 continue;
             }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 05fcf0bc6e7..fc0a4210c39 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -627,14 +627,19 @@ public class OlapScanNode extends ScanNode {
         String visibleVersionStr = String.valueOf(visibleVersion);
 
         Set<Tag> allowedTags = Sets.newHashSet();
+        int useFixReplica = -1;
         boolean needCheckTags = false;
+        boolean skipMissingVersion = false;
         if (ConnectContext.get() != null) {
             allowedTags = ConnectContext.get().getResourceTags();
             needCheckTags = ConnectContext.get().isResourceTagsSet();
+            useFixReplica = 
ConnectContext.get().getSessionVariable().useFixReplica;
+            // if use_fix_replica is set to true, set skip_missing_version to 
false
+            skipMissingVersion = useFixReplica == -1 && 
ConnectContext.get().getSessionVariable().skipMissingVersion;
         }
         for (Tablet tablet : tablets) {
             long tabletId = tablet.getId();
-            if 
(!Config.recover_with_skip_missing_version.equalsIgnoreCase("disable")) {
+            if (skipMissingVersion) {
                 long tabletVersion = -1L;
                 for (Replica replica : tablet.getReplicas()) {
                     if (replica.getVersion() > tabletVersion) {
@@ -657,7 +662,7 @@ public class OlapScanNode extends ScanNode {
             paloRange.setTabletId(tabletId);
 
             // random shuffle List && only collect one copy
-            List<Replica> replicas = 
tablet.getQueryableReplicas(visibleVersion);
+            List<Replica> replicas = 
tablet.getQueryableReplicas(visibleVersion, skipMissingVersion);
             if (replicas.isEmpty()) {
                 LOG.error("no queryable replica found in tablet {}. visible 
version {}",
                         tabletId, visibleVersion);
@@ -669,12 +674,13 @@ public class OlapScanNode extends ScanNode {
                 throw new UserException("Failed to get scan range, no 
queryable replica found in tablet: " + tabletId);
             }
 
-            int useFixReplica = -1;
-            if (ConnectContext.get() != null) {
-                useFixReplica = 
ConnectContext.get().getSessionVariable().useFixReplica;
-            }
             if (useFixReplica == -1) {
-                Collections.shuffle(replicas);
+                if (skipMissingVersion) {
+                    // sort by replica's last success version, higher success 
version in the front.
+                    replicas.sort(Replica.LAST_SUCCESS_VERSION_COMPARATOR);
+                } else {
+                    Collections.shuffle(replicas);
+                }
             } else {
                 LOG.debug("use fix replica, value: {}, replica num: {}", 
useFixReplica, replicas.size());
                 // sort by replica id
@@ -721,14 +727,15 @@ public class OlapScanNode extends ScanNode {
                     collectedStat = true;
                 }
                 scanBackendIds.add(backend.getId());
+                // For skipping missing version of tablet, we only select the 
backend with the highest last
+                // success version replica to save as much data as possible.
+                if (!tabletIsNull && skipMissingVersion) {
+                    break;
+                }
             }
             if (tabletIsNull) {
-                if 
(Config.recover_with_skip_missing_version.equalsIgnoreCase("ignore_all")) {
-                    continue;
-                } else {
-                    throw new UserException(tabletId + " have no queryable 
replicas. err: "
-                            + Joiner.on(", ").join(errs));
-                }
+                throw new UserException(tabletId + " have no queryable 
replicas. err: "
+                         + Joiner.on(", ").join(errs));
             }
             TScanRange scanRange = new TScanRange();
             scanRange.setPaloScanRange(paloRange);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index ce3660d2d9c..2a9a784173f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -236,6 +236,8 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String SKIP_DELETE_BITMAP = "skip_delete_bitmap";
 
+    public static final String SKIP_MISSING_VERSION = "skip_missing_version";
+
     public static final String ENABLE_NEW_SHUFFLE_HASH_METHOD = 
"enable_new_shuffle_hash_method";
 
     public static final String ENABLE_PUSH_DOWN_NO_GROUP_AGG = 
"enable_push_down_no_group_agg";
@@ -646,6 +648,19 @@ public class SessionVariable implements Serializable, 
Writable {
     @VariableMgr.VarAttr(name = SKIP_DELETE_BITMAP)
     public boolean skipDeleteBitmap = false;
 
+    // This variable replace the original FE config 
`recover_with_skip_missing_version`.
+    // In some scenarios, all replicas of tablet are having missing versions, 
and the tablet is unable to recover.
+    // This config can control the behavior of query. When it is set to 
`true`, the query will ignore the
+    // visible version recorded in FE partition, use the replica version. If 
the replica on BE has missing versions,
+    // the query will directly skip this missing version, and only return the 
data of the existing versions.
+    // Besides, the query will always try to select the one with the highest 
lastSuccessVersion among all surviving
+    // BE replicas, so as to recover as much data as possible.
+    // You should only open it in the emergency scenarios mentioned above, 
only used for temporary recovery queries.
+    // This variable conflicts with the use_fix_replica variable, when the 
use_fix_replica variable is not -1,
+    // this variable will not work.
+    @VariableMgr.VarAttr(name = SKIP_MISSING_VERSION)
+    public boolean skipMissingVersion = false;
+
     // This variable is used to avoid FE fallback to the original parser. When 
we execute SQL in regression tests
     // for nereids, fallback will cause the Doris return the correct result 
although the syntax is unsupported
     // in nereids for some mistaken modification. You should set it on the
@@ -1443,6 +1458,8 @@ public class SessionVariable implements Serializable, 
Writable {
 
         tResult.setSkipDeleteBitmap(skipDeleteBitmap);
 
+        tResult.setSkipMissingVersion(skipMissingVersion);
+
         
tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold);
 
         return tResult;
diff --git a/gensrc/thrift/PaloInternalService.thrift 
b/gensrc/thrift/PaloInternalService.thrift
index 6ebf0f4a3a8..c54d83acb61 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -190,6 +190,9 @@ struct TQueryOptions {
   56: optional bool skip_delete_bitmap = false
 
   57: optional i64 scan_queue_mem_limit
+
+  // For emergency use, skip missing version when reading rowsets
+  58: optional bool skip_missing_version = false;
 }
     
 
diff --git 
a/regression-test/data/query_p0/session_variable/test_skip_missing_version.out 
b/regression-test/data/query_p0/session_variable/test_skip_missing_version.out
new file mode 100644
index 00000000000..37d08502236
--- /dev/null
+++ 
b/regression-test/data/query_p0/session_variable/test_skip_missing_version.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_all --
+1000   a       10
+2000   b       10
+
diff --git 
a/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy
 
b/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy
new file mode 100644
index 00000000000..c1f4c4464dc
--- /dev/null
+++ 
b/regression-test/suites/query_p0/session_variable/test_skip_missing_version.groovy
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_skip_missing_version") {
+    def test_tbl = "test_skip_missing_version_tbl"
+
+    sql """ DROP TABLE IF EXISTS ${test_tbl}"""
+    sql """
+     CREATE TABLE ${test_tbl} (
+       `k1` int(11) NULL,
+       `k2` char(5) NULL,
+       `k3` tinyint(4) NULL
+     ) ENGINE=OLAP
+     DUPLICATE KEY(`k1`, `k2`, `k3`)
+     DISTRIBUTED BY HASH(`k1`) BUCKETS 5
+     PROPERTIES (
+       "replication_num"="1"
+     );
+    """
+
+    sql """ INSERT INTO ${test_tbl} VALUES(1000, 'a', 10); """
+    sql """ INSERT INTO ${test_tbl} VALUES(2000, 'b', 10); """
+
+    // This case cannot verify the results, but it can verify abnormalities 
after
+    // SET skip_missing_version=true
+    sql """ SET skip_missing_version=true """
+    qt_select_all """ select * from ${test_tbl} order by k1 """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to