This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 84be3bbab3f branch-3.0: [Fix](Compaction) Fix full clone failure when rowset missing #53984 (#54162)
84be3bbab3f is described below
commit 84be3bbab3f4278a8a4d6b6d6ceacb32327ca09d
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Aug 1 16:45:43 2025 +0800
branch-3.0: [Fix](Compaction) Fix full clone failure when rowset missing #53984 (#54162)
Cherry-picked from #53984
Co-authored-by: abmdocrt <[email protected]>
---
be/src/olap/cumulative_compaction.cpp | 7 ++-
be/src/olap/task/engine_publish_version_task.cpp | 6 +-
...ion_clone_missing_rowset_fault_injection.groovy | 2 +-
...ll_clone_missing_rowset_fault_injection.groovy} | 63 +++++++++++--------
...sh_clone_missing_rowset_fault_injection.groovy} | 0
...ll_clone_missing_rowset_fault_injection.groovy} | 71 ++++++++++++----------
6 files changed, 84 insertions(+), 65 deletions(-)
diff --git a/be/src/olap/cumulative_compaction.cpp
b/be/src/olap/cumulative_compaction.cpp
index 8db397aa577..ca33cbc887d 100644
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -197,18 +197,19 @@ Status CumulativeCompaction::pick_rowsets_to_compact() {
<< ", first missed version next rowset version=" <<
missing_versions[1]
<< ", tablet=" << _tablet->tablet_id();
if (config::enable_auto_clone_on_compaction_missing_version) {
+ int64_t max_version = tablet()->max_version_unlocked();
LOG_INFO("cumulative compaction submit missing rowset clone task.")
.tag("tablet_id", _tablet->tablet_id())
- .tag("version", missing_versions.back().first)
+ .tag("max_version", max_version)
.tag("replica_id", tablet()->replica_id())
.tag("partition_id", _tablet->partition_id())
.tag("table_id", _tablet->table_id());
- Status st = _engine.submit_clone_task(tablet(),
missing_versions.back().first);
+ Status st = _engine.submit_clone_task(tablet(), max_version);
if (!st) {
LOG_WARNING("cumulative compaction failed to submit missing
rowset clone task.")
.tag("st", st.msg())
.tag("tablet_id", _tablet->tablet_id())
- .tag("version", missing_versions.back().first)
+ .tag("max_version", max_version)
.tag("replica_id", tablet()->replica_id())
.tag("partition_id", _tablet->partition_id())
.tag("table_id", _tablet->table_id());
diff --git a/be/src/olap/task/engine_publish_version_task.cpp
b/be/src/olap/task/engine_publish_version_task.cpp
index 13e7dcd97aa..992e74109cc 100644
--- a/be/src/olap/task/engine_publish_version_task.cpp
+++ b/be/src/olap/task/engine_publish_version_task.cpp
@@ -220,17 +220,17 @@ Status EnginePublishVersionTask::execute() {
if
(config::enable_auto_clone_on_mow_publish_missing_version) {
LOG_INFO("mow publish submit missing rowset clone
task.")
.tag("tablet_id", tablet->tablet_id())
- .tag("version", version.first - 1)
+ .tag("version", version.second)
.tag("replica_id", tablet->replica_id())
.tag("partition_id",
tablet->partition_id())
.tag("table_id", tablet->table_id());
- Status st =
_engine.submit_clone_task(tablet.get(), version.first - 1);
+ Status st =
_engine.submit_clone_task(tablet.get(), version.second);
if (!st) {
LOG_WARNING(
"mow publish failed to submit missing
rowset clone task.")
.tag("st", st.msg())
.tag("tablet_id", tablet->tablet_id())
- .tag("version", version.first - 1)
+ .tag("version", version.second)
.tag("replica_id",
tablet->replica_id())
.tag("partition_id",
tablet->partition_id())
.tag("table_id", tablet->table_id());
diff --git
a/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
index a7f060a1108..f15de709b3d 100644
---
a/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
@@ -39,7 +39,7 @@ suite('test_compaction_clone_missing_rowset_fault_injection',
'docker') {
assertNotNull(normalBe)
try {
- def tableName = "test_compaction_clone_missing_rowset"
+ def tableName =
"test_compaction_clone_missing_rowset_fault_injection"
sql """ DROP TABLE IF EXISTS ${tableName} force"""
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
diff --git
a/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_compaction_full_clone_missing_rowset_fault_injection.groovy
similarity index 66%
copy from
regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
copy to
regression-test/suites/fault_injection_p0/test_compaction_full_clone_missing_rowset_fault_injection.groovy
index a7f060a1108..1e8bdd4f373 100644
---
a/regression-test/suites/fault_injection_p0/test_compaction_clone_missing_rowset_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_compaction_full_clone_missing_rowset_fault_injection.groovy
@@ -20,26 +20,32 @@ import org.apache.http.NoHttpResponseException
import org.apache.doris.regression.util.DebugPoint
import org.apache.doris.regression.util.NodeType
-suite('test_compaction_clone_missing_rowset_fault_injection', 'docker') {
+suite('test_compaction_full_clone_missing_rowset_fault_injection', 'docker') {
def options = new ClusterOptions()
options.cloudMode = false
options.enableDebugPoints()
options.feConfigs += [ "disable_tablet_scheduler=true" ]
options.beConfigs += [
"enable_auto_clone_on_compaction_missing_version=true" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_time_sec=0" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_by_size=true" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_threshold_size=0" ]
options.beNum = 3
docker(options) {
def injectBe = null
- def normalBe = null
+ def normalBe1 = null
+ def normalBe2 = null
def backends = sql_return_maparray('show backends')
injectBe = backends[0]
assertNotNull(injectBe)
- normalBe = backends[1]
- assertNotNull(normalBe)
+ normalBe1 = backends[1]
+ assertNotNull(normalBe1)
+ normalBe2 = backends[2]
+ assertNotNull(normalBe2)
try {
- def tableName = "test_compaction_clone_missing_rowset"
+ def tableName =
"test_compaction_full_clone_missing_rowset_fault_injection"
sql """ DROP TABLE IF EXISTS ${tableName} force"""
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
@@ -60,48 +66,55 @@
suite('test_compaction_clone_missing_rowset_fault_injection', 'docker') {
sql """ INSERT INTO ${tableName} VALUES (4,0)"""
DebugPoint.disableDebugPoint(injectBe.Host,
injectBe.HttpPort.toInteger(), NodeType.BE,
"EnginePublishVersionTask.finish.random")
sql """ INSERT INTO ${tableName} VALUES (5,0)"""
-
+ sql """ INSERT INTO ${tableName} VALUES (6,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (7,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (8,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (9,0)"""
+
def array = sql_return_maparray("SHOW TABLETS FROM ${tableName}")
def tabletId = array[0].TabletId
+ // normal BEs compaction
+ logger.info("normal BE run cumu compaction:" + tabletId)
+ def (code, out, err) =
be_run_cumulative_compaction(normalBe1.Host, normalBe1.HttpPort, tabletId)
+ logger.info("normal BE1 Run cumu compaction: code=" + code + ",
out=" + out + ", err=" + err)
+ (code, out, err) = be_run_cumulative_compaction(normalBe2.Host,
normalBe2.HttpPort, tabletId)
+ logger.info("normal BE2 Run cumu compaction: code=" + code + ",
out=" + out + ", err=" + err)
+
+ logger.info("normal BE show:" + tabletId)
+ (code, out, err) = be_show_tablet_status(normalBe1.Host,
normalBe1.HttpPort, tabletId)
+ logger.info("normal BE1 show: code=" + code + ", out=" + out + ",
err=" + err)
+ (code, out, err) = be_show_tablet_status(normalBe2.Host,
normalBe2.HttpPort, tabletId)
+ logger.info("normal BE2 show: code=" + code + ", out=" + out + ",
err=" + err)
+
+ sleep(10000)
+
// 1st check rowsets
logger.info("1st show:" + tabletId)
- def (code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
+ (code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
logger.info("1st show: code=" + code + ", out=" + out + ", err=" +
err)
assertTrue(out.contains("[0-1]"))
assertTrue(out.contains("[2-2]"))
// missing rowset [3-5]
assertTrue(out.contains("[3-5]"))
assertTrue(out.contains("[6-6]"))
+ assertTrue(out.contains("[7-7]"))
+ assertTrue(out.contains("[8-8]"))
+ assertTrue(out.contains("[9-9]"))
+ assertTrue(out.contains("[10-10]"))
logger.info("1st run cumu compaction:" + tabletId)
(code, out, err) = be_run_cumulative_compaction(injectBe.Host,
injectBe.HttpPort, tabletId)
logger.info("1st Run cumu compaction: code=" + code + ", out=" +
out + ", err=" + err)
- sleep(10000)
+ sleep(30000)
// 2nd check rowsets
logger.info("2nd show:" + tabletId)
(code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
logger.info("2nd show: code=" + code + ", out=" + out + ", err=" +
err)
assertTrue(out.contains("[0-1]"))
- assertTrue(out.contains("[2-2]"))
- assertTrue(out.contains("[3-3]"))
- assertTrue(out.contains("[4-4]"))
- assertTrue(out.contains("[5-5]"))
- assertTrue(out.contains("[6-6]"))
-
- logger.info("2nd cumu compaction:" + tabletId)
- (code, out, err) = be_run_cumulative_compaction(injectBe.Host,
injectBe.HttpPort, tabletId)
- logger.info("2nd cumu compaction: code=" + code + ", out=" + out +
", err=" + err)
-
- // check rowsets
- logger.info("3rd show:" + tabletId)
- (code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
- logger.info("3rd show: code=" + code + ", out=" + out + ", err=" +
err)
- assertTrue(out.contains("[0-1]"))
- assertTrue(out.contains("[2-2]"))
- assertTrue(out.contains("[3-6]"))
+ assertTrue(out.contains("[2-10]"))
} finally {
if (injectBe != null) {
diff --git
a/regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset.groovy
b/regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset_fault_injection.groovy
similarity index 100%
copy from
regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset.groovy
copy to
regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset_fault_injection.groovy
diff --git
a/regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset.groovy
b/regression-test/suites/fault_injection_p0/test_mow_publish_full_clone_missing_rowset_fault_injection.groovy
similarity index 67%
rename from
regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset.groovy
rename to
regression-test/suites/fault_injection_p0/test_mow_publish_full_clone_missing_rowset_fault_injection.groovy
index 14f0073f5c8..e582d3339bf 100644
---
a/regression-test/suites/fault_injection_p0/test_mow_publish_clone_missing_rowset.groovy
+++
b/regression-test/suites/fault_injection_p0/test_mow_publish_full_clone_missing_rowset_fault_injection.groovy
@@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException
import org.apache.doris.regression.util.DebugPoint
import org.apache.doris.regression.util.NodeType
-suite('test_mow_publish_clone_missing_rowset_fault_injection', 'docker') {
+suite('test_mow_publish_full_clone_missing_rowset_fault_injection', 'docker') {
def set_be_param = { paramName, paramValue, beIp, bePort ->
def (code, out, err) = curl("POST",
String.format("http://%s:%s/api/update_config?%s=%s", beIp, bePort, paramName,
paramValue))
@@ -31,21 +31,27 @@
suite('test_mow_publish_clone_missing_rowset_fault_injection', 'docker') {
options.cloudMode = false
options.enableDebugPoints()
options.feConfigs += [ "disable_tablet_scheduler=true" ]
- options.beConfigs += [
"enable_auto_clone_on_mow_publish_missing_version=false" ]
+ options.beConfigs += [
"enable_auto_clone_on_compaction_missing_version=true" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_time_sec=0" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_by_size=true" ]
+ options.beConfigs += [ "tablet_rowset_stale_sweep_threshold_size=0" ]
options.beNum = 3
docker(options) {
def injectBe = null
- def normalBe = null
+ def normalBe1 = null
+ def normalBe2 = null
def backends = sql_return_maparray('show backends')
injectBe = backends[0]
assertNotNull(injectBe)
- normalBe = backends[1]
- assertNotNull(normalBe)
+ normalBe1 = backends[1]
+ assertNotNull(normalBe1)
+ normalBe2 = backends[2]
+ assertNotNull(normalBe2)
try {
- def tableName = "test_mow_publish_clone_missing_rowset"
+ def tableName =
"test_mow_publish_full_clone_missing_rowset_fault_injection"
sql """ DROP TABLE IF EXISTS ${tableName} force"""
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
@@ -66,20 +72,26 @@
suite('test_mow_publish_clone_missing_rowset_fault_injection', 'docker') {
sql """ INSERT INTO ${tableName} VALUES (3,0)"""
sql """ INSERT INTO ${tableName} VALUES (4,0)"""
sql """ INSERT INTO ${tableName} VALUES (5,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (6,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (7,0)"""
def array = sql_return_maparray("SHOW TABLETS FROM ${tableName}")
def tabletId = array[0].TabletId
- // normal be check rowsets
- logger.info("normal be show:" + tabletId)
- def (code, out, err) = be_show_tablet_status(normalBe.Host,
normalBe.HttpPort, tabletId)
- logger.info("normal be show: code=" + code + ", out=" + out + ",
err=" + err)
- assertTrue(out.contains("[0-1]"))
- assertTrue(out.contains("[2-2]"))
- assertTrue(out.contains("[3-3]"))
- assertTrue(out.contains("[4-4]"))
- assertTrue(out.contains("[5-5]"))
- assertTrue(out.contains("[6-6]"))
+ // normal BEs compaction
+ logger.info("normal BE run cumu compaction:" + tabletId)
+ def (code, out, err) =
be_run_cumulative_compaction(normalBe1.Host, normalBe1.HttpPort, tabletId)
+ logger.info("normal BE1 Run cumu compaction: code=" + code + ",
out=" + out + ", err=" + err)
+ (code, out, err) = be_run_cumulative_compaction(normalBe2.Host,
normalBe2.HttpPort, tabletId)
+ logger.info("normal BE2 Run cumu compaction: code=" + code + ",
out=" + out + ", err=" + err)
+
+ logger.info("normal BE show:" + tabletId)
+ (code, out, err) = be_show_tablet_status(normalBe1.Host,
normalBe1.HttpPort, tabletId)
+ logger.info("normal BE1 show: code=" + code + ", out=" + out + ",
err=" + err)
+ (code, out, err) = be_show_tablet_status(normalBe2.Host,
normalBe2.HttpPort, tabletId)
+ logger.info("normal BE2 show: code=" + code + ", out=" + out + ",
err=" + err)
+
+ sleep(10000)
// 1st inject be check rowsets
logger.info("1st inject be show:" + tabletId)
@@ -91,40 +103,33 @@
suite('test_mow_publish_clone_missing_rowset_fault_injection', 'docker') {
assertFalse(out.contains("[4-4]"))
assertFalse(out.contains("[5-5]"))
assertFalse(out.contains("[6-6]"))
+ assertFalse(out.contains("[7-7]"))
set_be_param("enable_auto_clone_on_mow_publish_missing_version",
"true", injectBe.Host, injectBe.HttpPort);
Thread.sleep(10000)
// submit clone task
- sql """ INSERT INTO ${tableName} VALUES (6,0)"""
+ sql """ INSERT INTO ${tableName} VALUES (8,0)"""
- sleep(10000)
+ sleep(30000)
// 2nd inject be check rowsets
logger.info("2nd inject be show:" + tabletId)
(code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
logger.info("2nd inject be show: code=" + code + ", out=" + out +
", err=" + err)
assertTrue(out.contains("[0-1]"))
- assertTrue(out.contains("[2-2]"))
- assertTrue(out.contains("[3-3]"))
- assertTrue(out.contains("[4-4]"))
- assertTrue(out.contains("[5-5]"))
- assertTrue(out.contains("[6-6]"))
- assertTrue(out.contains("[7-7]"))
+ assertTrue(out.contains("[2-8]"))
+ assertTrue(out.contains("[9-9]"))
- sql """ INSERT INTO ${tableName} VALUES (7,0)"""
+ // inject be compaction
+ logger.info("run cumu compaction:" + tabletId)
+ (code, out, err) = be_run_cumulative_compaction(injectBe.Host,
injectBe.HttpPort, tabletId)
+ logger.info("Run cumu compaction: code=" + code + ", out=" + out +
", err=" + err)
- // 3rd inject be check rowsets
logger.info("3rd inject be show:" + tabletId)
(code, out, err) = be_show_tablet_status(injectBe.Host,
injectBe.HttpPort, tabletId)
logger.info("3rd inject be show: code=" + code + ", out=" + out +
", err=" + err)
assertTrue(out.contains("[0-1]"))
- assertTrue(out.contains("[2-2]"))
- assertTrue(out.contains("[3-3]"))
- assertTrue(out.contains("[4-4]"))
- assertTrue(out.contains("[5-5]"))
- assertTrue(out.contains("[6-6]"))
- assertTrue(out.contains("[7-7]"))
- assertTrue(out.contains("[8-8]"))
+ assertTrue(out.contains("[2-8]"))
} finally {
if (injectBe != null) {
DebugPoint.disableDebugPoint(injectBe.Host,
injectBe.HttpPort.toInteger(), NodeType.BE,
"EnginePublishVersionTask.finish.random")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]