This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d5575e05e1 [fix](cold hot separation) Fix the issue that files on the 
remote storage are not deleted after triggering cold data compaction. (#48109)
3d5575e05e1 is described below

commit 3d5575e05e11911683653df716ef93d9585fcc9c
Author: yagagagaga <zhangminkefromflyd...@gmail.com>
AuthorDate: Thu Feb 27 21:10:18 2025 +0800

    [fix](cold hot separation) Fix the issue that files on the remote storage 
are not deleted after triggering cold data compaction. (#48109)
---
 be/src/olap/olap_server.cpp                        |   2 +-
 .../org/apache/doris/regression/suite/Suite.groovy |  18 +++
 regression-test/pipeline/p0/conf/be.conf           |   4 +
 .../cold_data_compaction.groovy                    | 132 +++++++++++++++++++++
 4 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 19eea915723..0ab3c694292 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -1285,7 +1285,7 @@ void StorageEngine::do_remove_unused_remote_files() {
             }
             cooldown_meta_id = t->tablet_meta()->cooldown_meta_id();
         }
-        auto [cooldown_replica_id, cooldown_term] = t->cooldown_conf();
+        auto [cooldown_term, cooldown_replica_id] = t->cooldown_conf();
         if (cooldown_replica_id != t->replica_id()) {
             return;
         }
diff --git 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index 957a13efe5e..f567cd57d8f 100644
--- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++ 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -17,6 +17,12 @@
 
 package org.apache.doris.regression.suite
 
+import com.amazonaws.auth.AWSStaticCredentialsProvider
+import com.amazonaws.auth.BasicAWSCredentials
+import com.amazonaws.client.builder.AwsClientBuilder
+import com.amazonaws.services.s3.AmazonS3
+import com.amazonaws.services.s3.AmazonS3ClientBuilder
+
 import static java.util.concurrent.TimeUnit.SECONDS
 
 import com.google.common.base.Strings
@@ -97,6 +103,8 @@ class Suite implements GroovyInterceptable {
     final List<Future> lazyCheckFutures = new Vector<>()
     static Boolean isTrinoConnectorDownloaded = false
 
+    private AmazonS3 s3Client = null
+
     Suite(String name, String group, SuiteContext context, SuiteCluster 
cluster) {
         this.name = name
         this.group = group
@@ -989,6 +997,16 @@ class Suite implements GroovyInterceptable {
         return s3Url
     }
 
+    synchronized AmazonS3 getS3Client() { // returns this Suite's S3 client, creating it on first use; synchronized so creation runs under the instance lock
+        if (s3Client == null) { // the s3Client field starts as null, so the client is built only on the first call
+            def credentials = new BasicAWSCredentials(getS3AK(), getS3SK()) // static key pair taken from getS3AK()/getS3SK()
+            def endpointConfiguration = new 
AwsClientBuilder.EndpointConfiguration(getS3Endpoint(), getS3Region()) // explicit endpoint and region from getS3Endpoint()/getS3Region()
+            s3Client = 
AmazonS3ClientBuilder.standard().withEndpointConfiguration(endpointConfiguration)
+                    .withCredentials(new 
AWSStaticCredentialsProvider(credentials)).build() // the built client is stored in the field for reuse
+        }
+        return s3Client // later calls return the instance created above
+    }
+
     String getJdbcPassword() {
         String sk = context.config.otherConfigs.get("jdbcPassword");
         return sk
diff --git a/regression-test/pipeline/p0/conf/be.conf 
b/regression-test/pipeline/p0/conf/be.conf
index 8b240d9f069..958f901db59 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -78,5 +78,9 @@ enable_write_index_searcher_cache=true
 # enable download small files in batch, see apache/doris#45061 for details
 enable_batch_download = true
 
+remove_unused_remote_files_interval_sec=60
+cold_data_compaction_interval_sec=60
+
 # So feature has bug, so by default is false, only open it in pipeline to 
observe
 enable_parquet_page_index=true
+
diff --git 
a/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy 
b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
new file mode 100644
index 00000000000..bf9e33e7528
--- /dev/null
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction.groovy
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import com.amazonaws.services.s3.model.ListObjectsRequest
+import java.util.function.Supplier
+
+suite("test_cold_data_compaction") {
+    def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure -> // polls closure until it returns true; throws RuntimeException once timeoutSecond has elapsed
+        long start = System.currentTimeMillis() // wall-clock start used for the deadline check
+        while (true) {
+            if (closure.get()) { // condition met: stop polling
+                return
+            } else {
+                if (System.currentTimeMillis() - start > timeoutSecond * 1000) 
{ // deadline is checked only after a failed attempt, so closure always runs at least once; NOTE(review): timeoutSecond * 1000 looks like int arithmetic - fine for the 1800 s callers here
+                    throw new RuntimeException("" +
+                            "Operation timeout, maybe you need to check " +
+                            "remove_unused_remote_files_interval_sec and " +
+                            "cold_data_compaction_interval_sec in be.conf")
+                } else {
+                    sleep(10_000) // pause between attempts; presumably milliseconds (10 s) - confirm which sleep() is in scope
+                }
+            }
+        }
+    }
+
+    String suffix = UUID.randomUUID().hashCode().abs().toString() // random numeric suffix for the policy/resource names; NOTE(review): abs() of Integer.MIN_VALUE stays negative, which would put a '-' into those names - confirm acceptable
+    String s3Prefix = "regression/cold_data_compaction" // used as s3.root.path of the resource and as the key prefix for the listObjects checks
+    multi_sql """
+            DROP TABLE IF EXISTS t_recycle_in_s3;
+            DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
+            DROP RESOURCE IF EXISTS 'remote_s3_${suffix}';
+
+            CREATE RESOURCE "remote_s3_${suffix}"
+            PROPERTIES
+            (
+                "type" = "s3",
+                "s3.endpoint" = "${getS3Endpoint()}",
+                "s3.region" = "${getS3Region()}",
+                "s3.bucket" = "${getS3BucketName()}",
+                "s3.root.path" = "${s3Prefix}",
+                "s3.access_key" = "${getS3AK()}",
+                "s3.secret_key" = "${getS3SK()}",
+                "s3.connection.maximum" = "50",
+                "s3.connection.request.timeout" = "3000",
+                "s3.connection.timeout" = "1000"
+            );
+            CREATE STORAGE POLICY test_policy_${suffix}
+            PROPERTIES(
+                "storage_resource" = "remote_s3_${suffix}",
+                "cooldown_ttl" = "5"
+            );
+            CREATE TABLE IF NOT EXISTS t_recycle_in_s3
+            (
+                k1 BIGINT,
+                k2 LARGEINT,
+                v1 VARCHAR(2048)
+            )
+            DISTRIBUTED BY HASH (k1) BUCKETS 1
+            PROPERTIES(
+                "storage_policy" = "test_policy_${suffix}",
+                "disable_auto_compaction" = "true",
+                "replication_num" = "1"
+            );
+        """ // the table is created with auto compaction disabled; presumably so the rowsets inserted next are not merged before they are counted
+
+    // insert 5 rows with one statement each, giving 5 RowSets
+    multi_sql """
+        insert into t_recycle_in_s3 values(1, 1, 'Tom');
+        insert into t_recycle_in_s3 values(2, 2, 'Jelly');
+        insert into t_recycle_in_s3 values(3, 3, 'Spike');
+        insert into t_recycle_in_s3 values(4, 4, 'Tyke');
+        insert into t_recycle_in_s3 values(5, 5, 'Tuffy');
+    """
+
+    // wait (up to 1800 s) until the table's data files have been uploaded to S3
+    retryUntilTimeout(1800, {
+        def res = sql_return_maparray "show data from t_recycle_in_s3"
+        String size = ""
+        String remoteSize = ""
+        for (final def line in res) { // pick the row that belongs to the table itself
+            if ("t_recycle_in_s3".equals(line.TableName)) {
+                size = line.Size
+                remoteSize = line.RemoteSize
+                break
+            }
+        }
+        logger.info("waiting for data to be uploaded to S3: t_recycle_in_s3's 
local data size: ${size}, remote data size: ${remoteSize}")
+        return size.startsWith("0") && !remoteSize.startsWith("0") // done once Size starts with "0" and RemoteSize does not
+    })
+
+    String tabletId = sql_return_maparray("show tablets from 
t_recycle_in_s3")[0].TabletId // first row of the result; the table was created with BUCKETS 1 and replication_num 1
+    // list the remote objects stored under this tablet's data prefix
+    def filesBeforeCompaction = getS3Client().listObjects(
+            new 
ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix + 
"/data/${tabletId}")).getObjectSummaries()
+
+    // expected objects before compaction: 5 RowSets + 1 meta
+    assertEquals(6, filesBeforeCompaction.size())
+
+    // trigger cold data compaction by re-enabling auto compaction on the table
+    sql """alter table t_recycle_in_s3 set ("disable_auto_compaction" = 
"false")"""
+
+    // wait (up to 1800 s) until compaction has finished and the remote object count has dropped
+    retryUntilTimeout(1800, {
+        def filesAfterCompaction = getS3Client().listObjects(
+                new 
ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ 
"/data/${tabletId}")).getObjectSummaries()
+        logger.info("t_recycle_in_s3's remote file number is 
${filesAfterCompaction.size()}")
+        // expected objects after compaction: 1 RowSet + 1 meta
+        return filesAfterCompaction.size() == 2
+    })
+
+    sql "drop table t_recycle_in_s3 force" // after the drop, the tablet's remote objects are expected to disappear
+    retryUntilTimeout(1800, {
+        def filesAfterDrop = getS3Client().listObjects(
+                new 
ListObjectsRequest().withBucketName(getS3BucketName()).withPrefix(s3Prefix+ 
"/data/${tabletId}")).getObjectSummaries()
+        logger.info("after drop t_recycle_in_s3, remote file number is 
${filesAfterDrop.size()}")
+        return filesAfterDrop.size() == 0 // done once no object is left under the tablet prefix
+    })
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to