This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 824c357cd94 [fix] let backup work on azure (#46411)
824c357cd94 is described below

commit 824c357cd94d93462ecad4642c8f33aac2311934
Author: Yongqiang YANG <yangyongqi...@selectdb.com>
AuthorDate: Mon Jan 6 13:50:20 2025 +0800

    [fix] let backup work on azure (#46411)
    
    1. use https for azure
    2. region is unnecessary for gcp.
    3. fe pass provider to be
    4. be listblob endless loop
    5. fe globlist use hierarchy api
    6. azure type should be s3 in fe
    7. add azure file system adaptor in gson
---
 be/src/io/fs/azure_obj_storage_client.cpp          |  2 +-
 be/src/util/s3_util.cpp                            |  4 +-
 .../org/apache/doris/analysis/StorageBackend.java  |  1 +
 .../datasource/property/S3ClientBEProperties.java  |  3 ++
 .../org/apache/doris/fs/obj/AzureObjStorage.java   | 46 ++++++++++++++++------
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java |  5 +++
 .../apache/doris/fs/remote/AzureFileSystem.java    |  5 +--
 .../org/apache/doris/persist/gson/GsonUtils.java   |  4 +-
 .../datasource/property/PropertyConverterTest.java |  2 +-
 .../apache/doris/fs/obj/AzureObjStorageTest.java   | 27 ++++++++-----
 10 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/be/src/io/fs/azure_obj_storage_client.cpp 
b/be/src/io/fs/azure_obj_storage_client.cpp
index 9f33db3400a..44d45077ebc 100644
--- a/be/src/io/fs/azure_obj_storage_client.cpp
+++ b/be/src/io/fs/azure_obj_storage_client.cpp
@@ -311,7 +311,7 @@ ObjectStorageResponse 
AzureObjStorageClient::list_objects(const ObjectStoragePat
                     return _client->ListBlobs(list_opts);
                 });
                 get_file_file(resp);
-                while (!resp.NextPageToken->empty()) {
+                while (resp.NextPageToken.HasValue()) {
                     list_opts.ContinuationToken = resp.NextPageToken;
                     resp = s3_get_rate_limit([&]() {
                         SCOPED_BVAR_LATENCY(s3_bvar::s3_list_latency);
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index f87389b6b3e..dece1074e60 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -254,8 +254,8 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
             
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak, 
s3_conf.sk);
 
     const std::string container_name = s3_conf.bucket;
-    const std::string uri = fmt::format("{}://{}.blob.core.windows.net/{}",
-                                        config::s3_client_http_scheme, 
s3_conf.ak, container_name);
+    const std::string uri =
+            fmt::format("{}://{}.blob.core.windows.net/{}", "https", 
s3_conf.ak, container_name);
 
     auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
     LOG_INFO("create one azure client with {}", s3_conf.to_string());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
index b07725d2507..67a76cec450 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java
@@ -178,6 +178,7 @@ public class StorageBackend implements ParseNode {
                     return TStorageBackendType.JFS;
                 case LOCAL:
                     return TStorageBackendType.LOCAL;
+                // deprecated
                 case AZURE:
                     return TStorageBackendType.AZURE;
                 default:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
index 7d8c2668fea..093b74b80ae 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
@@ -85,6 +85,9 @@ public class S3ClientBEProperties {
         if (properties.containsKey(PropertyConverter.USE_PATH_STYLE)) {
             beProperties.put(PropertyConverter.USE_PATH_STYLE, 
properties.get(PropertyConverter.USE_PATH_STYLE));
         }
+        if (properties.containsKey(S3Properties.PROVIDER)) {
+            beProperties.put(S3Properties.PROVIDER, 
properties.get(S3Properties.PROVIDER));
+        }
         return beProperties;
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 780d2ab9fa3..0253993cc42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -36,6 +36,7 @@ import com.azure.storage.blob.BlobServiceClientBuilder;
 import com.azure.storage.blob.batch.BlobBatch;
 import com.azure.storage.blob.batch.BlobBatchClient;
 import com.azure.storage.blob.batch.BlobBatchClientBuilder;
+import com.azure.storage.blob.models.BlobErrorCode;
 import com.azure.storage.blob.models.BlobItem;
 import com.azure.storage.blob.models.BlobProperties;
 import com.azure.storage.blob.models.BlobStorageException;
@@ -53,6 +54,7 @@ import java.nio.file.FileSystems;
 import java.nio.file.PathMatcher;
 import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
@@ -196,6 +198,9 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             LOG.info("delete file " + remotePath + " success");
             return Status.OK;
         } catch (BlobStorageException e) {
+            if (e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) {
+                return Status.OK;
+            }
             return new Status(
                     Status.ErrCode.COMMON_ERROR,
                     "get file from azure error: " + e.getServiceMessage());
@@ -331,6 +336,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             LOG.info("path pattern {}", pathPattern.toString());
             PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pathPattern.toString());
 
+            HashSet<String> directorySet = new HashSet<>();
             String listPrefix = getLongestPrefix(globPath);
             LOG.info("azure glob list prefix is {}", listPrefix);
             ListBlobsOptions options = new 
ListBlobsOptions().setPrefix(listPrefix);
@@ -343,18 +349,36 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
                     elementCnt++;
                     java.nio.file.Path blobPath = 
Paths.get(blobItem.getName());
 
-                    if (!matcher.matches(blobPath)) {
-                        continue;
+                    boolean isPrefix = false;
+                    while 
(blobPath.normalize().toString().startsWith(listPrefix)) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("get blob {}", 
blobPath.normalize().toString());
+                        }
+                        if (!matcher.matches(blobPath)) {
+                            isPrefix = true;
+                            blobPath = blobPath.getParent();
+                            continue;
+                        }
+                        if 
(directorySet.contains(blobPath.normalize().toString())) {
+                            break;
+                        }
+                        if (isPrefix) {
+                            directorySet.add(blobPath.normalize().toString());
+                        }
+
+                        matchCnt++;
+                        RemoteFile remoteFile = new RemoteFile(
+                                fileNameOnly ? 
blobPath.getFileName().toString() : constructS3Path(blobPath.toString(),
+                                        uri.getBucket()),
+                                !isPrefix,
+                                isPrefix ? -1 : 
blobItem.getProperties().getContentLength(),
+                                isPrefix ? -1 : 
blobItem.getProperties().getContentLength(),
+                                isPrefix ? 0 : 
blobItem.getProperties().getLastModified().getSecond());
+                        result.add(remoteFile);
+
+                        blobPath = blobPath.getParent();
+                        isPrefix = true;
                     }
-                    matchCnt++;
-                    RemoteFile remoteFile = new RemoteFile(
-                            fileNameOnly ? blobPath.getFileName().toString() : 
constructS3Path(blobPath.toString(),
-                                    uri.getBucket()),
-                            !blobItem.isPrefix(),
-                            blobItem.isPrefix() ? -1 : 
blobItem.getProperties().getContentLength(),
-                            blobItem.getProperties().getContentLength(),
-                            
blobItem.getProperties().getLastModified().getSecond());
-                    result.add(remoteFile);
                 }
                 newContinuationToken = pagedResponse.getContinuationToken();
             } while (newContinuationToken != null);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 69a8ac4d794..edcb54bf8fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -102,6 +102,11 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
                 .equalsIgnoreCase("true");
         forceParsingByStandardUri = 
this.properties.getOrDefault(PropertyConverter.FORCE_PARSING_BY_STANDARD_URI,
                 "false").equalsIgnoreCase("true");
+
+        String endpoint = this.properties.get(S3Properties.ENDPOINT);
+        String region = this.properties.get(S3Properties.REGION);
+
+        this.properties.put(S3Properties.REGION, 
PropertyConverter.checkRegion(endpoint, region, S3Properties.REGION));
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
index 5004cfd2f12..c116182d3a4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/AzureFileSystem.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.fs.remote;
 
-import org.apache.doris.analysis.StorageBackend;
 import org.apache.doris.analysis.StorageBackend.StorageType;
 import org.apache.doris.backup.Status;
 import org.apache.doris.common.UserException;
@@ -35,13 +34,13 @@ public class AzureFileSystem extends ObjFileSystem {
     private static final Logger LOG = 
LogManager.getLogger(AzureFileSystem.class);
 
     public AzureFileSystem(Map<String, String> properties) {
-        super(StorageType.AZURE.name(), StorageType.AZURE, new 
AzureObjStorage(properties));
+        super(StorageType.AZURE.name(), StorageType.S3, new 
AzureObjStorage(properties));
         initFsProperties();
     }
 
     @VisibleForTesting
     public AzureFileSystem(AzureObjStorage storage) {
-        super(StorageBackend.StorageType.AZURE.name(), 
StorageBackend.StorageType.AZURE, storage);
+        super(StorageType.AZURE.name(), StorageType.S3, storage);
         initFsProperties();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
index 6132bbe64eb..8a5516b7616 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java
@@ -172,6 +172,7 @@ import 
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalCatalog;
 import 
org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalDatabase;
 import org.apache.doris.datasource.trinoconnector.TrinoConnectorExternalTable;
 import org.apache.doris.fs.PersistentFileSystem;
+import org.apache.doris.fs.remote.AzureFileSystem;
 import org.apache.doris.fs.remote.BrokerFileSystem;
 import org.apache.doris.fs.remote.ObjFileSystem;
 import org.apache.doris.fs.remote.S3FileSystem;
@@ -570,7 +571,8 @@ public class GsonUtils {
             .registerSubtype(JFSFileSystem.class, 
JFSFileSystem.class.getSimpleName())
             .registerSubtype(OFSFileSystem.class, 
OFSFileSystem.class.getSimpleName())
             .registerSubtype(ObjFileSystem.class, 
ObjFileSystem.class.getSimpleName())
-            .registerSubtype(S3FileSystem.class, 
S3FileSystem.class.getSimpleName());
+            .registerSubtype(S3FileSystem.class, 
S3FileSystem.class.getSimpleName())
+            .registerSubtype(AzureFileSystem.class, 
AzureFileSystem.class.getSimpleName());
 
     private static 
RuntimeTypeAdapterFactory<org.apache.doris.backup.AbstractJob>
             jobBackupTypeAdapterFactory
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
index 8967ca5fae0..eacd0bacbb3 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java
@@ -195,7 +195,7 @@ public class PropertyConverterTest extends 
TestWithFeService {
         CreateRepositoryStmt analyzedStmtNew = createStmt(s3RepoNew);
         Assertions.assertEquals(analyzedStmtNew.getProperties().size(), 3);
         Repository repositoryNew = getRepository(analyzedStmtNew, 
"s3_repo_new");
-        
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
 4);
+        
Assertions.assertEquals(repositoryNew.getRemoteFileSystem().getProperties().size(),
 5);
     }
 
     private static Repository getRepository(CreateRepositoryStmt analyzedStmt, 
String name) throws DdlException {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
index 41f49b7eab1..f8869db9cf2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/AzureObjStorageTest.java
@@ -42,6 +42,7 @@ import java.nio.file.Paths;
 import java.time.OffsetDateTime;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -58,19 +59,20 @@ public class AzureObjStorageTest {
     }
 
     public static List<I> genInputs() {
+        // refer genObjectKeys
         List<I> inputs = new ArrayList<I>();
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/*/tmp*", 8196L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/*tmp*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/**/tmp*", 20490L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/**/tmp*", 32784L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/*", 0L)); // no files 
at 1st level
+        inputs.add(new I("s3://gavin-test-jp/azure-test/*", 3L)); // no files 
at 1st level
         inputs.add(new I("s3://gavin-test-jp/azure-test/2/*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/2*/*", 4098L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/2/*I*", 591L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/1", 0L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/2", 0L));
-        inputs.add(new I("s3://gavin-test-jp/azure-test/3", 0L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/1", 1L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/2", 1L));
+        inputs.add(new I("s3://gavin-test-jp/azure-test/3", 1L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.k*", 61L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/1/tmp.[a-z]*", 1722L));
         inputs.add(new I("s3://gavin-test-jp/azure-test/[12]/tmp.[a-z]*", 
3444L));
@@ -120,13 +122,13 @@ public class AzureObjStorageTest {
             boolean fileNameOnly = false;
             // FIXME(gavin): Mock the result returned from azure blob to make 
this UT work when no aksk and network
             Status st = azs.globList(i.pattern, result, fileNameOnly);
+            System.out.println("testGlobListWithMockedAzureStorage pattern: " 
+ i.pattern + " matched " + result.size());
             Assertions.assertTrue(st.ok());
             Assertions.assertEquals(i.expectedMatchSize, result.size());
             for (int j = 0; j < result.size() && j < 10; ++j) {
                 System.out.println(result.get(j).getName());
             }
-            System.out.println("pattern: " + i.pattern + " matched " + 
result.size());
-            System.out.println("====================");
+
         });
     }
 
@@ -136,13 +138,20 @@ public class AzureObjStorageTest {
             String pattern = i.pattern.substring(19); // remove prefix 
s3://gavin-test-jp/
             PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pattern);
             List<String> matchedPaths = new ArrayList<>();
+            HashSet<String> directories = new HashSet<>();
             for (String p : genObjectKeys()) {
                 java.nio.file.Path blobPath = Paths.get(p);
-                if (!matcher.matches(blobPath)) {
-                    continue;
+
+                while (blobPath != null) {
+                    if (matcher.matches(blobPath) && 
!directories.contains(blobPath.toString())) {
+                        matchedPaths.add(blobPath.toString());
+                        directories.add(blobPath.toString());
+                    }
+                    blobPath = blobPath.getParent();
                 }
-                matchedPaths.add(p);
             }
+            System.out.println("pattern: " + i.pattern + " matched " + 
matchedPaths.size());
+            System.out.println("====================");
             Assertions.assertEquals(i.expectedMatchSize, matchedPaths.size());
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to