This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7d488688b4 [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994) 7d488688b4 is described below commit 7d488688b4c21ca1eb98b8afdf9144642b18bcdc Author: slothever <18522955+w...@users.noreply.github.com> AuthorDate: Thu Jul 20 20:48:55 2023 +0800 [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994) 1. check minio region, set default region if user region is not provided, and throw minio error msg 2. support read root path s3://bucket1 3. fix max compute public access --- be/src/util/s3_uri.cpp | 5 +++-- docs/en/docs/lakehouse/multi-catalog/hive.md | 1 + docs/en/docs/lakehouse/multi-catalog/iceberg.md | 1 + .../en/docs/sql-manual/sql-functions/table-functions/s3.md | 2 ++ .../Backup-and-Restore/CREATE-REPOSITORY.md | 1 + docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 1 + docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md | 1 + .../docs/sql-manual/sql-functions/table-functions/s3.md | 2 ++ .../org/apache/doris/maxcompute/MaxComputeTableScan.java | 12 +++++++----- .../apache/doris/datasource/MaxComputeExternalCatalog.java | 14 +++++++++----- .../doris/datasource/property/PropertyConverter.java | 5 +++-- .../doris/datasource/property/S3ClientBEProperties.java | 6 +++--- .../datasource/property/constants/MinioProperties.java | 1 + .../doris/datasource/property/constants/S3Properties.java | 2 +- .../main/java/org/apache/doris/fs/remote/S3FileSystem.java | 12 ++++++++++++ .../apache/doris/tablefunction/S3TableValuedFunction.java | 8 ++++++-- 16 files changed, 54 insertions(+), 20 deletions(-) diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp index c2e4a72098..3162b63f0e 100644 --- a/be/src/util/s3_uri.cpp +++ b/be/src/util/s3_uri.cpp @@ -50,11 +50,12 @@ Status S3URI::parse() { rest = scheme_split[1]; std::vector<std::string> authority_split = strings::Split(rest, 
strings::delimiter::Limit(_PATH_DELIM, 1)); - if (authority_split.size() != 2) { + if (authority_split.size() < 1) { return Status::InvalidArgument("Invalid S3 URI: {}", _location); } _bucket = authority_split[0]; - _key = authority_split[1]; + // support s3://bucket1 + _key = authority_split.size() == 1 ? "/" : authority_split[1]; } else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) { // has scheme, eg: http(s)://host/bucket1/path/to/file.txt rest = scheme_split[1]; diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md index 9daacbcd22..26c1b98042 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hive.md +++ b/docs/en/docs/lakehouse/multi-catalog/hive.md @@ -119,6 +119,7 @@ CREATE CATALOG hive PROPERTIES ( "type"="hms", "hive.metastore.uris" = "thrift://172.0.0.1:9083", "s3.endpoint" = "s3.us-east-1.amazonaws.com", + "s3.region" = "us-east-1", "s3.access_key" = "ak", "s3.secret_key" = "sk" "use_path_style" = "true" diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md index 18d66a5350..4509fbc4fa 100644 --- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md @@ -126,6 +126,7 @@ If the data is stored on S3, the following parameters can be used in properties: "s3.access_key" = "ak" "s3.secret_key" = "sk" "s3.endpoint" = "http://endpoint-uri" +"s3.region" = "your-region" "s3.credentials.provider" = "provider-class-name" // 可选,默认凭证类基于BasicAWSCredentials实现。 ``` diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md index dcdd092bd2..e793daec79 100644 --- a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md +++ b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md @@ -40,6 +40,7 @@ s3( "uri" = "..", "s3.access_key" = "...", "s3.secret_key" = "...", + "s3.region" = "...", "format" = "csv", 
"keyn" = "valuen", ... @@ -55,6 +56,7 @@ Related parameters for accessing S3: - `uri`: (required) The S3 tvf will decide whether to use the path style access method according to the `use_path_style` parameter, and the default access method is the virtual-hosted style method. - `s3.access_key`: (required) - `s3.secret_key`: (required) +- `s3.region`: (optional). Mandatory if the Minio has set another region. Otherwise, `us-east-1` is used by default. - `s3.session_token`: (optional) - `use_path_style`: (optional) default `false` . The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style access method. diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md index 438bac678b..2308fd6b9c 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md @@ -102,6 +102,7 @@ ON LOCATION "s3://s3-repo" PROPERTIES ( "s3.endpoint" = "http://s3-REGION.amazonaws.com", + "s3.region" = "s3-REGION", "s3.access_key" = "AWS_ACCESS_KEY", "s3.secret_key"="AWS_SECRET_KEY", "s3.region" = "REGION" diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md index 25c5c6bd26..779442b81a 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md @@ -118,6 +118,7 @@ CREATE CATALOG hive PROPERTIES ( "type"="hms", "hive.metastore.uris" = "thrift://172.0.0.1:9083", "s3.endpoint" = "s3.us-east-1.amazonaws.com", + "s3.region" = "us-east-1", "s3.access_key" = "ak", "s3.secret_key" = "sk" 
"use_path_style" = "true" diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md index bf5333388b..c93fca28e5 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md @@ -126,6 +126,7 @@ CREATE CATALOG iceberg PROPERTIES ( "s3.access_key" = "ak" "s3.secret_key" = "sk" "s3.endpoint" = "http://endpoint-uri" +"s3.region" = "your-region" "s3.credentials.provider" = "provider-class-name" // 可选,默认凭证类基于BasicAWSCredentials实现。 ``` diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md index fe1f246bd9..f5c65e3f41 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md @@ -45,6 +45,7 @@ s3( "uri" = "..", "s3.access_key" = "...", "s3.secret_key" = "...", + "s3.region" = "...", "format" = "csv", "keyn" = "valuen", ... 
@@ -58,6 +59,7 @@ S3 tvf中的每一个参数都是一个 `"key"="value"` 对。 - `uri`: (必填) 访问S3的uri,S3表函数会根据 `use_path_style` 参数来决定是否使用 path style 访问方式,默认为 virtual-hosted style 方式 - `s3.access_key`: (必填) - `s3.secret_key`: (必填) +- `s3.region`: (选填)。如果Minio服务设置了其他的region,那么必填,否则默认使用`us-east-1`。 - `s3.session_token`: (选填) - `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持virtual-hosted style 方式的访问,此时我们可以添加 use_path_style 参数来强制使用 path style 方式。比如 `minio`默认情况下只允许`path style`访问方式,所以在访问minio时要加上`use_path_style=true`。 diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java index 5102330a4d..da67196a3a 100644 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java +++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java @@ -29,7 +29,7 @@ import java.io.IOException; * MaxComputeJ JniScanner. BE will read data from the scanner object. 
*/ public class MaxComputeTableScan { - private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api"; + private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api"; private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com"; private final Odps odps; private final TableTunnel tunnel; @@ -43,13 +43,15 @@ public class MaxComputeTableScan { this.project = project; this.table = table; odps = new Odps(new AliyunAccount(accessKey, secretKey)); - odps.setEndpoint(odpsUrlTemplate.replace("{}", region)); - odps.setDefaultProject(this.project); - tunnel = new TableTunnel(odps); + String odpsUrl = odpsUrlTemplate.replace("{}", region); String tunnelUrl = tunnelUrlTemplate.replace("{}", region); if (enablePublicAccess) { - tunnelUrl = tunnelUrlTemplate.replace("-inc", ""); + odpsUrl = odpsUrl.replace("-inc", ""); + tunnelUrl = tunnelUrl.replace("-inc", ""); } + odps.setEndpoint(odpsUrl); + odps.setDefaultProject(this.project); + tunnel = new TableTunnel(odps); tunnel.setEndpoint(tunnelUrl); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java index c62abf9ada..bfe6b2c18c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java @@ -43,11 +43,11 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { private String secretKey; @SerializedName(value = "publicAccess") private boolean enablePublicAccess; - private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api"; + private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api"; private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com"; public MaxComputeExternalCatalog(long 
catalogId, String name, String resource, Map<String, String> props, - String comment) { + String comment) { super(catalogId, name, InitCatalogLog.Type.MAX_COMPUTE, comment); catalogProperty = new CatalogProperty(resource, props); } @@ -77,9 +77,13 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { secretKey = credential.getSecretKey(); Account account = new AliyunAccount(accessKey, secretKey); this.odps = new Odps(account); - odps.setEndpoint(odpsUrlTemplate.replace("{}", region)); - odps.setDefaultProject(defaultProject); enablePublicAccess = Boolean.parseBoolean(props.getOrDefault(MCProperties.PUBLIC_ACCESS, "false")); + String odpsUrl = odpsUrlTemplate.replace("{}", region); + if (enablePublicAccess) { + odpsUrl = odpsUrl.replace("-inc", ""); + } + odps.setEndpoint(odpsUrl); + odps.setDefaultProject(defaultProject); } public long getTotalRows(String project, String table) throws TunnelException { @@ -87,7 +91,7 @@ public class MaxComputeExternalCatalog extends ExternalCatalog { TableTunnel tunnel = new TableTunnel(odps); String tunnelUrl = tunnelUrlTemplate.replace("{}", region); if (enablePublicAccess) { - tunnelUrl = tunnelUrlTemplate.replace("-inc", ""); + tunnelUrl = tunnelUrl.replace("-inc", ""); } tunnel.setEndpoint(tunnelUrl); return tunnel.createDownloadSession(project, table).getRecordCount(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java index 8787233e7d..304cc95882 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java @@ -309,8 +309,9 @@ public class PropertyConverter { } private static Map<String, String> convertToMinioProperties(Map<String, String> props, CloudCredential credential) { - // minio does not have region, use an arbitrary one. 
- props.put(MinioProperties.REGION, "us-east-1"); + if (!props.containsKey(MinioProperties.REGION)) { + props.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION); + } return convertToS3Properties(S3Properties.prefixToS3(props), credential); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java index b0639be2df..fe912c1038 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java @@ -28,15 +28,15 @@ import java.util.HashMap; import java.util.Map; public class S3ClientBEProperties { - /** * convert FE properties to BE S3 client properties * On BE, should use properties like AWS_XXX. */ public static Map<String, String> getBeFSProperties(Map<String, String> properties) { if (properties.containsKey(MinioProperties.ENDPOINT)) { - // minio does not have region, use an arbitrary one. - properties.put(MinioProperties.REGION, "us-east-1"); + if (!properties.containsKey(MinioProperties.REGION)) { + properties.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION); + } return getBeAWSPropertiesFromS3(S3Properties.prefixToS3(properties)); } else if (properties.containsKey(S3Properties.ENDPOINT)) { // s3,oss,cos,obs use this. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java index c444384220..a286718ea8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java @@ -31,6 +31,7 @@ public class MinioProperties extends BaseProperties { public static final String ACCESS_KEY = "minio.access_key"; public static final String SECRET_KEY = "minio.secret_key"; public static final String SESSION_TOKEN = "minio.session_token"; + public static final String DEFAULT_REGION = "us-east-1"; public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, ACCESS_KEY, SECRET_KEY, REGION); public static CloudCredential getCredential(Map<String, String> props) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java index b5a96ddcec..13fb6ee336 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java @@ -126,7 +126,7 @@ public class S3Properties extends BaseProperties { if (endpointSplit.length < 2) { return null; } - if (endpointSplit[0].startsWith("oss-")) { + if (endpointSplit[0].contains("oss-")) { // compatible with the endpoint: oss-cn-bejing.aliyuncs.com return endpointSplit[0]; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java index 0d09037c81..f04abae879 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java @@ 
-23,6 +23,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.datasource.property.PropertyConverter; import org.apache.doris.fs.obj.S3ObjStorage; +import com.amazonaws.services.s3.model.AmazonS3Exception; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -86,6 +87,17 @@ public class S3FileSystem extends ObjFileSystem { LOG.info("file not found: " + e.getMessage()); return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage()); } catch (Exception e) { + if (e.getCause() instanceof AmazonS3Exception) { + // process minio error msg + AmazonS3Exception ea = (AmazonS3Exception) e.getCause(); + Map<String, String> callbackHeaders = ea.getHttpHeaders(); + if (callbackHeaders != null && !callbackHeaders.isEmpty()) { + String minioErrMsg = callbackHeaders.get("X-Minio-Error-Desc"); + if (minioErrMsg != null) { + return new Status(Status.ErrCode.COMMON_ERROR, "Minio request error: " + minioErrMsg); + } + } + } LOG.error("errors while get file status ", e); return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java index a54b0e80d6..9f6339b0c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java @@ -67,7 +67,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction { private final S3URI s3uri; private final boolean forceVirtualHosted; - private String virtualBucket; + private String virtualBucket = ""; private String virtualKey; public S3TableValuedFunction(Map<String, String> params) throws AnalysisException { @@ -77,8 +77,12 @@ public class S3TableValuedFunction extends 
ExternalFileTableValuedFunction { final String endpoint = forceVirtualHosted ? getEndpointAndSetVirtualBucket(params) : s3uri.getBucketScheme(); + if (!tvfParams.containsKey(S3Properties.REGION)) { + String region = S3Properties.getRegionOfEndpoint(endpoint); + tvfParams.put(S3Properties.REGION, region); + } CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint, - tvfParams.getOrDefault(S3Properties.REGION, S3Properties.getRegionOfEndpoint(endpoint)), + tvfParams.get(S3Properties.REGION), tvfParams.get(S3Properties.ACCESS_KEY), tvfParams.get(S3Properties.SECRET_KEY)); if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org