github-actions[bot] commented on code in PR #63400:
URL: https://github.com/apache/doris/pull/63400#discussion_r3412863803
##########
fe/fe-filesystem/fe-filesystem-oss/src/main/java/org/apache/doris/filesystem/oss/OssObjStorage.java:
##########
@@ -34,128 +61,328 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.net.URL;
+import java.util.Collections;
import java.util.Date;
-import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
/**
- * Alibaba Cloud OSS implementation of {@link
org.apache.doris.filesystem.spi.ObjStorage}.
+ * Alibaba Cloud OSS implementation backed by the native OSS SDK.
*
- * <p>Extends {@link S3ObjStorage} so all core I/O operations (list, head,
put, delete, copy,
- * multipart upload) delegate to the parent using S3-compatible APIs —
matching the reference pattern
- * where {@code OssRemote extends DefaultRemote} (the S3-backed base class).
- *
- * <p>The two cloud-specific extension methods that <em>require</em> a native
SDK are overridden:
- * <ul>
- * <li>{@link #getPresignedUrl(String)} — uses Alibaba OSS SDK
- * ({@code OSS.generatePresignedUrl}) to produce the correct OSS
signature format.</li>
- * <li>{@link #getStsToken()} — uses Alibaba RAM/STS SDK
- * ({@code DefaultAcsClient}) to call {@code sts.aliyuncs.com}.</li>
- * </ul>
- *
- * <p>Recognized property keys (OSS-specific; AWS_* equivalents accepted as
fallback):
- * <ul>
- * <li>{@code OSS_ENDPOINT} / {@code AWS_ENDPOINT} — OSS endpoint URL</li>
- * <li>{@code OSS_ACCESS_KEY} / {@code AWS_ACCESS_KEY} — access key ID</li>
- * <li>{@code OSS_SECRET_KEY} / {@code AWS_SECRET_KEY} — secret access
key</li>
- * <li>{@code OSS_TOKEN} / {@code AWS_TOKEN} — STS session token
(optional)</li>
- * <li>{@code OSS_BUCKET} / {@code AWS_BUCKET} — bucket name (required for
cloud extensions)</li>
- * <li>{@code OSS_REGION} / {@code AWS_REGION} — region for STS calls</li>
- * <li>{@code OSS_ROLE_ARN} / {@code AWS_ROLE_ARN} — role ARN for STS
assumption</li>
- * </ul>
+ * <p>This class consumes typed OSS properties. Raw key aliases are resolved by
+ * {@link OssFileSystemProperties}; client construction and authentication do
not
+ * translate through AWS-compatible keys.
*/
-public class OssObjStorage extends S3ObjStorage {
+public class OssObjStorage implements ObjStorage<OSS> {
private static final Logger LOG =
LogManager.getLogger(OssObjStorage.class);
- /** Validity period for presigned URLs and STS tokens, in seconds. */
private static final int SESSION_EXPIRE_SECONDS = 3600;
+ private static final int DELETE_BATCH_SIZE = 1000;
+ private static final Credentials ANONYMOUS_CREDENTIALS =
+ new DefaultCredentials("anonymous", "anonymous");
+ // A bucket name that can serve as a virtual-hosted subdomain: 3-63 chars,
lowercase
+ // letters/digits/hyphens, must start and end with an alphanumeric.
+ private static final java.util.regex.Pattern VIRTUAL_HOST_BUCKET_NAME =
+ java.util.regex.Pattern.compile("[a-z0-9][a-z0-9-]{1,61}[a-z0-9]");
- private final Map<String, String> ossProperties;
+ private final OssFileSystemProperties properties;
+ private final AtomicBoolean closed = new AtomicBoolean(false);
private volatile OSS ossClient;
public OssObjStorage(Map<String, String> properties) {
- super(toS3Props(properties));
- this.ossProperties = properties;
+ this(OssFileSystemProperties.of(properties));
+ }
+
+ public OssObjStorage(OssFileSystemProperties properties) {
+ this.properties = properties;
+ }
+
+ /** Whether path-style (vs virtual-hosted-style) bucket access is
explicitly configured. */
+ public boolean isUsePathStyle() {
+ return properties.isUsePathStyle();
+ }
+
+ @Override
+ public OSS getClient() throws IOException {
+ return getClient(properties.getBucket());
}
/**
- * Translates OSS-specific property keys to the AWS keys expected by
{@link S3ObjStorage}.
- * If both forms are present, the AWS_* key takes precedence.
+ * Returns the lazily-built OSS client, choosing path-style vs
virtual-hosted addressing
+ * based on {@code bucket} (see {@link #resolvePathStyle(String)}).
+ *
+ * <p>The addressing decision is made once, when the client is first
built, from the first
+ * bucket accessed through this instance. An {@code OssObjStorage} is
scoped to a single
+ * endpoint/bucket in practice (e.g. one backup repository), so this
matches the per-bucket
+ * behavior the legacy AWS-SDK-based client relied on for that case.
*/
- static Map<String, String> toS3Props(Map<String, String> ossProps) {
- Map<String, String> s3Props = new HashMap<>(ossProps);
- if (ossProps.containsKey("OSS_ENDPOINT") &&
!ossProps.containsKey("AWS_ENDPOINT")) {
- s3Props.put("AWS_ENDPOINT", ossProps.get("OSS_ENDPOINT"));
+ private OSS getClient(String bucket) throws IOException {
+ if (closed.get()) {
+ throw new IOException("OssObjStorage is already closed");
}
- if (ossProps.containsKey("OSS_ACCESS_KEY") &&
!ossProps.containsKey("AWS_ACCESS_KEY")) {
- s3Props.put("AWS_ACCESS_KEY", ossProps.get("OSS_ACCESS_KEY"));
+ if (ossClient == null) {
+ synchronized (this) {
+ if (ossClient == null) {
+ ossClient = buildOssClient(resolvePathStyle(bucket));
+ }
+ }
}
- if (ossProps.containsKey("OSS_SECRET_KEY") &&
!ossProps.containsKey("AWS_SECRET_KEY")) {
- s3Props.put("AWS_SECRET_KEY", ossProps.get("OSS_SECRET_KEY"));
+ return ossClient;
+ }
+
+ /**
+ * Decides whether to use path-style (SLD) addressing for {@code bucket}.
+ *
+ * <p>Honors an explicit {@code use_path_style=true}, and otherwise falls
back to path-style
+ * when the bucket name cannot be expressed as a virtual-hosted DNS label
(for example it
+ * contains an underscore, which is illegal in a hostname). This mirrors
the AWS SDK v2
+ * behavior the legacy S3-compatible client relied on, so buckets with
non-DNS-safe names
+ * keep working instead of failing with a virtual-hosted {@code
NoSuchBucket}. The native OSS
+ * SDK does no such fallback on its own.
+ *
+ * <p>Note: the OSS SDK's own {@code validateBucketName} is intentionally
not used here — it
+ * accepts underscores (a legal object-storage name) even though such a
name is not a legal
+ * DNS host label, which is precisely the case that must trigger
path-style.
+ */
+ boolean resolvePathStyle(String bucket) {
+ if (properties.isUsePathStyle()) {
+ return true;
}
- if (ossProps.containsKey("OSS_TOKEN") &&
!ossProps.containsKey("AWS_TOKEN")) {
- s3Props.put("AWS_TOKEN", ossProps.get("OSS_TOKEN"));
+ return hasText(bucket) && !isVirtualHostCompatible(bucket);
+ }
+
+ /**
+ * Returns true when {@code bucket} is a valid virtual-hosted DNS label:
3-63 characters of
+ * lowercase letters, digits and hyphens, starting and ending with an
alphanumeric. Names
+ * with underscores, uppercase letters or dots cannot be safely used as a
virtual-hosted
+ * subdomain and therefore require path-style addressing.
+ */
+ private static boolean isVirtualHostCompatible(String bucket) {
+ return VIRTUAL_HOST_BUCKET_NAME.matcher(bucket).matches();
+ }
+
+ protected OSS buildOssClient(boolean pathStyle) throws IOException {
+ String endpoint = properties.getEndpoint();
+ String accessKey = properties.getAccessKey();
+ String secretKey = properties.getSecretKey();
+ if (!hasText(accessKey)) {
+ return new OSSClientBuilder().build(endpoint,
anonymousCredentialsProvider(),
+ anonymousClientConfiguration(pathStyle));
}
- if (ossProps.containsKey("OSS_BUCKET") &&
!ossProps.containsKey("AWS_BUCKET")) {
- s3Props.put("AWS_BUCKET", ossProps.get("OSS_BUCKET"));
+ ClientBuilderConfiguration config = clientConfiguration(pathStyle);
+ String token = properties.getSessionToken();
+ if (hasText(token)) {
+ return new OSSClientBuilder().build(endpoint, accessKey,
secretKey, token, config);
}
- if (ossProps.containsKey("OSS_REGION") &&
!ossProps.containsKey("AWS_REGION")) {
- s3Props.put("AWS_REGION", ossProps.get("OSS_REGION"));
+ return new OSSClientBuilder().build(endpoint, accessKey, secretKey,
config);
+ }
+
+ @Override
+ public RemoteObjects listObjects(String remotePath, String
continuationToken) throws IOException {
+ return listObjectsWithOptions(remotePath, ObjectListOptions.builder()
+ .continuationToken(continuationToken)
+ .build());
+ }
+
+ @Override
+ public RemoteObjects listObjectsWithOptions(String remotePath,
ObjectListOptions options) throws IOException {
+ ObjectStorageUri uri = ObjectStorageUri.parse(remotePath, false);
+ ListObjectsRequest request = new ListObjectsRequest(uri.bucket());
+ request.setPrefix(uri.key());
+ if (options != null) {
+ String marker = hasText(options.continuationToken())
+ ? options.continuationToken() : options.startAfter();
+ if (hasText(marker)) {
+ request.setMarker(marker);
+ }
+ if (options.maxKeys() > 0) {
+ request.setMaxKeys(options.maxKeys());
+ }
+ if (hasText(options.delimiter())) {
+ request.setDelimiter(options.delimiter());
+ }
}
- if (ossProps.containsKey("OSS_ROLE_ARN") &&
!ossProps.containsKey("AWS_ROLE_ARN")) {
- s3Props.put("AWS_ROLE_ARN", ossProps.get("OSS_ROLE_ARN"));
+ try {
+ ObjectListing listing =
getClient(uri.bucket()).listObjects(request);
+ List<RemoteObject> objects = listing.getObjectSummaries().stream()
+ .map(obj -> toRemoteObject(uri.key(), obj))
+ .collect(Collectors.toList());
+ return new RemoteObjects(objects, listing.isTruncated(),
+ listing.isTruncated() ? listing.getNextMarker() : null);
+ } catch (OSSException e) {
+ // OSSException (server-side errors such as NoSuchBucket) is a
sibling of
+ // ClientException, not a subclass, so it must be caught
explicitly or it would
+ // propagate as an unwrapped runtime exception.
+ throw new IOException("Failed to list objects at " + remotePath +
": " + e.getMessage(), e);
+ } catch (ClientException e) {
+ throw new IOException("Failed to list objects at " + remotePath +
": " + e.getMessage(), e);
}
- s3Props.put("use_path_style", "false");
- return s3Props;
}
- // -----------------------------------------------------------------------
- // Cloud-specific extension overrides
- // -----------------------------------------------------------------------
+ @Override
+ public RemoteObject headObject(String remotePath) throws IOException {
+ ObjectStorageUri uri = ObjectStorageUri.parse(remotePath, false);
+ try {
+ ObjectMetadata metadata =
getClient(uri.bucket()).getObjectMetadata(uri.bucket(), uri.key());
+ return new RemoteObject(uri.key(), uri.key(), metadata.getETag(),
metadata.getContentLength(),
+ lastModifiedMs(metadata.getLastModified()));
+ } catch (OSSException e) {
+ if (isNotFound(e)) {
+ throw new FileNotFoundException("Object not found: " +
remotePath);
+ }
+ throw new IOException("headObject failed for " + remotePath + ": "
+ e.getMessage(), e);
+ } catch (ClientException e) {
+ throw new IOException("headObject failed for " + remotePath + ": "
+ e.getMessage(), e);
+ }
+ }
- /**
- * Generates a pre-signed PUT URL using the Alibaba Cloud OSS native SDK.
- *
- * @param objectKey the bare object key (no scheme or bucket prefix)
- */
@Override
- public String getPresignedUrl(String objectKey) throws IOException {
- String bucket = resolveRequired("OSS_BUCKET", "AWS_BUCKET", "OSS
bucket for presigned URL");
+ public void putObject(String remotePath, RequestBody requestBody) throws
IOException {
+ ObjectStorageUri uri = ObjectStorageUri.parse(remotePath, false);
+ ObjectMetadata metadata = new ObjectMetadata();
+ metadata.setContentLength(requestBody.contentLength());
+ try (InputStream content = requestBody.content()) {
+ getClient(uri.bucket()).putObject(new
PutObjectRequest(uri.bucket(), uri.key(), content, metadata));
Review Comment:
This catches only `ClientException`, but this same class already notes in
`listObjectsWithOptions` that `OSSException` is a sibling and otherwise
propagates as an unwrapped runtime exception. Server-side OSS failures for this
write path, such as `NoSuchBucket` or `AccessDenied`, will therefore escape
`ObjStorage.putObject` without being converted to the declared `IOException`;
the same pattern appears in copy, multipart upload, presign, and batch delete
below. Callers of the filesystem API commonly handle `IOException` to surface a
`UserException` or `Status`, so native OSS ends up with different failure
behavior from the other backends. Please catch and wrap `OSSException`
consistently for all SDK calls that can return server-side errors, preserving
`FileNotFoundException` where not-found is special.
##########
fe/fe-filesystem/fe-filesystem-cos/src/main/java/org/apache/doris/filesystem/cos/CosObjStorage.java:
##########
@@ -36,122 +60,285 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.net.URL;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
-import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
/**
- * Tencent Cloud COS implementation of {@link
org.apache.doris.filesystem.spi.ObjStorage}.
+ * Tencent Cloud COS implementation backed by the native COS SDK.
*
- * <p>Extends {@link S3ObjStorage} so all core I/O operations (list, head,
put, delete, copy,
- * multipart upload) delegate to the parent using S3-compatible APIs —
matching the reference pattern
- * where {@code CosRemote extends DefaultRemote} (the S3-backed base class).
- *
- * <p>The two cloud-specific extension methods that <em>require</em> a native
SDK are overridden:
- * <ul>
- * <li>{@link #getPresignedUrl(String)} — uses Tencent COS SDK
- * ({@code COSClient.generatePresignedUrl}) to produce the correct COS
signature format.</li>
- * <li>{@link #getStsToken()} — uses Tencent Cloud STS SDK
- * ({@code StsClient.AssumeRole}) to call {@code
sts.tencentcloudapi.com}.</li>
- * </ul>
- *
- * <p>Recognized property keys (COS-specific; AWS_* equivalents accepted as
fallback):
- * <ul>
- * <li>{@code COS_ENDPOINT} / {@code AWS_ENDPOINT} — COS endpoint URL</li>
- * <li>{@code COS_ACCESS_KEY} / {@code AWS_ACCESS_KEY} — SecretId</li>
- * <li>{@code COS_SECRET_KEY} / {@code AWS_SECRET_KEY} — SecretKey</li>
- * <li>{@code COS_BUCKET} / {@code AWS_BUCKET} — bucket name (required for
cloud extensions)</li>
- * <li>{@code COS_REGION} / {@code AWS_REGION} — region (e.g. {@code
ap-guangzhou})</li>
- * <li>{@code COS_ROLE_ARN} / {@code AWS_ROLE_ARN} — role ARN for STS
assumption</li>
- * </ul>
+ * <p>This class consumes typed COS properties. Raw key aliases are resolved by
+ * {@link CosFileSystemProperties}; client construction and authentication do
not
+ * translate through AWS-compatible keys.
*/
-public class CosObjStorage extends S3ObjStorage {
+public class CosObjStorage implements ObjStorage<COSClient> {
private static final Logger LOG =
LogManager.getLogger(CosObjStorage.class);
- /** Validity period for presigned URLs and STS tokens, in seconds. */
private static final int SESSION_EXPIRE_SECONDS = 3600;
+ private static final int DELETE_BATCH_SIZE = 1000;
- private final Map<String, String> cosProperties;
+ private final CosFileSystemProperties properties;
+ private final AtomicBoolean closed = new AtomicBoolean(false);
private volatile COSClient cosClient;
public CosObjStorage(Map<String, String> properties) {
- super(toS3Props(properties));
- this.cosProperties = properties;
+ this(CosFileSystemProperties.of(properties));
+ }
+
+ public CosObjStorage(CosFileSystemProperties properties) {
+ this.properties = properties;
+ }
+
+ /** Whether path-style (vs virtual-hosted-style) bucket access is
configured. */
+ public boolean isUsePathStyle() {
+ return properties.isUsePathStyle();
}
- /**
- * Translates COS-specific property keys to the AWS keys expected by
{@link S3ObjStorage}.
- * If both forms are present, the AWS_* key takes precedence.
- */
- static Map<String, String> toS3Props(Map<String, String> cosProps) {
- Map<String, String> s3Props = new HashMap<>(cosProps);
- if (cosProps.containsKey("COS_ENDPOINT") &&
!cosProps.containsKey("AWS_ENDPOINT")) {
- s3Props.put("AWS_ENDPOINT", cosProps.get("COS_ENDPOINT"));
+ @Override
+ public COSClient getClient() throws IOException {
+ if (closed.get()) {
+ throw new IOException("CosObjStorage is already closed");
}
- if (cosProps.containsKey("COS_ACCESS_KEY") &&
!cosProps.containsKey("AWS_ACCESS_KEY")) {
- s3Props.put("AWS_ACCESS_KEY", cosProps.get("COS_ACCESS_KEY"));
+ if (cosClient == null) {
+ synchronized (this) {
+ if (cosClient == null) {
+ cosClient = buildCosClient(properties.getRegion());
+ }
+ }
}
- if (cosProps.containsKey("COS_SECRET_KEY") &&
!cosProps.containsKey("AWS_SECRET_KEY")) {
- s3Props.put("AWS_SECRET_KEY", cosProps.get("COS_SECRET_KEY"));
+ return cosClient;
+ }
+
+ protected COSClient buildCosClient(String region) throws IOException {
+ COSCredentials cred = buildCredentials();
+ ClientConfig clientConfig = new ClientConfig();
+ clientConfig.setRegion(new Region(region));
+ clientConfig.setHttpProtocol(HttpProtocol.https);
Review Comment:
`CosFileSystem` passes `objStorage.isUsePathStyle()` into
`S3CompatibleFileSystem`, so `use_path_style=true` makes the shared layer parse
HTTPS URLs as path-style (`endpoint/bucket/key`). The native COS client built
here never applies that flag, while S3, OBS, and OSS configure the underlying
SDK for path-style. For COS endpoints or bucket names that require path-style
addressing, Doris will parse the path as if path-style is enabled but the SDK
will still build virtual-hosted COS requests, so the configured native COS
filesystem fails despite accepting the option. Please either configure COS SDK
endpoint building for path-style here or reject/ignore the flag consistently
for COS instead of advertising it through the shared filesystem.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]