steveloughran commented on code in PR #6106:
URL: https://github.com/apache/hadoop/pull/6106#discussion_r1347608674
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -229,4 +254,49 @@ private static URI getS3Endpoint(String endpoint, final
Configuration conf) {
throw new IllegalArgumentException(e);
}
}
+
+ /**
+ * Parses the endpoint to get the region.
+ * If endpoint is the central one, use US_EAST_1.
+ *
+ * @param endpoint the configure endpoint.
+ * @return the S3 region.
+ */
+ private static Region getS3RegionFromEndpoint(String endpoint) {
+
+ if(!endpoint.endsWith(CENTRAL_ENDPOINT)) {
+ LOG.debug("Endpoint {} is not the default; parsing", endpoint);
+ return AwsHostNameUtils.parseSigningRegion(endpoint,
S3_SERVICE_NAME).orElse(null);
Review Comment:
weve had so much trouble in the past here with vpce, govcloud, cn that we
need a set of unit tests to verify this code does what we actually want -and
therea are no regressions
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -138,14 +157,21 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT,
ClientT>, ClientT> Build
BuilderT builder, S3ClientCreationParameters parameters, Configuration
conf, String bucket)
throws IOException {
- Region region = parameters.getRegion();
- LOG.debug("Using region {}", region);
-
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
+ Region region = null;
+
if (endpoint != null) {
builder.endpointOverride(endpoint);
- LOG.debug("Using endpoint {}", endpoint);
+ region = getS3RegionFromEndpoint(parameters.getEndpoint());
+ LOG.debug("Using endpoint {} and region {}", endpoint, region);
+ }
+
+ if (region != null) {
+ builder.region(region);
+ } else {
+ builder.crossRegionAccessEnabled(true);
+ builder.region(getS3Region(conf));
Review Comment:
also, what about: env vars and sysprops? good or bad?
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -229,4 +254,49 @@ private static URI getS3Endpoint(String endpoint, final
Configuration conf) {
throw new IllegalArgumentException(e);
}
}
+
+ /**
+ * Parses the endpoint to get the region.
+ * If endpoint is the central one, use US_EAST_1.
+ *
+ * @param endpoint the configure endpoint.
+ * @return the S3 region.
Review Comment:
javadoc to say returns null if not resolvved
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -229,4 +254,49 @@ private static URI getS3Endpoint(String endpoint, final
Configuration conf) {
throw new IllegalArgumentException(e);
}
}
+
+ /**
+ * Parses the endpoint to get the region.
+ * If endpoint is the central one, use US_EAST_1.
+ *
+ * @param endpoint the configure endpoint.
+ * @return the S3 region.
+ */
+ private static Region getS3RegionFromEndpoint(String endpoint) {
+
+ if(!endpoint.endsWith(CENTRAL_ENDPOINT)) {
+ LOG.debug("Endpoint {} is not the default; parsing", endpoint);
+ return AwsHostNameUtils.parseSigningRegion(endpoint,
S3_SERVICE_NAME).orElse(null);
+ }
+
+ // endpoint is for US_EAST_1;
+ return Region.US_EAST_1;
+ }
+
+ /**
+ * Gets the region from configuration and returns.
+ * If configured region is an empty string, use
+ * the default SDK resolution chain.
+ *
+ * @param conf Configuration.
+ * @return the S3 region
+ */
+ private static Region getS3Region(Configuration conf) {
+ String region = conf.getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
+ LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
+ if (!region.isEmpty()) {
+ // there's either an explicit region or we have fallen back
+ // to the central one.
+ LOG.debug("Setting region to {}", region);
+ return Region.of(region);
+ } else {
+ // no region.
+ // allow this if people really want it; it is OK to rely on this
+ // when deployed in EC2.
Review Comment:
if it is ok in ec2, then this will be very noisy in ec2 deployments?
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -138,14 +157,21 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT,
ClientT>, ClientT> Build
BuilderT builder, S3ClientCreationParameters parameters, Configuration
conf, String bucket)
throws IOException {
- Region region = parameters.getRegion();
- LOG.debug("Using region {}", region);
-
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
+ Region region = null;
+
if (endpoint != null) {
builder.endpointOverride(endpoint);
- LOG.debug("Using endpoint {}", endpoint);
+ region = getS3RegionFromEndpoint(parameters.getEndpoint());
+ LOG.debug("Using endpoint {} and region {}", endpoint, region);
+ }
+
+ if (region != null) {
+ builder.region(region);
+ } else {
+ builder.crossRegionAccessEnabled(true);
+ builder.region(getS3Region(conf));
Review Comment:
if a region is set in the config, it must take priority over anything the
AWS SDK determines from the endpoint. without that we can never correct for sdk
issues
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java:
##########
@@ -138,14 +157,21 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT,
ClientT>, ClientT> Build
BuilderT builder, S3ClientCreationParameters parameters, Configuration
conf, String bucket)
throws IOException {
- Region region = parameters.getRegion();
Review Comment:
restore the ability to pass in a region
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]