[
https://issues.apache.org/jira/browse/HADOOP-19740?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18038998#comment-18038998
]
ASF GitHub Bot commented on HADOOP-19740:
-----------------------------------------
mukund-thakur commented on code in PR #8058:
URL: https://github.com/apache/hadoop/pull/8058#discussion_r2535749021
##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java:
##########
@@ -0,0 +1,535 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.regions.providers.InstanceProfileRegionProvider;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.Invoker;
+import org.apache.hadoop.fs.s3a.Retries;
+import org.apache.hadoop.fs.s3a.S3ClientFactory;
+
+import static java.util.Objects.requireNonNull;
+import static
org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
+import static
org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.EC2_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.EMPTY_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+import static software.amazon.awssdk.regions.Region.US_EAST_2;
+
+/**
+ * Region resolution.
+ * <p>This is complicated and can be a source of support escalations.
+ * <p>The V1 SDK was happy to take an endpoint and
+ * work details out from there, possibly probing us-east-1 and caching
+ * the result.
+ * <p>The V2 SDK likes the signing region and endpoint to be declared.
+ * The S3A connector has tried to mimic the V1 code, but lacks some features
+ * (use of environment variables, probing of EC2 IAM details) for which
+ * the SDK is better.
+ *
+ */
+public class RegionResolution {
+
+ /**
+ * Logger for this class.
+ */
+ protected static final Logger LOG =
+ LoggerFactory.getLogger(RegionResolution.class);
+
+ /**
+ * Service name handed to the SDK when asking it to parse a signing
+ * region out of an endpoint: {@value}.
+ */
+ private static final String S3_SERVICE_NAME = "s3";
+
+ /**
+ * Pattern to match vpce endpoints on.
+ * The pattern is anchored at both ends: an optional dotted prefix,
+ * then a single label of lower case letters, digits and hyphens,
+ * then {@code .vpce.amazonaws.com} or {@code .vpce.amazonaws.com.cn}.
+ * Capture group 1 is the label immediately before {@code .vpce}.
+ */
+ private static final Pattern VPC_ENDPOINT_PATTERN =
+
Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$");
+
+ /**
+ * Error message when an endpoint is set with FIPS enabled: {@value}.
+ * NOTE(review): the wording "Only ... cannot be set" reads as
+ * self-contradictory; presumably "Only S3 central endpoint can be set"
+ * is what is meant. Confirm against the check which raises it.
+ */
+ @VisibleForTesting
+ public static final String ERROR_ENDPOINT_WITH_FIPS =
+ "Only S3 central endpoint cannot be set when " + FIPS_ENDPOINT + " is
true";
+
+ /**
+ * Message text stating that path style access and FIPS
+ * cannot be combined.
+ * Virtual hostnames MUST be used when using the FIPS endpoint.
+ */
+ public static final String FIPS_PATH_ACCESS_INCOMPATIBLE =
+ "Path style access must be disabled when " + FIPS_ENDPOINT + " is true";
+
+ /**
+ * String value for external region: {@value}.
+ */
+ public static final String EXTERNAL = "external";
+
+ /**
+ * External region, used for third party endpoints.
+ * Built from the {@link #EXTERNAL} string.
+ */
+ public static final Region EXTERNAL_REGION = Region.of(EXTERNAL);
+
+ /**
+ * How was the region resolved?
+ * Each value carries a short human-readable description of the
+ * mechanism, available through {@link #getMechanism()}.
+ */
+ public enum RegionResolutionMechanism {
+
+ CalculatedFromEndpoint("Calculated from endpoint"),
+ ExternalEndpoint("External endpoint"),
+ FallbackToCentral("Fallback to central endpoint"),
+ ParseVpceEndpoint("Parse VPCE Endpoint"),
+ Ec2Metadata("EC2 Metadata"),
+ Sdk("SDK resolution chain"),
+ Specified("region specified");
+
+ /**
+ * Text of the mechanism.
+ */
+ private final String mechanism;
+
+ /**
+ * Create an enum value.
+ * @param mechanism descriptive text of the mechanism.
+ */
+ RegionResolutionMechanism(String mechanism) {
+ this.mechanism = mechanism;
+ }
+
+ /**
+ * String value of the resolution mechanism.
+ * @return the resolution mechanism.
+ */
+ public String getMechanism() {
+ return mechanism;
+ }
+
+ /**
+ * String form wrapping the descriptive text rather than the
+ * enum constant name.
+ * @return a string of the form
+ * {@code RegionResolutionMechanism{mechanism='text'}}.
+ */
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("RegionResolutionMechanism{");
+ sb.append("mechanism='").append(mechanism).append('\'');
+ sb.append('}');
+ return sb.toString();
+ }
+ }
+
+ /**
+ * The resolution of a region and endpoint.
+ * A mutable holder: the {@code with*()} setters update this
+ * instance and return it so that calls can be chained.
+ */
+ public static final class Resolution {
+
+ /**
+ * Region: if null hand down to the SDK.
+ */
+ private Region region;
+
+ /**
+ * How was the region resolved?
+ * Null means unresolved.
+ */
+ private RegionResolutionMechanism mechanism;
+
+ /**
+ * Should FIPS be enabled?
+ */
+ private boolean useFips;
+
+ /**
+ * Should cross-region access be enabled?
+ */
+ private boolean crossRegionAccessEnabled;
+
+ /**
+ * Endpoint as string.
+ */
+ private String endpointStr;
+
+ /**
+ * Endpoint URI.
+ */
+ private URI endpointUri;
+
+ /**
+ * Use the central endpoint?
+ */
+ private boolean useCentralEndpoint;
+
+ /**
+ * Create an unresolved instance: no region, no mechanism,
+ * all flags false.
+ */
+ public Resolution() {
+ }
+
+ /**
+ * Instantiate with a region and resolution mechanism.
+ * Unlike {@link #withRegion(Region, RegionResolutionMechanism)},
+ * the mechanism is not checked for being null.
+ * @param region region
+ * @param mechanism resolution mechanism.
+ */
+ public Resolution(final Region region, final RegionResolutionMechanism
mechanism) {
+ this.region = region;
+ this.mechanism = mechanism;
+ }
+
+ /**
+ * Set the region and the mechanism by which it was resolved.
+ * Declares the region as resolved even when the value is null
+ * (i.e. resolve to SDK).
+ * @param region region; may be null.
+ * @param resolutionMechanism resolution mechanism; must not be null.
+ * @return this instance
+ * @throws NullPointerException if resolutionMechanism is null.
+ */
+ public Resolution withRegion(
+ @Nullable final Region region,
+ final RegionResolutionMechanism resolutionMechanism) {
+ this.region = region;
+ this.mechanism = requireNonNull(resolutionMechanism);
+ return this;
+ }
+
+ /**
+ * Set the FIPS flag.
+ * @param value new value
+ * @return this instance
+ */
+ public Resolution withUseFips(final boolean value) {
+ useFips = value;
+ return this;
+ }
+
+ /**
+ * Set the cross-region access flag.
+ * @param value new value
+ * @return this instance
+ */
+ public Resolution withCrossRegionAccessEnabled(final boolean value) {
+ crossRegionAccessEnabled = value;
+ return this;
+ }
+
+ /**
+ * Set the endpoint string.
+ * This does not update the endpoint URI.
+ * @param value new value
+ * @return this instance
+ */
+ public Resolution withEndpointStr(final String value) {
+ endpointStr = value;
+ return this;
+ }
+
+ /**
+ * Get the endpoint URI.
+ * @return the endpoint URI; null if none has been set.
+ */
+ public URI getEndpointUri() {
+ return endpointUri;
+ }
+
+ /**
+ * Set the endpoint URI.
+ * This does not update the endpoint string.
+ * @param value new value
+ * @return this instance
+ */
+ public Resolution withEndpointUri(final URI value) {
+ endpointUri = value;
+ return this;
+ }
+
+ /**
+ * Get the region.
+ * @return the region; null means hand down to the SDK.
+ */
+ public Region getRegion() {
+ return region;
+ }
+
+ /**
+ * Should FIPS be enabled?
+ * @return the FIPS flag.
+ */
+ public boolean isUseFips() {
+ return useFips;
+ }
+
+ /**
+ * Should cross-region access be enabled?
+ * @return the cross-region access flag.
+ */
+ public boolean isCrossRegionAccessEnabled() {
+ return crossRegionAccessEnabled;
+ }
+
+ /**
+ * Get the resolution mechanism.
+ * @return the mechanism; null means unresolved.
+ */
+ public RegionResolutionMechanism getMechanism() {
+ return mechanism;
+ }
+
+ /**
+ * Get the endpoint string.
+ * @return the endpoint string; null if none has been set.
+ */
+ public String getEndpointStr() {
+ return endpointStr;
+ }
+
+ /**
+ * Has the region been resolved?
+ * This is determined by the mechanism alone: a null region with
+ * a mechanism set counts as resolved.
+ * @return true if a resolution mechanism has been set.
+ */
+ public boolean isRegionResolved() {
+ return mechanism != null;
+ }
+
+ /**
+ * Use the central endpoint?
+ * @return the central endpoint flag.
+ */
+ public boolean isUseCentralEndpoint() {
+ return useCentralEndpoint;
+ }
+
+ /**
+ * Set the central endpoint flag.
+ * @param value new value
+ * @return this instance
+ */
+ public Resolution withUseCentralEndpoint(final boolean value) {
+ useCentralEndpoint = value;
+ return this;
+ }
+
+ /**
+ * Build a string listing the region, the mechanism (under the
+ * label "resolution"), the FIPS and cross-region flags,
+ * the endpoint URI and the central endpoint flag.
+ * The endpoint string is not included.
+ * @return the string value.
+ */
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("Resolution{");
+ sb.append("region=").append(region);
+ sb.append(", resolution=").append(mechanism);
+ sb.append(", useFips=").append(useFips);
+ sb.append(",
crossRegionAccessEnabled=").append(crossRegionAccessEnabled);
+ sb.append(", endpointUri=").append(endpointUri);
+ sb.append(", useCentralEndpoint=").append(useCentralEndpoint);
+ sb.append('}');
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Given an endpoint string, create the endpoint URI.
+ * If the endpoint does not already declare a scheme, one is
+ * prepended: https when {@code secureConnections} is true, http otherwise.
+ * An endpoint which already declares a scheme is parsed as is, whatever
+ * the value of {@code secureConnections}.
+ * @param endpoint possibly null endpoint.
+ * @param secureConnections use secure HTTPS connection?
+ * @return an endpoint URI, or null if the endpoint passed in was null/empty
+ * @throws IllegalArgumentException failure to parse the endpoint.
+ */
+ public static URI buildEndpointUri(String endpoint, final boolean
secureConnections) {
+
+ String protocol = secureConnections ? "https" : "http";
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // don't set an endpoint if none is configured, instead let the SDK
figure it out.
+ return null;
+ }
+
+ /* no scheme separator in the endpoint: prepend the chosen protocol. */
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ /* rethrow unchecked, retaining the parse failure as the cause. */
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Parses the endpoint to get the region.
+ * <p>If the endpoint is flagged as the central one, nothing is parsed
+ * here and an empty result is returned.
+ * NOTE(review): this javadoc previously said "use US_EAST_2" for the
+ * central endpoint; this method does not do that. Presumably the caller
+ * handles it: confirm.
+ * <p>Otherwise the endpoint is first matched against the VPC endpoint
+ * pattern, with the captured label used as the region name. If it does
+ * not match, the SDK's {@code AwsHostNameUtils.parseSigningRegion()}
+ * is asked to work out the region.
+ * @param endpoint the configured endpoint; only examined when
+ * {@code endpointEndsWithCentral} is false, in which case it is passed
+ * straight to the pattern matcher without a null check.
+ * @param endpointEndsWithCentral true if the endpoint is configured as
+ * central.
+ * @return the S3 region resolution if possible from parsing the endpoint;
+ * empty otherwise.
+ */
+ @VisibleForTesting
+ public static Optional<Resolution> getS3RegionFromEndpoint(
+ final String endpoint,
+ final boolean endpointEndsWithCentral) {
+
+ if (!endpointEndsWithCentral) {
+ // S3 VPC endpoint parsing
+ Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
+ if (matcher.find()) {
+ LOG.debug("Mapping to VPCE");
+ LOG.debug("Endpoint {} is VPC endpoint; parsing region as {}",
+ endpoint, matcher.group(1));
+ return Optional.of(new Resolution(
+ Region.of(matcher.group(1)),
+ RegionResolutionMechanism.ParseVpceEndpoint));
+ }
+
+ LOG.debug("Endpoint {} is not the default; parsing", endpoint);
+ return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME)
+ .map(r ->
+ new Resolution(r,
RegionResolutionMechanism.CalculatedFromEndpoint));
+ }
+
+ // No resolution.
+ return Optional.empty();
+ }
+
+ /**
+ * Is this an AWS endpoint, that is: has an endpoint been set which matches
+ * amazon.
+ * @param endpoint non-null endpoint URL
+ * @return true if this is amazonaws or amazonaws china
+ */
+ public static boolean isAwsEndpoint(final String endpoint) {
Review Comment:
Can there be other cases? How do we test this?
> S3A: add explicit "sdk" and "ec2" regions for region resolution through SDK
> and EC2 IAM
> ---------------------------------------------------------------------------------------
>
> Key: HADOOP-19740
> URL: https://issues.apache.org/jira/browse/HADOOP-19740
> Project: Hadoop Common
> Issue Type: Improvement
> Components: fs/s3
> Affects Versions: 3.4.2
> Reporter: Steve Loughran
> Assignee: Steve Loughran
> Priority: Major
> Labels: pull-request-available
>
> Add explicit regions to hand off to the sdk
> * sdk: "use the sdk chain"
> * ec2: "we are in EC2, use the local region": use the IAM logic inside the
> SDK directly.
> An empty string "" also hands off to the SDK; the warning will be removed.
> Also: if an endpoint is set and it is not parsed as a vpce endpoint, we will
> automatically use the region name "external". This avoids the need to make
> up an external region when working with an endpoint.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]