yuqi1129 commented on code in PR #10726: URL: https://github.com/apache/gravitino/pull/10726#discussion_r3062979494
########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueClientProvider.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import java.net.URI; +import java.util.Map; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.GlueClientBuilder; + +/** + * Factory for creating AWS {@link GlueClient} instances from Gravitino catalog configuration. + * + * <p>Authentication priority: + * + * <ol> + * <li>Static credentials ({@code aws-access-key-id} + {@code aws-secret-access-key}) + * <li>Default credential chain (environment variables, instance profile, container credentials) + * </ol> + * + * <p>An optional endpoint override ({@code aws-glue-endpoint}) enables connectivity to VPC + * endpoints and LocalStack for integration testing. + */ +public final class GlueClientProvider { + + private GlueClientProvider() {} + + /** + * Builds a {@link GlueClient} from the given catalog configuration map. + * + * @param config Catalog configuration properties. + * @return A configured and ready-to-use {@link GlueClient}. + * @throws IllegalArgumentException if {@code aws-region} is missing or blank, if only one of the + * credential keys is provided, or if {@code aws-glue-endpoint} is not a valid URI. + */ + public static GlueClient buildClient(Map<String, String> config) { + String region = config.get(GlueConstants.AWS_REGION); + Preconditions.checkArgument( + !Strings.isNullOrEmpty(region), Review Comment: **Bug: blank region not caught.** `Strings.isNullOrEmpty(" ")` returns `false` — blank strings pass through to `Region.of()`, which may throw an opaque SDK error instead of the helpful message defined here. The Javadoc already says "missing **or blank**", so the guard should match: ```java !Strings.isNullOrEmpty(region) && !region.isBlank() ``` `testBuildClientBlankRegionThrows` technically passes today because the AWS SDK's `Validate.paramNotBlank` fires, but the error message is not the one we defined. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java: ########## @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +/** Constant keys for the AWS Glue Data Catalog connector configuration and table properties. */ +public final class GlueConstants { + + // ------------------------------------------------------------------------- + // Catalog-level connection properties + // ------------------------------------------------------------------------- + + /** AWS region for the Glue Data Catalog (required). */ + public static final String AWS_REGION = "aws-region"; + + /** Glue catalog ID — the 12-digit AWS account ID (required). */ + public static final String AWS_GLUE_CATALOG_ID = "aws-glue-catalog-id"; + + /** AWS access key ID for static credential authentication (optional, sensitive). */ + public static final String AWS_ACCESS_KEY_ID = "aws-access-key-id"; + + /** AWS secret access key for static credential authentication (optional, sensitive). */ + public static final String AWS_SECRET_ACCESS_KEY = "aws-secret-access-key"; + + /** + * Custom Glue endpoint URL (optional). Used for VPC endpoints or LocalStack testing. Example: + * {@code http://localhost:4566} + */ + public static final String AWS_GLUE_ENDPOINT = "aws-glue-endpoint"; + + /** + * Default table format used when creating tables via Gravitino's {@code createTable()} API + * (optional). Accepted values: {@code iceberg}, {@code hive}. Defaults to {@code hive}. + */ + public static final String DEFAULT_TABLE_FORMAT = "default-table-format"; + + /** Default value for {@link #DEFAULT_TABLE_FORMAT}: {@code "hive"}. */ + public static final String DEFAULT_TABLE_FORMAT_VALUE = "hive"; + + /** + * Comma-separated list of table types exposed by {@code listTables()} and {@code loadTable()} + * (optional). Accepted values: {@code all}, {@code hive}, {@code iceberg}, {@code delta}, {@code + * parquet}. Defaults to {@code all}. + */ + public static final String TABLE_TYPE_FILTER = "table-type-filter"; + + /** Default value for {@link #TABLE_TYPE_FILTER}: expose all table types. */ + public static final String TABLE_TYPE_FILTER_ALL = "all"; + + // ------------------------------------------------------------------------- + // Glue Table.parameters() keys (passthrough properties) + // ------------------------------------------------------------------------- + + /** + * Glue table format type parameter key stored in {@code Table.parameters()}. Common values: + * {@code ICEBERG}, {@code HIVE}, {@code DELTA}, {@code PARQUET}. Review Comment: Minor nit: the values listed here (`ICEBERG`, `HIVE`, `DELTA`, `PARQUET` — uppercase) are Glue `Table.parameters()` values, while `TABLE_TYPE_FILTER`'s accepted values (line 57) are lowercase (`hive`, `iceberg`, ...). They're different namespaces so the casing difference is correct, but a brief note like "Note: these are stored uppercase in Glue parameters; see `TABLE_TYPE_FILTER` for the Gravitino-side filter values" would prevent confusion. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueClientProvider.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import java.net.URI; +import java.util.Map; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.GlueClientBuilder; + +/** + * Factory for creating AWS {@link GlueClient} instances from Gravitino catalog configuration. + * + * <p>Authentication priority: + * + * <ol> + * <li>Static credentials ({@code aws-access-key-id} + {@code aws-secret-access-key}) + * <li>Default credential chain (environment variables, instance profile, container credentials) + * </ol> + * + * <p>An optional endpoint override ({@code aws-glue-endpoint}) enables connectivity to VPC + * endpoints and LocalStack for integration testing. + */ +public final class GlueClientProvider { + + private GlueClientProvider() {} + + /** + * Builds a {@link GlueClient} from the given catalog configuration map. + * + * @param config Catalog configuration properties. + * @return A configured and ready-to-use {@link GlueClient}. + * @throws IllegalArgumentException if {@code aws-region} is missing or blank, if only one of the + * credential keys is provided, or if {@code aws-glue-endpoint} is not a valid URI. + */ + public static GlueClient buildClient(Map<String, String> config) { + String region = config.get(GlueConstants.AWS_REGION); + Preconditions.checkArgument( + !Strings.isNullOrEmpty(region), + "Property '%s' is required to create a Glue client", + GlueConstants.AWS_REGION); + + GlueClientBuilder builder = GlueClient.builder().region(Region.of(region)); + + // Static credentials take priority over the default credential chain. + // Both keys must be provided together — a partial pair is always a misconfiguration. + String accessKey = config.get(GlueConstants.AWS_ACCESS_KEY_ID); + String secretKey = config.get(GlueConstants.AWS_SECRET_ACCESS_KEY); + boolean hasAccessKey = !Strings.isNullOrEmpty(accessKey); Review Comment: **Same blank issue for credentials.** If a user provides `aws-access-key-id = " "` (all spaces), `isNullOrEmpty` returns `false`, so `hasAccessKey = true`. If the secret key is absent, the pair-check fires correctly. But if *both* are blank strings, `hasAccessKey == hasSecretKey == true`, and `AwsBasicCredentials.create(" ", " ")` is called — which silently succeeds at build time and fails with a cryptic auth error at the first API call. Suggest: ```java boolean hasAccessKey = !Strings.isNullOrEmpty(accessKey) && !accessKey.isBlank(); boolean hasSecretKey = !Strings.isNullOrEmpty(secretKey) && !secretKey.isBlank(); ``` ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java: ########## @@ -18,20 +18,87 @@ */ package org.apache.gravitino.catalog.glue; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_ACCESS_KEY_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_CATALOG_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_ENDPOINT; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_REGION; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_SECRET_ACCESS_KEY; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT_VALUE; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER_ALL; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; +import static org.apache.gravitino.connector.PropertyEntry.stringRequiredPropertyEntry; + import com.google.common.collect.ImmutableMap; import java.util.Map; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; -/** - * Properties metadata for the AWS Glue Data Catalog connector. - * - * <p>TODO PR-02: add required properties (aws-region, aws-glue-catalog-id) and optional properties - * (credentials, endpoint override, default-table-format, table-type-filter). - */ +/** Properties metadata for the AWS Glue Data Catalog connector catalog-level configuration. */ public class GlueCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = ImmutableMap.of(); + private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = + ImmutableMap.<String, PropertyEntry<?>>builder() + .put( + AWS_REGION, + stringRequiredPropertyEntry( + AWS_REGION, + "AWS region for the Glue Data Catalog (e.g. us-east-1)", + true /* immutable */, + false /* hidden */)) + .put( + AWS_GLUE_CATALOG_ID, + stringRequiredPropertyEntry( + AWS_GLUE_CATALOG_ID, + "The 12-digit AWS account ID that owns the Glue catalog", + true /* immutable */, + false /* hidden */)) + .put( + AWS_ACCESS_KEY_ID, + stringOptionalPropertyEntry( + AWS_ACCESS_KEY_ID, + "AWS access key ID for static credential authentication." + + " When omitted the default credential chain is used.", + false /* immutable */, + null /* defaultValue */, + true /* hidden */)) + .put( + AWS_SECRET_ACCESS_KEY, + stringOptionalPropertyEntry( + AWS_SECRET_ACCESS_KEY, + "AWS secret access key paired with aws-access-key-id." + + " When omitted the default credential chain is used.", + false /* immutable */, + null /* defaultValue */, + true /* hidden */)) + .put( + AWS_GLUE_ENDPOINT, + stringOptionalPropertyEntry( + AWS_GLUE_ENDPOINT, + "Custom Glue endpoint URL for VPC endpoints or LocalStack testing" + + " (e.g. http://localhost:4566)", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + DEFAULT_TABLE_FORMAT, + stringOptionalPropertyEntry( + DEFAULT_TABLE_FORMAT, + "Default format for tables created via createTable(). Accepted: iceberg, hive.", Review Comment: **No validation of accepted values for `default-table-format`.** The description says "Accepted: iceberg, hive" but an invalid value like `"parquet"` will be silently stored and only fail (with a confusing error) at `createTable()` time. Same issue applies to `table-type-filter` on line 96. Suggest adding validation in the catalog's `initialize()` or in `GlueClientProvider`, or at minimum calling out in the description that unrecognised values are deferred to runtime. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java: ########## @@ -18,20 +18,87 @@ */ package org.apache.gravitino.catalog.glue; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_ACCESS_KEY_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_CATALOG_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_ENDPOINT; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_REGION; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_SECRET_ACCESS_KEY; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT_VALUE; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER_ALL; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; +import static org.apache.gravitino.connector.PropertyEntry.stringRequiredPropertyEntry; + import com.google.common.collect.ImmutableMap; import java.util.Map; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; -/** - * Properties metadata for the AWS Glue Data Catalog connector. - * - * <p>TODO PR-02: add required properties (aws-region, aws-glue-catalog-id) and optional properties - * (credentials, endpoint override, default-table-format, table-type-filter). - */ +/** Properties metadata for the AWS Glue Data Catalog connector catalog-level configuration. */ public class GlueCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = ImmutableMap.of(); + private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = + ImmutableMap.<String, PropertyEntry<?>>builder() + .put( + AWS_REGION, + stringRequiredPropertyEntry( + AWS_REGION, + "AWS region for the Glue Data Catalog (e.g. us-east-1)", + true /* immutable */, + false /* hidden */)) + .put( + AWS_GLUE_CATALOG_ID, + stringRequiredPropertyEntry( Review Comment: **Consider making `aws-glue-catalog-id` optional.** When omitted, Glue defaults to the 12-digit account ID of the caller's credentials. Marking it as `required` forces every user to look up and supply their account ID even when the default is correct. Either: - Make it optional (with a description note: "When omitted, defaults to the caller's AWS account ID"), or - Keep it required but explicitly explain in the description why the default is insufficient for Gravitino's use case (e.g. cross-account scenarios). If there's a specific reason Gravitino always needs the explicit ID, a comment here would prevent future confusion. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
