This is an automated email from the ASF dual-hosted git repository. diqiu50 pushed a commit to branch glue-pr03 in repository https://gitbox.apache.org/repos/asf/gravitino.git
commit 04713355a9f3b63cdf9e1a513ac732092ea15411 Author: diqiu50 <[email protected]> AuthorDate: Wed Apr 8 22:36:58 2026 +0800 [MINOR] feat(catalog-glue): PR-02 — GlueClientProvider + properties metadata - Add GlueConstants for all catalog and table property keys - Add GlueClientProvider: static creds / DefaultCredentialChain selection, region, and endpoint override (for VPC endpoints / LocalStack) - Implement GlueCatalogPropertiesMetadata: required aws-region + aws-glue-catalog-id, optional credentials (hidden), endpoint, default-table-format, table-type-filter - Implement GlueCatalogCapability: case-insensitive names, no NOT NULL, no DEFAULT - Implement GlueTablePropertiesMetadata: table_type, metadata_location, location - Add TestGlueClientProvider unit tests --- .../catalog/glue/GlueCatalogCapability.java | 41 ++++++++- .../glue/GlueCatalogPropertiesMetadata.java | 81 +++++++++++++++-- .../gravitino/catalog/glue/GlueClientProvider.java | 86 ++++++++++++++++++ .../gravitino/catalog/glue/GlueConstants.java | 82 +++++++++++++++++ .../catalog/glue/GlueTablePropertiesMetadata.java | 40 ++++++++- .../catalog/glue/TestGlueClientProvider.java | 100 +++++++++++++++++++++ 6 files changed, 417 insertions(+), 13 deletions(-) diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogCapability.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogCapability.java index 9a17cf6a4d..0a863b3d1b 100644 --- a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogCapability.java +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogCapability.java @@ -19,11 +19,46 @@ package org.apache.gravitino.catalog.glue; import org.apache.gravitino.connector.capability.Capability; +import org.apache.gravitino.connector.capability.CapabilityResult; /** * Capability declarations for the AWS Glue Data Catalog connector. * - * <p>TODO PR-02: declare actual capabilities (case sensitivity, NOT NULL support, etc.) based on - * Glue's known constraints. + * <p>AWS Glue constraints that deviate from Gravitino defaults: + * + * <ul> + * <li>Names (database, table, column) are case-insensitive — Glue normalises them to lowercase. + * <li>Column NOT NULL constraints are not enforced by Glue. + * <li>Column DEFAULT values are not supported by Glue. + * </ul> */ -public class GlueCatalogCapability implements Capability {} +public class GlueCatalogCapability implements Capability { + + @Override + public CapabilityResult columnNotNull() { + return CapabilityResult.unsupported( + "AWS Glue Data Catalog does not enforce NOT NULL constraints on columns."); + } + + @Override + public CapabilityResult columnDefaultValue() { + return CapabilityResult.unsupported( + "AWS Glue Data Catalog does not support DEFAULT values on columns."); + } + + @Override + public CapabilityResult caseSensitiveOnName(Scope scope) { + switch (scope) { + case SCHEMA: + case TABLE: + case COLUMN: + // Glue normalises database/table/column names to lowercase. + return CapabilityResult.unsupported( + "AWS Glue Data Catalog is case-insensitive for " + + scope.name().toLowerCase() + + " names."); + default: + return CapabilityResult.SUPPORTED; + } + } +} diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java index eacaca3950..443fc5a0c9 100644 --- a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueCatalogPropertiesMetadata.java @@ -18,20 +18,87 @@ */ package org.apache.gravitino.catalog.glue; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_ACCESS_KEY_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_CATALOG_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_ENDPOINT; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_REGION; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_SECRET_ACCESS_KEY; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT; +import static org.apache.gravitino.catalog.glue.GlueConstants.DEFAULT_TABLE_FORMAT_VALUE; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE_FILTER_ALL; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; +import static org.apache.gravitino.connector.PropertyEntry.stringRequiredPropertyEntry; + import com.google.common.collect.ImmutableMap; import java.util.Map; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; -/** - * Properties metadata for the AWS Glue Data Catalog connector. - * - * <p>TODO PR-02: add required properties (aws-region, aws-glue-catalog-id) and optional properties - * (credentials, endpoint override, default-table-format, table-type-filter). - */ +/** Properties metadata for the AWS Glue Data Catalog connector catalog-level configuration. */ public class GlueCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = ImmutableMap.of(); + private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = + ImmutableMap.<String, PropertyEntry<?>>builder() + .put( + AWS_REGION, + stringRequiredPropertyEntry( + AWS_REGION, + "AWS region for the Glue Data Catalog (e.g. us-east-1)", + true /* immutable */, + false /* hidden */)) + .put( + AWS_GLUE_CATALOG_ID, + stringRequiredPropertyEntry( + AWS_GLUE_CATALOG_ID, + "The 12-digit AWS account ID that owns the Glue catalog", + true /* immutable */, + false /* hidden */)) + .put( + AWS_ACCESS_KEY_ID, + stringOptionalPropertyEntry( + AWS_ACCESS_KEY_ID, + "AWS access key ID for static credential authentication." + + " When omitted the default credential chain is used.", + false /* immutable */, + null /* defaultValue */, + true /* hidden */)) + .put( + AWS_SECRET_ACCESS_KEY, + stringOptionalPropertyEntry( + AWS_SECRET_ACCESS_KEY, + "AWS secret access key paired with aws-access-key-id." + + " When omitted the default credential chain is used.", + false /* immutable */, + null /* defaultValue */, + true /* hidden */)) + .put( + AWS_GLUE_ENDPOINT, + stringOptionalPropertyEntry( + AWS_GLUE_ENDPOINT, + "Custom Glue endpoint URL for VPC endpoints or LocalStack testing" + + " (e.g. http://localhost:4566)", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + DEFAULT_TABLE_FORMAT, + stringOptionalPropertyEntry( + DEFAULT_TABLE_FORMAT, + "Default format for tables created via createTable(). Accepted: iceberg, hive.", + false /* immutable */, + DEFAULT_TABLE_FORMAT_VALUE, + false /* hidden */)) + .put( + TABLE_TYPE_FILTER, + stringOptionalPropertyEntry( + TABLE_TYPE_FILTER, + "Comma-separated table types exposed by listTables() and loadTable()." + + " Accepted: all, hive, iceberg, delta, parquet.", + false /* immutable */, + TABLE_TYPE_FILTER_ALL, + false /* hidden */)) + .build(); @Override protected Map<String, PropertyEntry<?>> specificPropertyEntries() { diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueClientProvider.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueClientProvider.java new file mode 100644 index 0000000000..e865ec2287 --- /dev/null +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueClientProvider.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import java.net.URI; +import java.util.Map; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.GlueClientBuilder; + +/** + * Factory for creating AWS {@link GlueClient} instances from Gravitino catalog configuration. + * + * <p>Authentication priority: + * + * <ol> + * <li>Static credentials ({@code aws-access-key-id} + {@code aws-secret-access-key}) + * <li>Default credential chain (environment variables, instance profile, container credentials) + * </ol> + * + * <p>An optional endpoint override ({@code aws-glue-endpoint}) enables connectivity to VPC + * endpoints and LocalStack for integration testing. + */ +public final class GlueClientProvider { + + private GlueClientProvider() {} + + /** + * Builds a {@link GlueClient} from the given catalog configuration map. + * + * @param config Catalog configuration properties. + * @return A configured and ready-to-use {@link GlueClient}. + * @throws IllegalArgumentException if {@code aws-region} is missing or blank. + */ + public static GlueClient buildClient(Map<String, String> config) { + String region = config.get(GlueConstants.AWS_REGION); + Preconditions.checkArgument( + !Strings.isNullOrEmpty(region) && !region.isBlank(), + "Property '%s' is required to create a Glue client", + GlueConstants.AWS_REGION); + + GlueClientBuilder builder = GlueClient.builder().region(Region.of(region)); + + // Static credentials take priority over the default credential chain. + String accessKey = config.get(GlueConstants.AWS_ACCESS_KEY_ID); + String secretKey = config.get(GlueConstants.AWS_SECRET_ACCESS_KEY); + if (!Strings.isNullOrEmpty(accessKey) + && !accessKey.isBlank() + && !Strings.isNullOrEmpty(secretKey) + && !secretKey.isBlank()) { + builder.credentialsProvider( + StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey))); + } else { + builder.credentialsProvider(DefaultCredentialsProvider.create()); + } + + // Optional custom endpoint override for VPC endpoints or LocalStack testing. + String endpoint = config.get(GlueConstants.AWS_GLUE_ENDPOINT); + if (!Strings.isNullOrEmpty(endpoint) && !endpoint.isBlank()) { + builder.endpointOverride(URI.create(endpoint)); + } + + return builder.build(); + } +} diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java new file mode 100644 index 0000000000..fde675afe5 --- /dev/null +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +/** Constant keys for the AWS Glue Data Catalog connector configuration and table properties. */ +public final class GlueConstants { + + // ------------------------------------------------------------------------- + // Catalog-level connection properties + // ------------------------------------------------------------------------- + + /** AWS region for the Glue Data Catalog (required). */ + public static final String AWS_REGION = "aws-region"; + + /** Glue catalog ID — the 12-digit AWS account ID (required). */ + public static final String AWS_GLUE_CATALOG_ID = "aws-glue-catalog-id"; + + /** AWS access key ID for static credential authentication (optional, sensitive). */ + public static final String AWS_ACCESS_KEY_ID = "aws-access-key-id"; + + /** AWS secret access key for static credential authentication (optional, sensitive). */ + public static final String AWS_SECRET_ACCESS_KEY = "aws-secret-access-key"; + + /** + * Custom Glue endpoint URL (optional). Used for VPC endpoints or LocalStack testing. Example: + * {@code http://localhost:4566} + */ + public static final String AWS_GLUE_ENDPOINT = "aws-glue-endpoint"; + + /** + * Default table format used when creating tables via Gravitino's {@code createTable()} API + * (optional). Accepted values: {@code iceberg}, {@code hive}. Defaults to {@code iceberg}. + */ + public static final String DEFAULT_TABLE_FORMAT = "default-table-format"; + + /** Default value for {@link #DEFAULT_TABLE_FORMAT}. */ + public static final String DEFAULT_TABLE_FORMAT_VALUE = "iceberg"; + + /** + * Comma-separated list of table types exposed by {@code listTables()} and {@code loadTable()} + * (optional). Accepted values: {@code all}, {@code hive}, {@code iceberg}, {@code delta}, {@code + * parquet}. Defaults to {@code all}. + */ + public static final String TABLE_TYPE_FILTER = "table-type-filter"; + + /** Default value for {@link #TABLE_TYPE_FILTER}: expose all table types. */ + public static final String TABLE_TYPE_FILTER_ALL = "all"; + + // ------------------------------------------------------------------------- + // Glue Table.parameters() keys (passthrough properties) + // ------------------------------------------------------------------------- + + /** + * Glue table type parameter key. Common values: {@code ICEBERG}, {@code HIVE}, {@code DELTA}, + * {@code PARQUET}, {@code VIRTUAL_VIEW}. + */ + public static final String TABLE_TYPE = "table_type"; + + /** Iceberg table metadata location stored in Glue {@code Table.parameters()}. */ + public static final String METADATA_LOCATION = "metadata_location"; + + /** Storage location for the table data. */ + public static final String LOCATION = "location"; + + private GlueConstants() {} +} diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTablePropertiesMetadata.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTablePropertiesMetadata.java index bec87e2e36..2439d9a139 100644 --- a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTablePropertiesMetadata.java +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTablePropertiesMetadata.java @@ -18,6 +18,11 @@ */ package org.apache.gravitino.catalog.glue; +import static org.apache.gravitino.catalog.glue.GlueConstants.LOCATION; +import static org.apache.gravitino.catalog.glue.GlueConstants.METADATA_LOCATION; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE; +import static org.apache.gravitino.connector.PropertyEntry.stringOptionalPropertyEntry; + import com.google.common.collect.ImmutableMap; import java.util.Map; import org.apache.gravitino.connector.BasePropertiesMetadata; @@ -26,12 +31,41 @@ import org.apache.gravitino.connector.PropertyEntry; /** * Properties metadata for Glue tables. * - * <p>TODO PR-02: support passthrough of Glue Table.parameters() keys such as {@code table_type} and - * {@code metadata_location} for Iceberg, Delta, and other formats. + * <p>Defines well-known Glue {@code Table.parameters()} keys that Gravitino exposes. All entries + * are optional and mutable, reflecting that Glue stores them as free-form key-value pairs. Unknown + * parameters from {@code Table.parameters()} are passed through transparently by the catalog + * operations layer and are not validated here. */ public class GlueTablePropertiesMetadata extends BasePropertiesMetadata { - private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = ImmutableMap.of(); + private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = + ImmutableMap.<String, PropertyEntry<?>>builder() + .put( + TABLE_TYPE, + stringOptionalPropertyEntry( + TABLE_TYPE, + "Glue table type stored in Table.parameters(). Common values:" + + " ICEBERG, HIVE, DELTA, PARQUET.", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + METADATA_LOCATION, + stringOptionalPropertyEntry( + METADATA_LOCATION, + "Iceberg metadata file location stored in Table.parameters().", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + LOCATION, + stringOptionalPropertyEntry( + LOCATION, + "Storage location for the table data.", + false /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .build(); @Override protected Map<String, PropertyEntry<?>> specificPropertyEntries() { diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueClientProvider.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueClientProvider.java new file mode 100644 index 0000000000..022a155690 --- /dev/null +++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueClientProvider.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_ACCESS_KEY_ID; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_GLUE_ENDPOINT; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_REGION; +import static org.apache.gravitino.catalog.glue.GlueConstants.AWS_SECRET_ACCESS_KEY; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.glue.GlueClient; + +class TestGlueClientProvider { + + @Test + void testBuildClientWithStaticCredentials() { + Map<String, String> config = new HashMap<>(); + config.put(AWS_REGION, "us-east-1"); + config.put(AWS_ACCESS_KEY_ID, "AKIAIOSFODNN7EXAMPLE"); + config.put(AWS_SECRET_ACCESS_KEY, "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"); + + GlueClient client = GlueClientProvider.buildClient(config); + assertNotNull(client); + client.close(); + } + + @Test + void testBuildClientWithDefaultCredentialChain() { + // Without explicit credentials the default chain is used. + Map<String, String> config = new HashMap<>(); + config.put(AWS_REGION, "eu-west-1"); + + GlueClient client = GlueClientProvider.buildClient(config); + assertNotNull(client); + client.close(); + } + + @Test + void testBuildClientWithEndpointOverride() { + Map<String, String> config = new HashMap<>(); + config.put(AWS_REGION, "us-east-1"); + config.put(AWS_ACCESS_KEY_ID, "test"); + config.put(AWS_SECRET_ACCESS_KEY, "test"); + config.put(AWS_GLUE_ENDPOINT, "http://localhost:4566"); + + GlueClient client = GlueClientProvider.buildClient(config); + assertNotNull(client); + client.close(); + } + + @Test + void testBuildClientMissingRegionThrows() { + Map<String, String> config = new HashMap<>(); + // No AWS_REGION set. + + assertThrows(IllegalArgumentException.class, () -> GlueClientProvider.buildClient(config)); + } + + @Test + void testBuildClientBlankRegionThrows() { + Map<String, String> config = new HashMap<>(); + config.put(AWS_REGION, " "); + + assertThrows(IllegalArgumentException.class, () -> GlueClientProvider.buildClient(config)); + } + + @Test + void testBuildClientOnlyAccessKeyFallsBackToDefaultChain() { + // Only one of the key pair provided → both must be present to use static creds. + // Falls back to default chain, which just builds without error. + Map<String, String> config = new HashMap<>(); + config.put(AWS_REGION, "ap-southeast-1"); + config.put(AWS_ACCESS_KEY_ID, "AKIAIOSFODNN7EXAMPLE"); + // No AWS_SECRET_ACCESS_KEY → default chain is used instead. + + GlueClient client = GlueClientProvider.buildClient(config); + assertNotNull(client); + client.close(); + } +}
