Copilot commented on code in PR #10781: URL: https://github.com/apache/gravitino/pull/10781#discussion_r3077826577
########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; + +/** + * Converts between AWS Glue / Hive type strings and Gravitino {@link Type} objects. + * + * <p>Glue stores column types as Hive type strings (e.g. {@code "bigint"}, {@code "decimal(10,2)"}, + * {@code "array<string>"}). This converter handles all primitive types natively; complex and + * unknown types fall back to {@link Types.ExternalType} to preserve the original string. + */ +public final class GlueTypeConverter { + + private GlueTypeConverter() {} + + /** + * Converts a Glue/Hive type string to a Gravitino {@link Type}. + * + * @param glueType the Hive type string from {@code Column.type()} (case-insensitive) + * @return the corresponding Gravitino {@link Type}; unknown types become {@link + * Types.ExternalType} + */ + public static Type toGravitino(String glueType) { + if (glueType == null || glueType.isEmpty()) { + return Types.ExternalType.of(""); + } + String lower = glueType.trim().toLowerCase(java.util.Locale.ROOT); Review Comment: `toGravitino` uses a fully-qualified `java.util.Locale.ROOT`. Since there’s no name collision here, this should be a normal import (`import java.util.Locale;`) and then `toLowerCase(Locale.ROOT)` to match the project’s “no FQNs in code” convention. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.CreateTableRequest; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.DeleteTableRequest; +import software.amazon.awssdk.services.glue.model.GetTableRequest; +import software.amazon.awssdk.services.glue.model.Order; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; + +/** + * Runs {@link AbstractGlueTableTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue table in a pre-created database, retrieves it via the API, + * converts it to a {@link GlueTable}, and asserts the field mapping. The table (and schema) is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueTableIT extends AbstractGlueTableTest { + + private static GlueClient glueClient; + private static String catalogId; + private static String testSchemaName; + + private static final String INPUT_FMT = "org.apache.hadoop.mapred.TextInputFormat"; + private static final String OUTPUT_FMT = + "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"; + private static final String SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + private static final String LOCATION = "s3://my-bucket/warehouse/"; + + @BeforeAll + static void initClient() { + Map<String, String> config = new HashMap<>(); + config.put( + GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1")); + String accessKey = System.getenv("AWS_ACCESS_KEY_ID"); + String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY"); + if (accessKey != null && secretKey != null) { + config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey); + config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey); + } + glueClient = GlueClientProvider.buildClient(config); + catalogId = System.getenv("GLUE_CATALOG_ID"); + + // Create a dedicated test schema once per test class. + testSchemaName = "aws_glue_table_it_" + System.currentTimeMillis(); + CreateDatabaseRequest.Builder dbReq = + CreateDatabaseRequest.builder() + .databaseInput( + software.amazon.awssdk.services.glue.model.DatabaseInput.builder() + .name(testSchemaName) + .description("schema for AwsGlueTableIT") + .build()); + if (catalogId != null) { + dbReq.catalogId(catalogId); + } + glueClient.createDatabase(dbReq.build()); + } + + @Override + protected Table provideHiveTable(String schemaName, String tableName) { + TableInput input = + TableInput.builder() + .name(tableName) + .description("a hive table") + .tableType("EXTERNAL_TABLE") + .storageDescriptor( + StorageDescriptor.builder() + .columns( + Column.builder().name("id").type("bigint").comment("primary key").build(), + Column.builder().name("name").type("string").build()) + .location(LOCATION + tableName) + .inputFormat(INPUT_FMT) + .outputFormat(OUTPUT_FMT) + .serdeInfo(SerDeInfo.builder().serializationLibrary(SERDE).build()) + .bucketColumns("id") + .numberOfBuckets(4) + .sortColumns(Order.builder().column("name").sortOrder(1).build()) + .build()) + .partitionKeys(Column.builder().name("dt").type("date").build()) + .parameters(Map.of("created_by", "aws_glue_table_it")) + .build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + @Override + protected Table provideIcebergTable(String schemaName, String tableName) { + TableInput input = + TableInput.builder() + .name(tableName) + .tableType("EXTERNAL_TABLE") + .storageDescriptor(StorageDescriptor.builder().build()) + .parameters( + Map.of( + GlueConstants.TABLE_FORMAT, "ICEBERG", + GlueConstants.METADATA_LOCATION, "s3://bucket/path/metadata/v1.metadata.json")) + .build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + @Override + protected Table provideMinimalTable(String schemaName, String tableName) { + TableInput input = TableInput.builder().name(tableName).build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + private Table retrieveTable(String tableName) { + GetTableRequest.Builder getReq = + GetTableRequest.builder().databaseName(testSchemaName).name(tableName); + if (catalogId != null) { + getReq.catalogId(catalogId); + } + return glueClient.getTable(getReq.build()).table(); + } + + @Override + protected void cleanup(String schemaName, String tableName) { + try { + DeleteTableRequest.Builder req = + DeleteTableRequest.builder().databaseName(testSchemaName).name(tableName); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.deleteTable(req.build()); + } catch (Exception ignored) { + // Best-effort cleanup + } Review Comment: `cleanup(...)` catches a generic `Exception` and ignores it. Even for best-effort cleanup, it’s better to catch the specific AWS SDK exceptions you expect (e.g., `software.amazon.awssdk.services.glue.model.GlueException` / `SdkException`) so unexpected programming errors aren’t silently swallowed. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueSchemaTest.java: ########## @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.time.Instant; +import java.util.Map; +import software.amazon.awssdk.services.glue.model.Database; + +/** + * Runs {@link AbstractGlueSchemaTest} scenarios using AWS SDK builders to create {@link Database} + * objects directly — no network or AWS credentials required. + * + * <p>This verifies that the {@link GlueSchema#fromGlueDatabase} conversion logic works correctly + * for typical Glue API response shapes. + */ +class SyntheticGlueSchemaTest extends AbstractGlueSchemaTest { Review Comment: Test class name doesn’t follow the repository convention of using a `TestXxx` prefix for test classes. Renaming this to `TestSyntheticGlueSchema` (or similar) would keep test discovery consistent across modules. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; + +/** + * Converts between AWS Glue / Hive type strings and Gravitino {@link Type} objects. + * + * <p>Glue stores column types as Hive type strings (e.g. {@code "bigint"}, {@code "decimal(10,2)"}, + * {@code "array<string>"}). This converter handles all primitive types natively; complex and + * unknown types fall back to {@link Types.ExternalType} to preserve the original string. + */ +public final class GlueTypeConverter { + + private GlueTypeConverter() {} + + /** + * Converts a Glue/Hive type string to a Gravitino {@link Type}. + * + * @param glueType the Hive type string from {@code Column.type()} (case-insensitive) + * @return the corresponding Gravitino {@link Type}; unknown types become {@link + * Types.ExternalType} + */ + public static Type toGravitino(String glueType) { + if (glueType == null || glueType.isEmpty()) { + return Types.ExternalType.of(""); + } + String lower = glueType.trim().toLowerCase(java.util.Locale.ROOT); + + switch (lower) { + case "boolean": + return Types.BooleanType.get(); + case "tinyint": + return Types.ByteType.get(); + case "smallint": + return Types.ShortType.get(); + case "int": + case "integer": + return Types.IntegerType.get(); + case "bigint": + return Types.LongType.get(); + case "float": + return Types.FloatType.get(); + case "double": + return Types.DoubleType.get(); + case "string": + return Types.StringType.get(); + case "date": + return Types.DateType.get(); + case "timestamp": + return Types.TimestampType.withoutTimeZone(); + case "binary": + return Types.BinaryType.get(); + case "interval_year_month": + return Types.IntervalYearType.get(); + case "interval_day_time": + return Types.IntervalDayType.get(); + default: + break; + } + + // char(N) + if (lower.startsWith("char(") && lower.endsWith(")")) { + int length = Integer.parseInt(lower.substring(5, lower.length() - 1).trim()); + return Types.FixedCharType.of(length); + } + // varchar(N) + if (lower.startsWith("varchar(") && lower.endsWith(")")) { + int length = Integer.parseInt(lower.substring(8, lower.length() - 1).trim()); + return Types.VarCharType.of(length); + } + // decimal(P,S) or decimal(P, S) + if (lower.startsWith("decimal(") && lower.endsWith(")")) { + String inner = lower.substring(8, lower.length() - 1); + String[] parts = inner.split(",", 2); + int precision = Integer.parseInt(parts[0].trim()); + int scale = parts.length > 1 ? Integer.parseInt(parts[1].trim()) : 0; + return Types.DecimalType.of(precision, scale); + } + + // Complex types (array<...>, map<...>, struct<...>, uniontype<...>) and anything unknown + // are preserved as ExternalType so the original string survives the round-trip. + return Types.ExternalType.of(glueType); + } + + /** + * Converts a Gravitino {@link Type} back to a Glue/Hive type string. + * + * @param type the Gravitino type + * @return the Hive type string + * @throws IllegalArgumentException if the type has no known Glue representation + */ + public static String fromGravitino(Type type) { + if (type instanceof Types.BooleanType) return "boolean"; + if (type instanceof Types.ByteType) return "tinyint"; + if (type instanceof Types.ShortType) return "smallint"; + if (type instanceof Types.IntegerType) return "int"; + if (type instanceof Types.LongType) return "bigint"; + if (type instanceof Types.FloatType) return "float"; + if (type instanceof Types.DoubleType) return "double"; + if (type instanceof Types.StringType) return "string"; + if (type instanceof Types.DateType) return "date"; + if (type instanceof Types.TimestampType) return "timestamp"; + if (type instanceof Types.BinaryType) return "binary"; + if (type instanceof Types.IntervalYearType) return "interval_year_month"; + if (type instanceof Types.IntervalDayType) return "interval_day_time"; Review Comment: `fromGravitino` currently returns `"timestamp"` for any `Types.TimestampType`, including `timestamp_tz` and precision-qualified timestamps. This loses semantics (time zone) and can corrupt schemas on round-trip. Consider either (a) mapping `Types.TimestampType` with `hasTimeZone()` to `"timestamp_tz"` (and including precision if set), or (b) rejecting unsupported variants with a clear exception message instead of silently downcasting to `timestamp`. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.GetDatabaseRequest; + +/** + * Runs {@link AbstractGlueSchemaTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue database, retrieves it via the API (getting a real serialized + * response), converts it to a {@link GlueSchema}, and asserts the field mapping. The database is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueSchemaIT extends AbstractGlueSchemaTest { Review Comment: Test class name doesn’t follow the repository convention of using a `TestXxx` prefix for test classes. Consider renaming to `TestAwsGlueSchema` (or similar). ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.CreateTableRequest; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.DeleteTableRequest; +import software.amazon.awssdk.services.glue.model.GetTableRequest; +import software.amazon.awssdk.services.glue.model.Order; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; + +/** + * Runs {@link AbstractGlueTableTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue table in a pre-created database, retrieves it via the API, + * converts it to a {@link GlueTable}, and asserts the field mapping. The table (and schema) is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueTableIT extends AbstractGlueTableTest { Review Comment: Test class name doesn’t follow the repository convention of using a `TestXxx` prefix for test classes. Consider renaming to `TestAwsGlueTable` (or similar). ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTable.java: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import static org.apache.gravitino.catalog.glue.GlueConstants.INPUT_FORMAT; +import static org.apache.gravitino.catalog.glue.GlueConstants.LOCATION; +import static org.apache.gravitino.catalog.glue.GlueConstants.OUTPUT_FORMAT; +import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_LIB; +import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_NAME; +import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_PARAMETER_PREFIX; +import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import lombok.ToString; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.connector.BaseTable; +import org.apache.gravitino.connector.TableOperations; +import org.apache.gravitino.meta.AuditInfo; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.expressions.NamedReference; +import org.apache.gravitino.rel.expressions.distributions.Distribution; +import org.apache.gravitino.rel.expressions.distributions.Distributions; +import org.apache.gravitino.rel.expressions.sorts.SortDirection; +import org.apache.gravitino.rel.expressions.sorts.SortOrder; +import org.apache.gravitino.rel.expressions.sorts.SortOrders; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.expressions.transforms.Transforms; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; + +/** + * Represents an AWS Glue {@link Table} as a Gravitino table. + * + * <p>All entries in {@code Table.parameters()} pass through intact (including {@code table_type}, + * {@code metadata_location}, etc.), so downstream tools can correctly identify the table format. + * StorageDescriptor fields (location, formats, SerDe) are surfaced as additional properties. + */ +@ToString +public class GlueTable extends BaseTable { + + private GlueTable() {} + + @Override + protected TableOperations newOps() { + // Partition operations are deferred to PR-06. + throw new UnsupportedOperationException( + "Partition operations are not yet supported for GlueTable"); + } + + /** + * Converts an AWS Glue {@link Table} to a {@link GlueTable}. + * + * <p>Column assembly: + * + * <ol> + * <li>Data columns from {@code storageDescriptor.columns()} (Hive-format tables). + * <li>Partition columns from {@code table.partitionKeys()} appended after data columns. + * </ol> + * + * <p>For Iceberg-format tables the StorageDescriptor columns are typically empty; all metadata + * (including {@code table_type=ICEBERG} and {@code metadata_location}) is in {@code + * table.parameters()} and passes through as-is. + * + * @param glueTable the Glue Table returned by the AWS SDK + * @return a populated {@link GlueTable} + */ + public static GlueTable fromGlueTable(Table glueTable) { + StorageDescriptor sd = glueTable.storageDescriptor(); + + // --- Columns --- + List<Column> columns = new ArrayList<>(); + if (sd != null && sd.hasColumns()) { + for (software.amazon.awssdk.services.glue.model.Column c : sd.columns()) { + columns.add(GlueColumn.fromGlueColumn(c)); + } + } + List<String> partitionColNames = new ArrayList<>(); + if (glueTable.hasPartitionKeys()) { + for (software.amazon.awssdk.services.glue.model.Column pk : glueTable.partitionKeys()) { + columns.add(GlueColumn.fromGlueColumn(pk)); + partitionColNames.add(pk.name()); + } + } + + // --- Partitioning --- + Transform[] partitioning = + partitionColNames.stream().map(Transforms::identity).toArray(Transform[]::new); + + // --- Distribution (bucket) --- + Distribution distribution = Distributions.NONE; + if (sd != null && sd.hasBucketColumns() && sd.numberOfBuckets() > 0) { + distribution = + Distributions.hash( + sd.numberOfBuckets(), + sd.bucketColumns().stream() + .map(NamedReference::field) + .toArray(org.apache.gravitino.rel.expressions.Expression[]::new)); + } + + // --- Sort orders --- + SortOrder[] sortOrders = SortOrders.NONE; + if (sd != null && sd.hasSortColumns()) { + sortOrders = + sd.sortColumns().stream() + .map( + o -> + SortOrders.of( + NamedReference.field(o.column()), + o.sortOrder() == 1 ? SortDirection.ASCENDING : SortDirection.DESCENDING)) + .toArray(SortOrder[]::new); Review Comment: Potential NPE due to AWS SDK boxed integers: `sd.numberOfBuckets()` and `o.sortOrder()` are likely nullable `Integer` values. The current code auto-unboxes them (`> 0`, `== 1`), which can throw if Glue omits the field. Consider null-checking (or using `hasNumberOfBuckets()` / `hasSortColumns()` + null-safe defaults) before comparing/unboxing. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.CreateTableRequest; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.DeleteTableRequest; +import software.amazon.awssdk.services.glue.model.GetTableRequest; +import software.amazon.awssdk.services.glue.model.Order; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; +import software.amazon.awssdk.services.glue.model.TableInput; + +/** + * Runs {@link AbstractGlueTableTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue table in a pre-created database, retrieves it via the API, + * converts it to a {@link GlueTable}, and asserts the field mapping. The table (and schema) is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueTableIT extends AbstractGlueTableTest { + + private static GlueClient glueClient; + private static String catalogId; + private static String testSchemaName; + + private static final String INPUT_FMT = "org.apache.hadoop.mapred.TextInputFormat"; + private static final String OUTPUT_FMT = + "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"; + private static final String SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + private static final String LOCATION = "s3://my-bucket/warehouse/"; + + @BeforeAll + static void initClient() { + Map<String, String> config = new HashMap<>(); + config.put( + GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1")); + String accessKey = System.getenv("AWS_ACCESS_KEY_ID"); + String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY"); + if (accessKey != null && secretKey != null) { + config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey); + config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey); + } + glueClient = GlueClientProvider.buildClient(config); + catalogId = System.getenv("GLUE_CATALOG_ID"); + + // Create a dedicated test schema once per test class. + testSchemaName = "aws_glue_table_it_" + System.currentTimeMillis(); + CreateDatabaseRequest.Builder dbReq = + CreateDatabaseRequest.builder() + .databaseInput( + software.amazon.awssdk.services.glue.model.DatabaseInput.builder() + .name(testSchemaName) + .description("schema for AwsGlueTableIT") + .build()); + if (catalogId != null) { + dbReq.catalogId(catalogId); + } + glueClient.createDatabase(dbReq.build()); + } + + @Override + protected Table provideHiveTable(String schemaName, String tableName) { + TableInput input = + TableInput.builder() + .name(tableName) + .description("a hive table") + .tableType("EXTERNAL_TABLE") + .storageDescriptor( + StorageDescriptor.builder() + .columns( + Column.builder().name("id").type("bigint").comment("primary key").build(), + Column.builder().name("name").type("string").build()) + .location(LOCATION + tableName) + .inputFormat(INPUT_FMT) + .outputFormat(OUTPUT_FMT) + .serdeInfo(SerDeInfo.builder().serializationLibrary(SERDE).build()) + .bucketColumns("id") + .numberOfBuckets(4) + .sortColumns(Order.builder().column("name").sortOrder(1).build()) + .build()) + .partitionKeys(Column.builder().name("dt").type("date").build()) + .parameters(Map.of("created_by", "aws_glue_table_it")) + .build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + @Override + protected Table provideIcebergTable(String schemaName, String tableName) { + TableInput input = + TableInput.builder() + .name(tableName) + .tableType("EXTERNAL_TABLE") + .storageDescriptor(StorageDescriptor.builder().build()) + .parameters( + Map.of( + GlueConstants.TABLE_FORMAT, "ICEBERG", + GlueConstants.METADATA_LOCATION, "s3://bucket/path/metadata/v1.metadata.json")) + .build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + @Override + protected Table provideMinimalTable(String schemaName, String tableName) { + TableInput input = TableInput.builder().name(tableName).build(); + + CreateTableRequest.Builder req = + CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createTable(req.build()); + + return retrieveTable(tableName); + } + + private Table retrieveTable(String tableName) { + GetTableRequest.Builder getReq = + GetTableRequest.builder().databaseName(testSchemaName).name(tableName); + if (catalogId != null) { + getReq.catalogId(catalogId); + } + return glueClient.getTable(getReq.build()).table(); + } + + @Override + protected void cleanup(String schemaName, String tableName) { + try { + DeleteTableRequest.Builder req = + DeleteTableRequest.builder().databaseName(testSchemaName).name(tableName); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.deleteTable(req.build()); + } catch (Exception ignored) { + // Best-effort cleanup + } + } + + @AfterAll + static void cleanupSchema() { + try { + DeleteDatabaseRequest.Builder dbReq = DeleteDatabaseRequest.builder().name(testSchemaName); + if (catalogId != null) { + dbReq.catalogId(catalogId); + } + glueClient.deleteDatabase(dbReq.build()); + } catch (Exception ignored) { + // Best-effort cleanup Review Comment: `@AfterAll cleanupSchema()` deletes the database but never closes the shared `GlueClient`. This can leak HTTP resources/threads when these tests are run. Consider closing `glueClient` in `@AfterAll` (ideally in a `finally` block) after cleanup completes. ```suggestion // Best-effort cleanup } finally { if (glueClient != null) { try { glueClient.close(); } catch (Exception ignored) { // Best-effort cleanup } } ``` ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTablePropertiesMetadata.java: ########## @@ -26,12 +30,35 @@ /** * Properties metadata for Glue tables. * - * <p>TODO PR-02: support passthrough of Glue Table.parameters() keys such as {@code table_type} and - * {@code metadata_location} for Iceberg, Delta, and other formats. + * <p>Defines well-known Glue {@code Table.parameters()} keys that Gravitino exposes. All entries + * are optional and mutable, reflecting that Glue stores them as free-form key-value pairs. Unknown + * parameters from {@code Table.parameters()} are passed through transparently by the catalog + * operations layer and are not validated here. + * + * <p>Note: storage location ({@code StorageDescriptor.location}) varies by table format and is + * handled per-format in the Table CRUD layer, not declared here. */ public class GlueTablePropertiesMetadata extends BasePropertiesMetadata { - private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = ImmutableMap.of(); + private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA = + ImmutableMap.<String, PropertyEntry<?>>builder() + .put( + TABLE_FORMAT, + stringOptionalPropertyEntry( + TABLE_FORMAT, + "Table format stored in Table.parameters(). Common values: iceberg, hive.", Review Comment: The description for `TABLE_FORMAT` says common values are `iceberg, hive`, but the constant/Javadocs and tests use Glue’s typical uppercase values like `ICEBERG`. Consider aligning the description with the actual expected values/casing to avoid confusing users. ```suggestion "Table format stored in Table.parameters(). Common values: ICEBERG, HIVE.", ``` ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.GetDatabaseRequest; + +/** + * Runs {@link AbstractGlueSchemaTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue database, retrieves it via the API (getting a real serialized + * response), converts it to a {@link GlueSchema}, and asserts the field mapping. The database is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueSchemaIT extends AbstractGlueSchemaTest { + + private static GlueClient glueClient; + private static String catalogId; + + @BeforeAll + static void initClient() { + Map<String, String> config = new HashMap<>(); + config.put( + GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1")); + String accessKey = System.getenv("AWS_ACCESS_KEY_ID"); + String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY"); + if (accessKey != null && secretKey != null) { + config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey); + config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey); + } + glueClient = GlueClientProvider.buildClient(config); + catalogId = System.getenv("GLUE_CATALOG_ID"); + } Review Comment: `AwsGlueSchemaIT` creates a static `GlueClient` in `@BeforeAll` but never closes it. Consider adding an `@AfterAll` method to close the client to avoid leaking HTTP resources/threads when these tests are executed. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueTableTest.java: ########## @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.time.Instant; +import java.util.Map; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.Order; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; + +/** + * Runs {@link AbstractGlueTableTest} scenarios using AWS SDK builders — no network or credentials + * required. + */ +class SyntheticGlueTableTest extends AbstractGlueTableTest { Review Comment: Test class name doesn’t follow the repository convention of using a `TestXxx` prefix for test classes. Renaming this to `TestSyntheticGlueTable` (or similar) would keep test discovery consistent across modules. ########## catalogs/catalog-glue/build.gradle.kts: ########## @@ -37,6 +37,7 @@ dependencies { implementation(libs.aws.glue) implementation(libs.aws.sts) + implementation(libs.commons.lang3) implementation(libs.guava) Review Comment: This module adds a new `commons-lang3` dependency, but the usage appears limited to `StringUtils.isNotBlank(...)` checks. If possible, prefer existing utilities already available in the module (e.g., Guava `Strings.isNullOrEmpty`, or simple `trim().isEmpty()` checks) to avoid expanding the dependency surface area for the connector. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueSchema.java: ########## @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import lombok.ToString; +import org.apache.gravitino.connector.BaseSchema; +import org.apache.gravitino.meta.AuditInfo; +import software.amazon.awssdk.services.glue.model.Database; + +/** Represents an AWS Glue Database as a Gravitino {@link org.apache.gravitino.Schema}. */ +@ToString +public class GlueSchema extends BaseSchema { + + private GlueSchema() {} + + /** + * Converts an AWS Glue {@link Database} to a {@link GlueSchema}. + * + * <p>Field mapping: + * + * <ul> + * <li>{@code Database.name()} → {@code name} + * <li>{@code Database.description()} → {@code comment} (nullable) + * <li>{@code Database.parameters()} → {@code properties} + * <li>{@code Database.createTime()} → {@code auditInfo.createTime} + * </ul> + * + * @param database the Glue Database returned by the AWS SDK + * @return a populated {@link GlueSchema} + */ + public static GlueSchema fromGlueDatabase(Database database) { + AuditInfo auditInfo = AuditInfo.builder().withCreateTime(database.createTime()).build(); + + return GlueSchema.builder() + .withName(database.name()) + .withComment(database.description()) + .withProperties(database.parameters()) + .withAuditInfo(auditInfo) Review Comment: `fromGlueDatabase` passes `database.parameters()` straight through to `withProperties(...)`. In Glue responses this can be null when no parameters are set, which would make `schema.properties()` null and can lead to NPEs in callers that assume a map. Consider defaulting to an empty map when `database.parameters()` is null. ########## catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java: ########## @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import software.amazon.awssdk.services.glue.GlueClient; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.Database; +import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.GetDatabaseRequest; + +/** + * Runs {@link AbstractGlueSchemaTest} scenarios against a real AWS Glue endpoint. + * + * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it, + * set the following environment variables and pass {@code -PrunAwsTests} to Gradle: + * + * <ul> + * <li>{@code AWS_ACCESS_KEY_ID} + * <li>{@code AWS_SECRET_ACCESS_KEY} + * <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1}) + * <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional) + * </ul> + * + * <p>Each test creates a real Glue database, retrieves it via the API (getting a real serialized + * response), converts it to a {@link GlueSchema}, and asserts the field mapping. The database is + * deleted in {@link #cleanup} regardless of test outcome. + */ +@Tag("gravitino-aws-test") +class AwsGlueSchemaIT extends AbstractGlueSchemaTest { + + private static GlueClient glueClient; + private static String catalogId; + + @BeforeAll + static void initClient() { + Map<String, String> config = new HashMap<>(); + config.put( + GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1")); + String accessKey = System.getenv("AWS_ACCESS_KEY_ID"); + String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY"); + if (accessKey != null && secretKey != null) { + config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey); + config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey); + } + glueClient = GlueClientProvider.buildClient(config); + catalogId = System.getenv("GLUE_CATALOG_ID"); + } + + @Override + protected Database provideDatabase(String name, String description, Map<String, String> params) { + CreateDatabaseRequest.Builder req = + CreateDatabaseRequest.builder() + .databaseInput( + DatabaseInput.builder() + .name(name) + .description(description) + .parameters(params) + .build()); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.createDatabase(req.build()); + + GetDatabaseRequest.Builder getReq = GetDatabaseRequest.builder().name(name); + if (catalogId != null) { + getReq.catalogId(catalogId); + } + return glueClient.getDatabase(getReq.build()).database(); + } + + @Override + protected void cleanup(String name) { + try { + DeleteDatabaseRequest.Builder req = DeleteDatabaseRequest.builder().name(name); + if (catalogId != null) { + req.catalogId(catalogId); + } + glueClient.deleteDatabase(req.build()); + } catch (Exception ignored) { + // Best-effort cleanup + } Review Comment: `cleanup(...)` catches a generic `Exception` and ignores it. Even in tests, swallowing all exceptions can hide real issues (e.g., mis-typed database name). Consider catching the specific AWS SDK exception types expected during deletion, and/or logging at debug level when cleanup fails. ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java: ########## @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +/** Constant keys for the AWS Glue Data Catalog connector configuration and table properties. */ +public final class GlueConstants { + + // ------------------------------------------------------------------------- + // Catalog-level connection properties + // ------------------------------------------------------------------------- + + /** AWS region for the Glue Data Catalog (required). */ + public static final String AWS_REGION = "aws-region"; + + /** + * Glue catalog ID — the 12-digit AWS account ID (optional). When omitted, defaults to the + * caller's AWS account ID. + */ + public static final String AWS_GLUE_CATALOG_ID = "aws-glue-catalog-id"; + + /** AWS access key ID for static credential authentication (optional, sensitive). */ + public static final String AWS_ACCESS_KEY_ID = "aws-access-key-id"; + + /** AWS secret access key for static credential authentication (optional, sensitive). */ + public static final String AWS_SECRET_ACCESS_KEY = "aws-secret-access-key"; + + /** + * Custom Glue endpoint URL (optional). Used for VPC endpoints or LocalStack testing. Example: + * {@code http://localhost:4566} + */ + public static final String AWS_GLUE_ENDPOINT = "aws-glue-endpoint"; + + /** + * Default table format used when creating tables via Gravitino's {@code createTable()} API + * (optional). Accepted values: {@code iceberg}, {@code hive}. Defaults to {@code hive}. + */ + public static final String DEFAULT_TABLE_FORMAT = "default-table-format"; + + /** Default value for {@link #DEFAULT_TABLE_FORMAT}: {@code "hive"}. */ + public static final String DEFAULT_TABLE_FORMAT_VALUE = "hive"; + + /** + * Comma-separated list of table formats exposed by {@code listTables()} and {@code loadTable()} + * (optional). Defaults to {@code all}. + */ + public static final String TABLE_FORMAT_FILTER = "table-format-filter"; + + /** Default value for {@link #TABLE_FORMAT_FILTER}: expose all table formats. */ + public static final String DEFAULT_TABLE_FORMAT_FILTER = "all"; + + // ------------------------------------------------------------------------- + // Glue Table.parameters() keys (passthrough properties) + // ------------------------------------------------------------------------- + + /** + * Glue table format parameter key stored in {@code Table.parameters()}. Common values: {@code + * ICEBERG}, {@code HIVE}, {@code DELTA}, {@code PARQUET} (uppercase, as stored by Glue). Used + * internally to determine the table format when reading Glue tables. + */ + public static final String TABLE_FORMAT = "table-format"; + Review Comment: `TABLE_FORMAT` is defined as `"table-format"`, but elsewhere in the repo (and in the glue design doc) the Glue/Hive convention for format routing is `Table.parameters()["table_type"]` (e.g., `table_type=ICEBERG`). Using a different key will break downstream format detection and makes the Javadoc in `GlueTable` misleading (it mentions `table_type`). Consider renaming this constant to `TABLE_TYPE` (or similar) and setting the value to `"table_type"`, then update the tests/usages accordingly. ```suggestion * Glue table format-routing parameter key stored in {@code Table.parameters()}. Common values: * {@code ICEBERG}, {@code HIVE}, {@code DELTA}, {@code PARQUET} (uppercase, as stored by Glue). * Used internally to determine the table format when reading Glue tables. */ public static final String TABLE_TYPE_PARAMETER = "table_type"; /** * @deprecated Use {@link #TABLE_TYPE_PARAMETER} instead. */ @Deprecated public static final String TABLE_FORMAT = TABLE_TYPE_PARAMETER; ``` ########## catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import org.apache.gravitino.rel.types.Type; +import org.apache.gravitino.rel.types.Types; + +/** + * Converts between AWS Glue / Hive type strings and Gravitino {@link Type} objects. + * + * <p>Glue stores column types as Hive type strings (e.g. {@code "bigint"}, {@code "decimal(10,2)"}, + * {@code "array<string>"}). This converter handles all primitive types natively; complex and + * unknown types fall back to {@link Types.ExternalType} to preserve the original string. + */ +public final class GlueTypeConverter { + + private GlueTypeConverter() {} + + /** + * Converts a Glue/Hive type string to a Gravitino {@link Type}. + * + * @param glueType the Hive type string from {@code Column.type()} (case-insensitive) + * @return the corresponding Gravitino {@link Type}; unknown types become {@link + * Types.ExternalType} + */ + public static Type toGravitino(String glueType) { + if (glueType == null || glueType.isEmpty()) { + return Types.ExternalType.of(""); + } + String lower = glueType.trim().toLowerCase(java.util.Locale.ROOT); + + switch (lower) { + case "boolean": + return Types.BooleanType.get(); + case "tinyint": + return Types.ByteType.get(); + case "smallint": + return Types.ShortType.get(); + case "int": + case "integer": + return Types.IntegerType.get(); + case "bigint": + return Types.LongType.get(); + case "float": + return Types.FloatType.get(); + case "double": + return Types.DoubleType.get(); + case "string": + return Types.StringType.get(); + case "date": + return Types.DateType.get(); + case "timestamp": + return Types.TimestampType.withoutTimeZone(); + case "binary": + return Types.BinaryType.get(); + case "interval_year_month": + return Types.IntervalYearType.get(); + case "interval_day_time": + return Types.IntervalDayType.get(); + default: + break; + } + + // char(N) + if (lower.startsWith("char(") && lower.endsWith(")")) { + int length = Integer.parseInt(lower.substring(5, lower.length() - 1).trim()); + return Types.FixedCharType.of(length); + } + // varchar(N) + if (lower.startsWith("varchar(") && lower.endsWith(")")) { + int length = Integer.parseInt(lower.substring(8, lower.length() - 1).trim()); + return Types.VarCharType.of(length); + } + // decimal(P,S) or decimal(P, S) + if (lower.startsWith("decimal(") && lower.endsWith(")")) { + String inner = lower.substring(8, lower.length() - 1); + String[] parts = inner.split(",", 2); + int precision = Integer.parseInt(parts[0].trim()); + int scale = parts.length > 1 ? Integer.parseInt(parts[1].trim()) : 0; + return Types.DecimalType.of(precision, scale); + } Review Comment: `toGravitino` is documented to fall back to `ExternalType` for unknown types, but the `char(...)` / `varchar(...)` / `decimal(...)` parsing paths will throw `NumberFormatException` (or `ArrayIndexOutOfBoundsException`) for malformed inputs like `decimal(x,y)` or `char()`. It would be safer to wrap these parses and, on any parse failure, return `Types.ExternalType.of(glueType)` instead of throwing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
