This is an automated email from the ASF dual-hosted git repository. diqiu50 pushed a commit to branch glue-pr03 in repository https://gitbox.apache.org/repos/asf/gravitino.git
commit 8c8928dade1e1bb3b8d9fb88dc8b6247c0539ce8 Author: diqiu50 <[email protected]> AuthorDate: Mon Apr 13 21:32:23 2026 +0800 feat(catalog-glue): add GlueSchema and GlueTable model classes with tests Add model layer for catalog-glue (PR-03): - GlueConstants: storage descriptor and table-format constants - GlueTypeConverter: Glue/Hive type string to Gravitino Type mapping - GlueSchema: maps AWS Glue Database -> Gravitino BaseSchema - GlueColumn: maps AWS Glue Column -> Gravitino BaseColumn - GlueTable: maps AWS Glue Table -> Gravitino BaseTable (columns, partitioning, distribution, sort orders, properties) Test architecture: abstract base class + two implementations: - SyntheticGlueXxxTest: SDK builder, no network, always runs - AwsGlueXxxIT: real AWS Glue API, tagged gravitino-aws-test, skipped by default --- catalogs/catalog-glue/build.gradle.kts | 4 + .../apache/gravitino/catalog/glue/GlueColumn.java | 82 ++++++++ .../gravitino/catalog/glue/GlueConstants.java | 31 +++ .../apache/gravitino/catalog/glue/GlueSchema.java | 82 ++++++++ .../apache/gravitino/catalog/glue/GlueTable.java | 211 +++++++++++++++++++++ .../gravitino/catalog/glue/GlueTypeConverter.java | 140 ++++++++++++++ .../catalog/glue/AbstractGlueSchemaTest.java | 116 +++++++++++ .../catalog/glue/AbstractGlueTableTest.java | 187 ++++++++++++++++++ .../gravitino/catalog/glue/AwsGlueSchemaIT.java | 104 ++++++++++ .../gravitino/catalog/glue/AwsGlueTableIT.java | 204 ++++++++++++++++++++ .../catalog/glue/SyntheticGlueSchemaTest.java | 43 +++++ .../catalog/glue/SyntheticGlueTableTest.java | 90 +++++++++ .../catalog/glue/TestGlueTypeConverter.java | 163 ++++++++++++++++ 13 files changed, 1457 insertions(+) diff --git a/catalogs/catalog-glue/build.gradle.kts b/catalogs/catalog-glue/build.gradle.kts index b7787263c5..6bb7cc6d77 100644 --- a/catalogs/catalog-glue/build.gradle.kts +++ b/catalogs/catalog-glue/build.gradle.kts @@ -91,6 +91,10 @@ tasks.test { val skipITs = project.hasProperty("skipITs") if (skipITs) 
{
     exclude("**/integration/test/**")
+    // Skip AWS integration tests (require real AWS credentials).
+    useJUnitPlatform {
+      excludeTags("gravitino-aws-test")
+    }
   } else {
     dependsOn(tasks.jar)
   }
diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueColumn.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueColumn.java
new file mode 100644
index 0000000000..1d90e2b505
--- /dev/null
+++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueColumn.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import lombok.EqualsAndHashCode;
+import org.apache.gravitino.connector.BaseColumn;
+import software.amazon.awssdk.services.glue.model.Column;
+
+/** Gravitino column view of an AWS Glue {@link Column}. */
+@EqualsAndHashCode(callSuper = true)
+public class GlueColumn extends BaseColumn {
+
+  private GlueColumn() {}
+
+  /**
+   * Builds a {@link GlueColumn} from the column description returned by the AWS SDK.
+   *
+   * <p>How each field is filled in:
+   *
+   * <ul>
+   *   <li>{@code name} ← {@code Column.name()}
+   *   <li>{@code dataType} ← {@code Column.type()}, converted by {@link
+   *       GlueTypeConverter#toGravitino}
+   *   <li>{@code comment} ← {@code Column.comment()} (may be {@code null})
+   *   <li>{@code nullable} is always set to {@code true}
+   *   <li>{@code autoIncrement} is never set here, so the builder's default is kept
+   * </ul>
+   *
+   * @param glueColumn the Glue Column returned by the AWS SDK
+   * @return a populated {@link GlueColumn}
+   */
+  public static GlueColumn fromGlueColumn(Column glueColumn) {
+    return builder()
+        .withNullable(true)
+        .withComment(glueColumn.comment())
+        .withType(GlueTypeConverter.toGravitino(glueColumn.type()))
+        .withName(glueColumn.name())
+        .build();
+  }
+
+  /** Builder for {@link GlueColumn}. */
+  public static class Builder extends BaseColumnBuilder<Builder, GlueColumn> {
+
+    private Builder() {}
+
+    @Override
+    protected GlueColumn internalBuild() {
+      GlueColumn column = new GlueColumn();
+      column.name = name;
+      column.dataType = dataType;
+      column.comment = comment;
+      column.nullable = nullable;
+      column.autoIncrement = autoIncrement;
+      // A builder without an explicit default value yields the "not set" marker, never null.
+      if (defaultValue == null) {
+        column.defaultValue = DEFAULT_VALUE_NOT_SET;
+      } else {
+        column.defaultValue = defaultValue;
+      }
+      return column;
+    }
+  }
+
+  /**
+   * Returns a fresh {@link Builder} for assembling a {@link GlueColumn}.
+   *
+   * @return a new builder instance
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+}
diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java
index 1aaa10599a..2244f7552a 100644
--- a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java
+++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueConstants.java
@@ -78,5 +78,36 @@ public final class GlueConstants {
   /** Iceberg table metadata location stored in Glue {@code Table.parameters()}.
*/ public static final String METADATA_LOCATION = "metadata_location"; + // ------------------------------------------------------------------------- + // StorageDescriptor-derived table properties (stored in Gravitino properties map) + // ------------------------------------------------------------------------- + + /** Table data location from {@code StorageDescriptor.location()}. */ + public static final String LOCATION = "location"; + + /** InputFormat class name from {@code StorageDescriptor.inputFormat()}. */ + public static final String INPUT_FORMAT = "input-format"; + + /** OutputFormat class name from {@code StorageDescriptor.outputFormat()}. */ + public static final String OUTPUT_FORMAT = "output-format"; + + /** SerDe library class name from {@code StorageDescriptor.serDeInfo().serializationLibrary()}. */ + public static final String SERDE_LIB = "serde-lib"; + + /** SerDe name from {@code StorageDescriptor.serDeInfo().name()}. */ + public static final String SERDE_NAME = "serde-name"; + + /** + * Prefix for SerDe parameters from {@code StorageDescriptor.serDeInfo().parameters()}. Each SerDe + * parameter key {@code k} is stored as {@code "serde.parameter." + k}. + */ + public static final String SERDE_PARAMETER_PREFIX = "serde.parameter."; + + /** + * Glue table type from {@code Table.tableType()}. Common values: {@code EXTERNAL_TABLE}, {@code + * MANAGED_TABLE}. + */ + public static final String TABLE_TYPE = "table-type"; + private GlueConstants() {} } diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueSchema.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueSchema.java new file mode 100644 index 0000000000..d2dcb36bb2 --- /dev/null +++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueSchema.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import lombok.ToString;
+import org.apache.gravitino.connector.BaseSchema;
+import org.apache.gravitino.meta.AuditInfo;
+import software.amazon.awssdk.services.glue.model.Database;
+
+/** Gravitino {@link org.apache.gravitino.Schema} view of an AWS Glue Database. */
+@ToString
+public class GlueSchema extends BaseSchema {
+
+  private GlueSchema() {}
+
+  /**
+   * Builds a {@link GlueSchema} from the database description returned by the AWS SDK.
+   *
+   * <p>How each field is filled in:
+   *
+   * <ul>
+   *   <li>{@code name} ← {@code Database.name()}
+   *   <li>{@code comment} ← {@code Database.description()} (may be {@code null})
+   *   <li>{@code properties} ← {@code Database.parameters()}, passed through as returned
+   *   <li>{@code auditInfo.createTime} ← {@code Database.createTime()}
+   * </ul>
+   *
+   * @param database the Glue Database returned by the AWS SDK
+   * @return a populated {@link GlueSchema}
+   */
+  public static GlueSchema fromGlueDatabase(Database database) {
+    return builder()
+        .withAuditInfo(AuditInfo.builder().withCreateTime(database.createTime()).build())
+        .withProperties(database.parameters())
+        .withComment(database.description())
+        .withName(database.name())
+        .build();
+  }
+
+  /** Builder for {@link GlueSchema}. */
+  public static class Builder extends BaseSchemaBuilder<Builder, GlueSchema> {
+
+    private Builder() {}
+
+    @Override
+    protected GlueSchema internalBuild() {
+      GlueSchema result = new GlueSchema();
+      result.auditInfo = auditInfo;
+      result.properties = properties;
+      result.comment = comment;
+      result.name = name;
+      return result;
+    }
+  }
+
+  /**
+   * Returns a fresh {@link Builder} for assembling a {@link GlueSchema}.
+   *
+   * @return a new builder instance
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+}
diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTable.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTable.java
new file mode 100644
index 0000000000..49472d4386
--- /dev/null
+++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTable.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import static org.apache.gravitino.catalog.glue.GlueConstants.INPUT_FORMAT;
+import static org.apache.gravitino.catalog.glue.GlueConstants.LOCATION;
+import static org.apache.gravitino.catalog.glue.GlueConstants.OUTPUT_FORMAT;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_LIB;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_NAME;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_PARAMETER_PREFIX;
+import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import lombok.ToString;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.gravitino.connector.BaseTable;
+import org.apache.gravitino.connector.TableOperations;
+import org.apache.gravitino.meta.AuditInfo;
+import org.apache.gravitino.rel.Column;
+import org.apache.gravitino.rel.expressions.Expression;
+import org.apache.gravitino.rel.expressions.NamedReference;
+import org.apache.gravitino.rel.expressions.distributions.Distribution;
+import org.apache.gravitino.rel.expressions.distributions.Distributions;
+import org.apache.gravitino.rel.expressions.sorts.SortDirection;
+import org.apache.gravitino.rel.expressions.sorts.SortOrder;
+import org.apache.gravitino.rel.expressions.sorts.SortOrders;
+import org.apache.gravitino.rel.expressions.transforms.Transform;
+import org.apache.gravitino.rel.expressions.transforms.Transforms;
+import software.amazon.awssdk.services.glue.model.SerDeInfo;
+import software.amazon.awssdk.services.glue.model.StorageDescriptor;
+import software.amazon.awssdk.services.glue.model.Table;
+
+/**
+ * Represents an AWS Glue {@link Table} as a Gravitino table.
+ *
+ * <p>Every entry of {@code Table.parameters()} is copied into the Gravitino properties map first
+ * (including {@code table_type}, {@code metadata_location}, etc.), so downstream tools can
+ * identify the table format. {@code Table.tableType()} and the StorageDescriptor fields (location,
+ * formats, SerDe) are then written on top under the keys defined in {@link GlueConstants}; if a
+ * table parameter happens to use one of those keys, the derived value replaces it.
+ */
+@ToString
+public class GlueTable extends BaseTable {
+
+  /** Value of {@code Order.sortOrder()} that marks a sort column as ascending. */
+  private static final int GLUE_SORT_ASCENDING = 1;
+
+  private GlueTable() {}
+
+  @Override
+  protected TableOperations newOps() {
+    // Partition operations are deferred to PR-06.
+    throw new UnsupportedOperationException(
+        "Partition operations are not yet supported for GlueTable");
+  }
+
+  /**
+   * Converts an AWS Glue {@link Table} to a {@link GlueTable}.
+   *
+   * <p>Column assembly:
+   *
+   * <ol>
+   *   <li>Data columns from {@code storageDescriptor.columns()} (Hive-format tables).
+   *   <li>Partition columns from {@code table.partitionKeys()} appended after data columns; each
+   *       one also becomes an identity partition transform.
+   * </ol>
+   *
+   * <p>For Iceberg-format tables the StorageDescriptor columns are typically empty; all metadata
+   * (including {@code table_type=ICEBERG} and {@code metadata_location}) is in {@code
+   * table.parameters()} and passes through as-is.
+   *
+   * <p>A missing StorageDescriptor, bucket count or sort direction is tolerated: the affected part
+   * of the table falls back to "none" instead of failing the whole conversion.
+   *
+   * @param glueTable the Glue Table returned by the AWS SDK
+   * @return a populated {@link GlueTable}
+   */
+  public static GlueTable fromGlueTable(Table glueTable) {
+    StorageDescriptor sd = glueTable.storageDescriptor();
+
+    // --- Columns ---
+    List<Column> columns = new ArrayList<>();
+    if (sd != null && sd.hasColumns()) {
+      for (software.amazon.awssdk.services.glue.model.Column c : sd.columns()) {
+        columns.add(GlueColumn.fromGlueColumn(c));
+      }
+    }
+    List<String> partitionColNames = new ArrayList<>();
+    if (glueTable.hasPartitionKeys()) {
+      for (software.amazon.awssdk.services.glue.model.Column pk : glueTable.partitionKeys()) {
+        columns.add(GlueColumn.fromGlueColumn(pk));
+        partitionColNames.add(pk.name());
+      }
+    }
+
+    // --- Partitioning ---
+    Transform[] partitioning =
+        partitionColNames.stream().map(Transforms::identity).toArray(Transform[]::new);
+
+    // --- AuditInfo ---
+    AuditInfo auditInfo =
+        AuditInfo.builder()
+            .withCreateTime(glueTable.createTime())
+            .withLastModifiedTime(glueTable.updateTime())
+            .build();
+
+    return GlueTable.builder()
+        .withName(glueTable.name())
+        .withComment(glueTable.description())
+        .withColumns(columns.toArray(new Column[0]))
+        .withProperties(buildProperties(glueTable, sd))
+        .withPartitioning(partitioning)
+        .withDistribution(toDistribution(sd))
+        .withSortOrders(toSortOrders(sd))
+        .withAuditInfo(auditInfo)
+        .build();
+  }
+
+  /** Maps the bucket columns of a StorageDescriptor to a hash distribution, or NONE. */
+  private static Distribution toDistribution(StorageDescriptor sd) {
+    if (sd == null || !sd.hasBucketColumns()) {
+      return Distributions.NONE;
+    }
+    // numberOfBuckets() is a boxed Integer; comparing it directly would unbox and throw a
+    // NullPointerException when the field was never set.
+    Integer numberOfBuckets = sd.numberOfBuckets();
+    if (numberOfBuckets == null || numberOfBuckets <= 0) {
+      return Distributions.NONE;
+    }
+    Expression[] bucketFields =
+        sd.bucketColumns().stream().map(NamedReference::field).toArray(Expression[]::new);
+    return Distributions.hash(numberOfBuckets, bucketFields);
+  }
+
+  /** Maps the sort columns of a StorageDescriptor to Gravitino sort orders, or NONE. */
+  private static SortOrder[] toSortOrders(StorageDescriptor sd) {
+    if (sd == null || !sd.hasSortColumns()) {
+      return SortOrders.NONE;
+    }
+    return sd.sortColumns().stream()
+        .map(o -> SortOrders.of(NamedReference.field(o.column()), toSortDirection(o.sortOrder())))
+        .toArray(SortOrder[]::new);
+  }
+
+  /** Translates Glue's numeric sort flag; only the value 1 means ascending. */
+  private static SortDirection toSortDirection(Integer glueSortOrder) {
+    // The flag is a boxed Integer. A missing value is handled like every other non-1 value
+    // rather than being unboxed, which would throw a NullPointerException.
+    return glueSortOrder != null && glueSortOrder == GLUE_SORT_ASCENDING
+        ? SortDirection.ASCENDING
+        : SortDirection.DESCENDING;
+  }
+
+  /** Collects table parameters plus table-type and StorageDescriptor-derived properties. */
+  private static Map<String, String> buildProperties(Table glueTable, StorageDescriptor sd) {
+    Map<String, String> properties = new HashMap<>();
+    if (glueTable.hasParameters()) {
+      properties.putAll(glueTable.parameters());
+    }
+    if (StringUtils.isNotBlank(glueTable.tableType())) {
+      properties.put(TABLE_TYPE, glueTable.tableType());
+    }
+    if (sd == null) {
+      return properties;
+    }
+    putIfNotBlank(properties, LOCATION, sd.location());
+    putIfNotBlank(properties, INPUT_FORMAT, sd.inputFormat());
+    putIfNotBlank(properties, OUTPUT_FORMAT, sd.outputFormat());
+
+    SerDeInfo serde = sd.serdeInfo();
+    if (serde != null) {
+      putIfNotBlank(properties, SERDE_LIB, serde.serializationLibrary());
+      putIfNotBlank(properties, SERDE_NAME, serde.name());
+      if (serde.parameters() != null) {
+        // SerDe parameters are namespaced so they cannot shadow table-level parameters.
+        serde.parameters().forEach((k, v) -> properties.put(SERDE_PARAMETER_PREFIX + k, v));
+      }
+    }
+    return properties;
+  }
+
+  /** Stores {@code value} under {@code key} unless it is null, empty or whitespace-only. */
+  private static void putIfNotBlank(Map<String, String> map, String key, String value) {
+    if (StringUtils.isNotBlank(value)) {
+      map.put(key, value);
+    }
+  }
+
+  /** Builder for {@link GlueTable}. */
+  public static class Builder extends BaseTableBuilder<Builder, GlueTable> {
+
+    private Builder() {}
+
+    @Override
+    protected GlueTable internalBuild() {
+      GlueTable table = new GlueTable();
+      table.name = name;
+      table.comment = comment;
+      table.columns = columns;
+      table.properties = properties;
+      table.partitioning = partitioning;
+      table.sortOrders = sortOrders;
+      table.distribution = distribution;
+      table.indexes = indexes;
+      table.auditInfo = auditInfo;
+      table.proxyPlugin = Optional.empty();
+      return table;
+    }
+  }
+
+  /**
+   * Creates a new {@link Builder}.
+   *
+   * @return a new builder instance
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+}
diff --git a/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java
new file mode 100644
index 0000000000..a421f4b662
--- /dev/null
+++ b/catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTypeConverter.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import java.util.Locale;
+import org.apache.gravitino.rel.types.Type;
+import org.apache.gravitino.rel.types.Types;
+
+/**
+ * Converts between AWS Glue / Hive type strings and Gravitino {@link Type} objects.
+ *
+ * <p>Glue stores column types as Hive type strings (e.g. {@code "bigint"}, {@code "decimal(10,2)"},
+ * {@code "array<string>"}). This converter maps the primitive types listed in {@link
+ * #toGravitino(String)} natively; complex, unknown and malformed types fall back to {@link
+ * Types.ExternalType} to preserve the original string.
+ */
+public final class GlueTypeConverter {
+
+  private static final String CHAR_PREFIX = "char(";
+  private static final String VARCHAR_PREFIX = "varchar(";
+  private static final String DECIMAL_PREFIX = "decimal(";
+
+  private GlueTypeConverter() {}
+
+  /**
+   * Converts a Glue/Hive type string to a Gravitino {@link Type}.
+   *
+   * <p>Matching ignores case and surrounding whitespace. A {@code null} or empty string yields an
+   * {@link Types.ExternalType} with an empty catalog string. A parameterized type whose arguments
+   * cannot be parsed (for example {@code "varchar(max)"}) is treated like any other unknown type
+   * instead of raising an exception, so one unusual column cannot make a whole table unreadable.
+   *
+   * @param glueType the Hive type string from {@code Column.type()} (case-insensitive)
+   * @return the corresponding Gravitino {@link Type}; unknown types become {@link
+   *     Types.ExternalType} wrapping the original, unmodified string
+   */
+  public static Type toGravitino(String glueType) {
+    if (glueType == null || glueType.isEmpty()) {
+      return Types.ExternalType.of("");
+    }
+    String lower = glueType.trim().toLowerCase(Locale.ROOT);
+
+    switch (lower) {
+      case "boolean":
+        return Types.BooleanType.get();
+      case "tinyint":
+        return Types.ByteType.get();
+      case "smallint":
+        return Types.ShortType.get();
+      case "int":
+      case "integer":
+        return Types.IntegerType.get();
+      case "bigint":
+        return Types.LongType.get();
+      case "float":
+        return Types.FloatType.get();
+      case "double":
+        return Types.DoubleType.get();
+      case "string":
+        return Types.StringType.get();
+      case "date":
+        return Types.DateType.get();
+      case "timestamp":
+        return Types.TimestampType.withoutTimeZone();
+      case "binary":
+        return Types.BinaryType.get();
+      case "interval_year_month":
+        return Types.IntervalYearType.get();
+      case "interval_day_time":
+        return Types.IntervalDayType.get();
+      default:
+        break;
+    }
+
+    try {
+      Type parameterized = parseParameterizedType(lower);
+      if (parameterized != null) {
+        return parameterized;
+      }
+    } catch (IllegalArgumentException ignored) {
+      // Deliberately ignored: NumberFormatException (non-numeric or empty arguments) is an
+      // IllegalArgumentException, as is anything the Types factories may raise for values they
+      // reject. Either way the string is not a type we can model, so fall through and keep it
+      // verbatim as an ExternalType.
+    }
+
+    // Complex types (array<...>, map<...>, struct<...>, uniontype<...>) and anything unknown
+    // are preserved as ExternalType so the original string survives the round-trip.
+    return Types.ExternalType.of(glueType);
+  }
+
+  /**
+   * Parses {@code char(N)}, {@code varchar(N)} and {@code decimal(P[,S])}.
+   *
+   * @param lower the trimmed, lower-cased type string
+   * @return the parsed type, or {@code null} if the string is not one of the three forms
+   * @throws IllegalArgumentException if the arguments are not valid integers
+   */
+  private static Type parseParameterizedType(String lower) {
+    if (!lower.endsWith(")")) {
+      return null;
+    }
+    if (lower.startsWith(CHAR_PREFIX)) {
+      return Types.FixedCharType.of(Integer.parseInt(argumentsOf(lower, CHAR_PREFIX).trim()));
+    }
+    if (lower.startsWith(VARCHAR_PREFIX)) {
+      return Types.VarCharType.of(Integer.parseInt(argumentsOf(lower, VARCHAR_PREFIX).trim()));
+    }
+    if (lower.startsWith(DECIMAL_PREFIX)) {
+      // decimal(P,S) or decimal(P, S); a missing scale means 0.
+      String[] parts = argumentsOf(lower, DECIMAL_PREFIX).split(",", 2);
+      int precision = Integer.parseInt(parts[0].trim());
+      int scale = parts.length > 1 ? Integer.parseInt(parts[1].trim()) : 0;
+      return Types.DecimalType.of(precision, scale);
+    }
+    return null;
+  }
+
+  /** Returns the text between {@code prefix} (which ends with "(") and the closing ")". */
+  private static String argumentsOf(String lower, String prefix) {
+    return lower.substring(prefix.length(), lower.length() - 1);
+  }
+
+  /**
+   * Converts a Gravitino {@link Type} back to a Glue/Hive type string.
+   *
+   * <p>Every {@link Types.TimestampType} maps to {@code "timestamp"}; this method does not look at
+   * whether the Gravitino type carries a time zone. An {@link Types.ExternalType} is written back
+   * as its stored catalog string.
+   *
+   * @param type the Gravitino type
+   * @return the Hive type string
+   * @throws IllegalArgumentException if the type (including {@code null}) has no known Glue
+   *     representation
+   */
+  public static String fromGravitino(Type type) {
+    if (type instanceof Types.BooleanType) return "boolean";
+    if (type instanceof Types.ByteType) return "tinyint";
+    if (type instanceof Types.ShortType) return "smallint";
+    if (type instanceof Types.IntegerType) return "int";
+    if (type instanceof Types.LongType) return "bigint";
+    if (type instanceof Types.FloatType) return "float";
+    if (type instanceof Types.DoubleType) return "double";
+    if (type instanceof Types.StringType) return "string";
+    if (type instanceof Types.DateType) return "date";
+    if (type instanceof Types.TimestampType) return "timestamp";
+    if (type instanceof Types.BinaryType) return "binary";
+    if (type instanceof Types.IntervalYearType) return "interval_year_month";
+    if (type instanceof Types.IntervalDayType) return "interval_day_time";
+    if (type instanceof Types.FixedCharType) {
+      return "char(" + ((Types.FixedCharType) type).length() + ")";
+    }
+    if (type instanceof Types.VarCharType) {
+      return "varchar(" + ((Types.VarCharType) type).length() + ")";
+    }
+    if (type instanceof Types.DecimalType) {
+      Types.DecimalType d = (Types.DecimalType) type;
+      return "decimal(" + d.precision() + "," + d.scale() + ")";
+    }
+    if (type instanceof Types.ExternalType) {
+      return ((Types.ExternalType) type).catalogString();
+    }
+    throw new IllegalArgumentException("Unsupported Gravitino type for Glue: " + type);
+  }
+}
diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueSchemaTest.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueSchemaTest.java
new file mode 100644
index 0000000000..6e29b2df6f
--- /dev/null
+++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueSchemaTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.UUID;
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.services.glue.model.Database;
+
+/**
+ * Abstract base for {@link GlueSchema} conversion tests.
+ *
+ * <p>Subclasses provide a {@link Database} object however they like (SDK builder, real AWS API,
+ * etc.). The test scenarios are defined once here and shared across all implementations.
+ */
+abstract class AbstractGlueSchemaTest {
+
+  /**
+   * Returns a Glue {@link Database} with the given fields. Subclasses may create this via the SDK
+   * builder (synthetic) or by calling the real Glue API and retrieving the result.
+   *
+   * @param name the database name; tests pass a value obtained from {@link #uniqueName(String)}
+   * @param description the database description, or {@code null} for none
+   * @param params the database parameters; may be empty
+   * @return the database to feed into {@link GlueSchema#fromGlueDatabase(Database)}
+   */
+  protected abstract Database provideDatabase(
+      String name, String description, Map<String, String> params);
+
+  /**
+   * Clean up after each test (e.g. delete real Glue databases). Default: no-op.
+   *
+   * @param name the database name that was passed to {@link #provideDatabase}
+   */
+  protected void cleanup(String name) {}
+
+  // -------------------------------------------------------------------------
+  // Test scenarios
+  // -------------------------------------------------------------------------
+
+  @Test
+  void testAllFieldsMapped() {
+    String dbName = uniqueName("test_all_fields");
+    Map<String, String> params = Map.of("owner", "alice", "env", "prod");
+    Database db = provideDatabase(dbName, "a test database", params);
+    try {
+      GlueSchema schema = GlueSchema.fromGlueDatabase(db);
+      assertEquals(dbName, schema.name());
+      assertEquals("a test database", schema.comment());
+      assertEquals("alice", schema.properties().get("owner"));
+      assertEquals("prod", schema.properties().get("env"));
+      assertNotNull(schema.auditInfo());
+    } finally {
+      cleanup(dbName);
+    }
+  }
+
+  @Test
+  void testNullDescription() {
+    String dbName = uniqueName("test_null_desc");
+    Database db = provideDatabase(dbName, null, Collections.emptyMap());
+    try {
+      GlueSchema schema = GlueSchema.fromGlueDatabase(db);
+      assertNull(schema.comment());
+    } finally {
+      cleanup(dbName);
+    }
+  }
+
+  @Test
+  void testEmptyParameters() {
+    String dbName = uniqueName("test_empty_params");
+    Database db = provideDatabase(dbName, "desc", Collections.emptyMap());
+    try {
+      GlueSchema schema = GlueSchema.fromGlueDatabase(db);
+      assertNotNull(schema.properties());
+      assertTrue(schema.properties().isEmpty());
+    } finally {
+      cleanup(dbName);
+    }
+  }
+
+  @Test
+  void testCreateTimeInAuditInfo() {
+    String dbName = uniqueName("test_audit");
+    Database db = provideDatabase(dbName, null, Collections.emptyMap());
+    try {
+      GlueSchema schema = GlueSchema.fromGlueDatabase(db);
+      // Glue always sets createTime; audit info must reflect it
+      assertNotNull(schema.auditInfo().createTime());
+    } finally {
+      cleanup(dbName);
+    }
+  }
+
+  // -------------------------------------------------------------------------
+  // Helpers
+  // -------------------------------------------------------------------------
+
+  /**
+   * Returns {@code base} followed by an underscore and a random suffix.
+   *
+   * <p>The suffix is a random UUID without dashes (32 lower-case hex characters). A millisecond
+   * timestamp is not enough here: two calls within the same millisecond, from parallel tests or
+   * parallel builds sharing one AWS account, would produce the same name.
+   *
+   * @param base a readable prefix identifying the test
+   * @return a name that does not repeat across calls
+   */
+  protected String uniqueName(String base) {
+    return base + "_" + UUID.randomUUID().toString().replace("-", "");
+  }
+}
diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueTableTest.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueTableTest.java
new file mode 100644
index 0000000000..e6ee7f03fd
--- /dev/null
+++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AbstractGlueTableTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.UUID;
+import org.apache.gravitino.rel.Column;
+import org.apache.gravitino.rel.expressions.distributions.Distributions;
+import org.apache.gravitino.rel.expressions.sorts.SortDirection;
+import org.apache.gravitino.rel.expressions.sorts.SortOrders;
+import org.apache.gravitino.rel.expressions.transforms.Transforms;
+import org.apache.gravitino.rel.types.Types;
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.services.glue.model.Table;
+
+/**
+ * Abstract base for {@link GlueTable} conversion tests.
+ *
+ * <p>Subclasses supply the {@link Table} object — either via SDK builder (synthetic) or via the
+ * real Glue API — while the test scenarios are defined once here.
+ *
+ * <p>The scenarios below fix what {@link #provideHiveTable} must return: data columns {@code id}
+ * (bigint) and {@code name} (string), one partition key {@code dt} (date), 4 buckets, one
+ * ascending sort column, table type {@code EXTERNAL_TABLE}, and a StorageDescriptor with location,
+ * input/output format and SerDe library set.
+ */
+abstract class AbstractGlueTableTest {
+
+  /** Returns a Hive-format Glue table with columns, partition keys, buckets, and sort columns. */
+  protected abstract Table provideHiveTable(String schemaName, String tableName);
+
+  /** Returns an Iceberg-format Glue table (empty StorageDescriptor columns). */
+  protected abstract Table provideIcebergTable(String schemaName, String tableName);
+
+  /** Returns a table with no StorageDescriptor (edge case). */
+  protected abstract Table provideMinimalTable(String schemaName, String tableName);
+
+  /** Clean up after each test. Default: no-op. */
+  protected void cleanup(String schemaName, String tableName) {}
+
+  // -------------------------------------------------------------------------
+  // Test scenarios
+  // -------------------------------------------------------------------------
+
+  @Test
+  void testHiveTableColumnMapping() {
+    String schema = uniqueName("s");
+    String table = uniqueName("hive_tbl");
+    Table glueTable = provideHiveTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      assertEquals(table, t.name());
+
+      // data columns: id (bigint) + name (string); partition: dt (date)
+      assertEquals(3, t.columns().length);
+      Column id = t.columns()[0];
+      assertEquals("id", id.name());
+      assertEquals(Types.LongType.get(), id.dataType());
+
+      Column name = t.columns()[1];
+      assertEquals("name", name.name());
+      assertEquals(Types.StringType.get(), name.dataType());
+
+      Column dt = t.columns()[2];
+      assertEquals("dt", dt.name());
+      assertEquals(Types.DateType.get(), dt.dataType());
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testHiveTablePartitioning() {
+    String schema = uniqueName("s");
+    String table = uniqueName("part_tbl");
+    Table glueTable = provideHiveTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      assertEquals(1, t.partitioning().length);
+      assertEquals(Transforms.identity("dt"), t.partitioning()[0]);
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testHiveTableDistribution() {
+    String schema = uniqueName("s");
+    String table = uniqueName("bucket_tbl");
+    Table glueTable = provideHiveTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      // 4 buckets on "id"
+      assertEquals(4, t.distribution().number());
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testHiveTableSortOrders() {
+    String schema = uniqueName("s");
+    String table = uniqueName("sort_tbl");
+    Table glueTable = provideHiveTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      assertEquals(1, t.sortOrder().length);
+      assertEquals(SortDirection.ASCENDING, t.sortOrder()[0].direction());
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testHiveTableStorageDescriptorProperties() {
+    String schema = uniqueName("s");
+    String table = uniqueName("sd_props_tbl");
+    Table glueTable = provideHiveTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      assertNotNull(t.properties().get(GlueConstants.LOCATION));
+      assertNotNull(t.properties().get(GlueConstants.INPUT_FORMAT));
+      assertNotNull(t.properties().get(GlueConstants.OUTPUT_FORMAT));
+      assertNotNull(t.properties().get(GlueConstants.SERDE_LIB));
+      assertEquals("EXTERNAL_TABLE", t.properties().get(GlueConstants.TABLE_TYPE));
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testIcebergTableParametersPassThrough() {
+    String schema = uniqueName("s");
+    String table = uniqueName("iceberg_tbl");
+    Table glueTable = provideIcebergTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      // Iceberg tables may have no data columns
+      assertEquals("ICEBERG", t.properties().get(GlueConstants.TABLE_FORMAT));
+      assertNotNull(t.properties().get(GlueConstants.METADATA_LOCATION));
+      // No partition transforms (Iceberg manages partitioning itself)
+      assertEquals(0, t.partitioning().length);
+      // No distribution / sort orders
+      assertEquals(Distributions.NONE, t.distribution());
+      assertEquals(SortOrders.NONE.length, t.sortOrder().length);
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  @Test
+  void testMinimalTableNoStorageDescriptor() {
+    String schema = uniqueName("s");
+    String table = uniqueName("minimal_tbl");
+    Table glueTable = provideMinimalTable(schema, table);
+    try {
+      GlueTable t = GlueTable.fromGlueTable(glueTable);
+      assertEquals(0, t.columns().length);
+      assertEquals(0, t.partitioning().length);
+      assertEquals(Distributions.NONE, t.distribution());
+      // Without a StorageDescriptor there is nothing to derive a location from.
+      assertTrue(t.properties().isEmpty() || !t.properties().containsKey(GlueConstants.LOCATION));
+    } finally {
+      cleanup(schema, table);
+    }
+  }
+
+  // -------------------------------------------------------------------------
+  // Helpers
+  // -------------------------------------------------------------------------
+
+  /**
+   * Returns {@code base} followed by an underscore and a random suffix.
+   *
+   * <p>Every scenario above asks for a schema name with the same base {@code "s"}. With a
+   * millisecond timestamp as suffix, scenarios starting within the same millisecond received the
+   * same schema name, so against a real Glue catalog one test's {@link #cleanup} could remove a
+   * schema another test was still using. A random UUID (dashes removed, 32 lower-case hex
+   * characters) does not repeat across calls.
+   *
+   * @param base a readable prefix identifying the test object
+   * @return a name that does not repeat across calls
+   */
+  protected String uniqueName(String base) {
+    return base + "_" + UUID.randomUUID().toString().replace("-", "");
+  }
+}
diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java
new file mode 100644
index 0000000000..acdcf30cdb
--- /dev/null
+++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueSchemaIT.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
package org.apache.gravitino.catalog.glue;

import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import software.amazon.awssdk.services.glue.GlueClient;
import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest;
import software.amazon.awssdk.services.glue.model.Database;
import software.amazon.awssdk.services.glue.model.DatabaseInput;
import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest;
import software.amazon.awssdk.services.glue.model.GetDatabaseRequest;

/**
 * Runs {@link AbstractGlueSchemaTest} scenarios against a real AWS Glue endpoint.
 *
 * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it,
 * set the following environment variables and pass {@code -PrunAwsTests} to Gradle:
 *
 * <ul>
 *   <li>{@code AWS_ACCESS_KEY_ID}
 *   <li>{@code AWS_SECRET_ACCESS_KEY}
 *   <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1})
 *   <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional)
 * </ul>
 *
 * <p>Each test creates a real Glue database, retrieves it via the API (getting a real serialized
 * response), converts it to a {@link GlueSchema}, and asserts the field mapping. The database is
 * deleted in {@link #cleanup} regardless of test outcome. The shared client is closed once all
 * tests have finished.
 */
@Tag("gravitino-aws-test")
class AwsGlueSchemaIT extends AbstractGlueSchemaTest {

  private static GlueClient glueClient;
  // Null when GLUE_CATALOG_ID is unset; requests then omit the catalog id.
  private static String catalogId;

  /** Builds the shared Glue client from the environment once per test class. */
  @BeforeAll
  static void initClient() {
    Map<String, String> config = new HashMap<>();
    config.put(
        GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1"));
    String accessKey = System.getenv("AWS_ACCESS_KEY_ID");
    String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY");
    // Static credentials are only passed when both halves are present.
    if (accessKey != null && secretKey != null) {
      config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey);
      config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey);
    }
    glueClient = GlueClientProvider.buildClient(config);
    catalogId = System.getenv("GLUE_CATALOG_ID");
  }

  /** Releases the HTTP resources held by the shared client. */
  @AfterAll
  static void closeClient() {
    // glueClient stays null when initClient failed before the client was built.
    if (glueClient != null) {
      glueClient.close();
    }
  }

  /** Creates the database in Glue and returns it as read back through {@code GetDatabase}. */
  @Override
  protected Database provideDatabase(String name, String description, Map<String, String> params) {
    CreateDatabaseRequest.Builder req =
        CreateDatabaseRequest.builder()
            .databaseInput(
                DatabaseInput.builder()
                    .name(name)
                    .description(description)
                    .parameters(params)
                    .build());
    if (catalogId != null) {
      req.catalogId(catalogId);
    }
    glueClient.createDatabase(req.build());

    GetDatabaseRequest.Builder getReq = GetDatabaseRequest.builder().name(name);
    if (catalogId != null) {
      getReq.catalogId(catalogId);
    }
    return glueClient.getDatabase(getReq.build()).database();
  }

  /** Deletes the database created for a test; failures are deliberately ignored. */
  @Override
  protected void cleanup(String name) {
    try {
      DeleteDatabaseRequest.Builder req = DeleteDatabaseRequest.builder().name(name);
      if (catalogId != null) {
        req.catalogId(catalogId);
      }
      glueClient.deleteDatabase(req.build());
    } catch (Exception ignored) {
      // Best-effort cleanup
    }
  }
}
diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java new file mode 100644 index 0000000000..4e775a9c9e --- /dev/null +++ 
b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/AwsGlueTableIT.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */
package org.apache.gravitino.catalog.glue;

import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import software.amazon.awssdk.services.glue.GlueClient;
import software.amazon.awssdk.services.glue.model.Column;
import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest;
import software.amazon.awssdk.services.glue.model.CreateTableRequest;
import software.amazon.awssdk.services.glue.model.DatabaseInput;
import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest;
import software.amazon.awssdk.services.glue.model.DeleteTableRequest;
import software.amazon.awssdk.services.glue.model.GetTableRequest;
import software.amazon.awssdk.services.glue.model.Order;
import software.amazon.awssdk.services.glue.model.SerDeInfo;
import software.amazon.awssdk.services.glue.model.StorageDescriptor;
import software.amazon.awssdk.services.glue.model.Table;
import software.amazon.awssdk.services.glue.model.TableInput;

/**
 * Runs {@link AbstractGlueTableTest} scenarios against a real AWS Glue endpoint.
 *
 * <p>This test is tagged {@code gravitino-aws-test} and is <b>skipped by default</b>. To run it,
 * set the following environment variables and pass {@code -PrunAwsTests} to Gradle:
 *
 * <ul>
 *   <li>{@code AWS_ACCESS_KEY_ID}
 *   <li>{@code AWS_SECRET_ACCESS_KEY}
 *   <li>{@code AWS_DEFAULT_REGION} (e.g. {@code us-east-1})
 *   <li>{@code GLUE_CATALOG_ID} (12-digit AWS account ID; optional)
 * </ul>
 *
 * <p>Each test creates a real Glue table in a database that {@link #initClient} creates once per
 * class, retrieves it via the API, converts it to a {@link GlueTable}, and asserts the field
 * mapping. The table is deleted in {@link #cleanup} regardless of test outcome; the database is
 * deleted, and the client closed, in {@link #cleanupSchema} after all tests have run.
 *
 * <p>The {@code schemaName} argument passed by the base class is not used: every table lives in
 * the shared class-level database.
 */
@Tag("gravitino-aws-test")
class AwsGlueTableIT extends AbstractGlueTableTest {

  private static GlueClient glueClient;
  // Null when GLUE_CATALOG_ID is unset; requests then omit the catalog id.
  private static String catalogId;
  private static String testSchemaName;

  private static final String INPUT_FMT = "org.apache.hadoop.mapred.TextInputFormat";
  private static final String OUTPUT_FMT =
      "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
  private static final String SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
  private static final String LOCATION = "s3://my-bucket/warehouse/";

  /** Builds the shared Glue client and creates the class-level test database. */
  @BeforeAll
  static void initClient() {
    Map<String, String> config = new HashMap<>();
    config.put(
        GlueConstants.AWS_REGION, System.getenv().getOrDefault("AWS_DEFAULT_REGION", "us-east-1"));
    String accessKey = System.getenv("AWS_ACCESS_KEY_ID");
    String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY");
    // Static credentials are only passed when both halves are present.
    if (accessKey != null && secretKey != null) {
      config.put(GlueConstants.AWS_ACCESS_KEY_ID, accessKey);
      config.put(GlueConstants.AWS_SECRET_ACCESS_KEY, secretKey);
    }
    glueClient = GlueClientProvider.buildClient(config);
    catalogId = System.getenv("GLUE_CATALOG_ID");

    // Create a dedicated test schema once per test class.
    testSchemaName = "aws_glue_table_it_" + System.currentTimeMillis();
    CreateDatabaseRequest.Builder dbReq =
        CreateDatabaseRequest.builder()
            .databaseInput(
                DatabaseInput.builder()
                    .name(testSchemaName)
                    .description("schema for AwsGlueTableIT")
                    .build());
    if (catalogId != null) {
      dbReq.catalogId(catalogId);
    }
    glueClient.createDatabase(dbReq.build());
  }

  @Override
  protected Table provideHiveTable(String schemaName, String tableName) {
    TableInput input =
        TableInput.builder()
            .name(tableName)
            .description("a hive table")
            .tableType("EXTERNAL_TABLE")
            .storageDescriptor(
                StorageDescriptor.builder()
                    .columns(
                        Column.builder().name("id").type("bigint").comment("primary key").build(),
                        Column.builder().name("name").type("string").build())
                    .location(LOCATION + tableName)
                    .inputFormat(INPUT_FMT)
                    .outputFormat(OUTPUT_FMT)
                    .serdeInfo(SerDeInfo.builder().serializationLibrary(SERDE).build())
                    .bucketColumns("id")
                    .numberOfBuckets(4)
                    .sortColumns(Order.builder().column("name").sortOrder(1).build())
                    .build())
            .partitionKeys(Column.builder().name("dt").type("date").build())
            .parameters(Map.of("created_by", "aws_glue_table_it"))
            .build();
    return createAndRetrieve(input);
  }

  @Override
  protected Table provideIcebergTable(String schemaName, String tableName) {
    TableInput input =
        TableInput.builder()
            .name(tableName)
            .tableType("EXTERNAL_TABLE")
            .storageDescriptor(StorageDescriptor.builder().build())
            .parameters(
                Map.of(
                    GlueConstants.TABLE_FORMAT, "ICEBERG",
                    GlueConstants.METADATA_LOCATION, "s3://bucket/path/metadata/v1.metadata.json"))
            .build();
    return createAndRetrieve(input);
  }

  @Override
  protected Table provideMinimalTable(String schemaName, String tableName) {
    return createAndRetrieve(TableInput.builder().name(tableName).build());
  }

  /** Creates {@code input} in the shared test database and returns it as read back from Glue. */
  private Table createAndRetrieve(TableInput input) {
    CreateTableRequest.Builder req =
        CreateTableRequest.builder().databaseName(testSchemaName).tableInput(input);
    if (catalogId != null) {
      req.catalogId(catalogId);
    }
    glueClient.createTable(req.build());

    return retrieveTable(input.name());
  }

  /** Fetches a table of the shared test database through {@code GetTable}. */
  private Table retrieveTable(String tableName) {
    GetTableRequest.Builder getReq =
        GetTableRequest.builder().databaseName(testSchemaName).name(tableName);
    if (catalogId != null) {
      getReq.catalogId(catalogId);
    }
    return glueClient.getTable(getReq.build()).table();
  }

  /** Deletes the table created for a test; failures are deliberately ignored. */
  @Override
  protected void cleanup(String schemaName, String tableName) {
    try {
      DeleteTableRequest.Builder req =
          DeleteTableRequest.builder().databaseName(testSchemaName).name(tableName);
      if (catalogId != null) {
        req.catalogId(catalogId);
      }
      glueClient.deleteTable(req.build());
    } catch (Exception ignored) {
      // Best-effort cleanup
    }
  }

  /** Deletes the class-level test database and closes the shared client. */
  @AfterAll
  static void cleanupSchema() {
    if (glueClient == null) {
      // initClient failed before the client was built; nothing to delete or close.
      return;
    }
    try {
      DeleteDatabaseRequest.Builder dbReq = DeleteDatabaseRequest.builder().name(testSchemaName);
      if (catalogId != null) {
        dbReq.catalogId(catalogId);
      }
      glueClient.deleteDatabase(dbReq.build());
    } catch (Exception ignored) {
      // Best-effort cleanup
    } finally {
      glueClient.close();
    }
  }
}
diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueSchemaTest.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueSchemaTest.java new file mode 100644 index 0000000000..7cccba6409 --- /dev/null +++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueSchemaTest.java @@ -0,0 +1,43 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.glue; + +import java.time.Instant; +import java.util.Map; +import software.amazon.awssdk.services.glue.model.Database; + +/** + * Runs {@link AbstractGlueSchemaTest} scenarios using AWS SDK builders to create {@link Database} + * objects directly — no network or AWS credentials required. + * + * <p>This verifies that the {@link GlueSchema#fromGlueDatabase} conversion logic works correctly + * for typical Glue API response shapes. 
+ */ +class SyntheticGlueSchemaTest extends AbstractGlueSchemaTest { + + @Override + protected Database provideDatabase(String name, String description, Map<String, String> params) { + return Database.builder() + .name(name) + .description(description) + .parameters(params) + .createTime(Instant.now()) + .build(); + } +} diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueTableTest.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueTableTest.java new file mode 100644 index 0000000000..552262c318 --- /dev/null +++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/SyntheticGlueTableTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.glue; + +import java.time.Instant; +import java.util.Map; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.Order; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.Table; + +/** + * Runs {@link AbstractGlueTableTest} scenarios using AWS SDK builders — no network or credentials + * required. + */ +class SyntheticGlueTableTest extends AbstractGlueTableTest { + + private static final String INPUT_FMT = "org.apache.hadoop.mapred.TextInputFormat"; + private static final String OUTPUT_FMT = + "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"; + private static final String SERDE = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; + private static final String LOCATION = "s3://my-bucket/warehouse/"; + + @Override + protected Table provideHiveTable(String schemaName, String tableName) { + return Table.builder() + .name(tableName) + .description("a hive table") + .tableType("EXTERNAL_TABLE") + .storageDescriptor( + StorageDescriptor.builder() + .columns( + Column.builder().name("id").type("bigint").comment("primary key").build(), + Column.builder().name("name").type("string").build()) + .location(LOCATION + tableName) + .inputFormat(INPUT_FMT) + .outputFormat(OUTPUT_FMT) + .serdeInfo(SerDeInfo.builder().serializationLibrary(SERDE).build()) + .bucketColumns("id") + .numberOfBuckets(4) + .sortColumns(Order.builder().column("name").sortOrder(1).build()) + .build()) + .partitionKeys(Column.builder().name("dt").type("date").build()) + .parameters(Map.of("created_by", "test")) + .createTime(Instant.now()) + .updateTime(Instant.now()) + .build(); + } + + @Override + protected Table provideIcebergTable(String schemaName, String tableName) { + return Table.builder() + .name(tableName) + .tableType("EXTERNAL_TABLE") + 
.storageDescriptor(StorageDescriptor.builder().build()) + .parameters( + Map.of( + GlueConstants.TABLE_FORMAT, "ICEBERG", + GlueConstants.METADATA_LOCATION, "s3://bucket/path/metadata/v1.metadata.json")) + .createTime(Instant.now()) + .updateTime(Instant.now()) + .build(); + } + + @Override + protected Table provideMinimalTable(String schemaName, String tableName) { + return Table.builder() + .name(tableName) + .createTime(Instant.now()) + .updateTime(Instant.now()) + .build(); + } +} diff --git a/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueTypeConverter.java b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueTypeConverter.java new file mode 100644 index 0000000000..eeb3b52574 --- /dev/null +++ b/catalogs/catalog-glue/src/test/java/org/apache/gravitino/catalog/glue/TestGlueTypeConverter.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
package org.apache.gravitino.catalog.glue;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.gravitino.rel.types.Type;
import org.apache.gravitino.rel.types.Types;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link GlueTypeConverter}.
 *
 * <p>Covers both directions: Glue type string to Gravitino {@link Type} ({@code toGravitino}) and
 * Gravitino {@link Type} back to the Glue type string ({@code fromGravitino}).
 */
class TestGlueTypeConverter {

  // -------------------------------------------------------------------------
  // toGravitino — primitive types
  // -------------------------------------------------------------------------

  @Test
  void testPrimitiveTypes() {
    assertEquals(Types.BooleanType.get(), GlueTypeConverter.toGravitino("boolean"));
    assertEquals(Types.ByteType.get(), GlueTypeConverter.toGravitino("tinyint"));
    assertEquals(Types.ShortType.get(), GlueTypeConverter.toGravitino("smallint"));
    assertEquals(Types.IntegerType.get(), GlueTypeConverter.toGravitino("int"));
    assertEquals(Types.IntegerType.get(), GlueTypeConverter.toGravitino("integer"));
    assertEquals(Types.LongType.get(), GlueTypeConverter.toGravitino("bigint"));
    assertEquals(Types.FloatType.get(), GlueTypeConverter.toGravitino("float"));
    assertEquals(Types.DoubleType.get(), GlueTypeConverter.toGravitino("double"));
    assertEquals(Types.StringType.get(), GlueTypeConverter.toGravitino("string"));
    assertEquals(Types.DateType.get(), GlueTypeConverter.toGravitino("date"));
    assertEquals(Types.TimestampType.withoutTimeZone(), GlueTypeConverter.toGravitino("timestamp"));
    assertEquals(Types.BinaryType.get(), GlueTypeConverter.toGravitino("binary"));
    assertEquals(
        Types.IntervalYearType.get(), GlueTypeConverter.toGravitino("interval_year_month"));
    assertEquals(Types.IntervalDayType.get(), GlueTypeConverter.toGravitino("interval_day_time"));
  }

  @Test
  void testCaseInsensitive() {
    assertEquals(Types.LongType.get(), GlueTypeConverter.toGravitino("BIGINT"));
    assertEquals(Types.StringType.get(), GlueTypeConverter.toGravitino("STRING"));
  }

  // -------------------------------------------------------------------------
  // toGravitino — parameterised types
  // -------------------------------------------------------------------------

  @Test
  void testCharType() {
    assertEquals(Types.FixedCharType.of(10), GlueTypeConverter.toGravitino("char(10)"));
    assertEquals(Types.FixedCharType.of(1), GlueTypeConverter.toGravitino("char(1)"));
  }

  @Test
  void testVarcharType() {
    assertEquals(Types.VarCharType.of(255), GlueTypeConverter.toGravitino("varchar(255)"));
    assertEquals(Types.VarCharType.of(65535), GlueTypeConverter.toGravitino("varchar(65535)"));
  }

  @Test
  void testDecimalType() {
    assertEquals(Types.DecimalType.of(10, 2), GlueTypeConverter.toGravitino("decimal(10,2)"));
    // Whitespace after the comma is accepted.
    assertEquals(Types.DecimalType.of(38, 18), GlueTypeConverter.toGravitino("decimal(38, 18)"));
    // A missing scale is read as scale 0.
    assertEquals(Types.DecimalType.of(5, 0), GlueTypeConverter.toGravitino("decimal(5)"));
  }

  // -------------------------------------------------------------------------
  // toGravitino — complex / unknown types → ExternalType
  // -------------------------------------------------------------------------

  @Test
  void testComplexTypesBecomesExternalType() {
    assertInstanceOf(Types.ExternalType.class, GlueTypeConverter.toGravitino("array<string>"));
    assertInstanceOf(Types.ExternalType.class, GlueTypeConverter.toGravitino("map<string,int>"));
    assertInstanceOf(
        Types.ExternalType.class, GlueTypeConverter.toGravitino("struct<id:bigint,name:string>"));
    assertInstanceOf(
        Types.ExternalType.class, GlueTypeConverter.toGravitino("uniontype<int,string>"));
    assertInstanceOf(
        Types.ExternalType.class, GlueTypeConverter.toGravitino("unknown_custom_type"));
  }

  @Test
  void testExternalTypePreservesOriginalString() {
    String rawType = "array<map<string,int>>";
    Type type = GlueTypeConverter.toGravitino(rawType);
    assertInstanceOf(Types.ExternalType.class, type);
    assertEquals(rawType, ((Types.ExternalType) type).catalogString());
  }

  @Test
  void testNullAndEmptyInput() {
    assertInstanceOf(Types.ExternalType.class, GlueTypeConverter.toGravitino(null));
    assertInstanceOf(Types.ExternalType.class, GlueTypeConverter.toGravitino(""));
  }

  // -------------------------------------------------------------------------
  // fromGravitino — round-trip
  // -------------------------------------------------------------------------

  @Test
  void testRoundTripPrimitives() {
    // Only the canonical spelling of each type is listed: the "integer" alias maps to the same
    // Gravitino type as "int", so it cannot round-trip back to itself.
    roundTrip("boolean", Types.BooleanType.get());
    roundTrip("tinyint", Types.ByteType.get());
    roundTrip("smallint", Types.ShortType.get());
    roundTrip("int", Types.IntegerType.get());
    roundTrip("bigint", Types.LongType.get());
    roundTrip("float", Types.FloatType.get());
    roundTrip("double", Types.DoubleType.get());
    roundTrip("string", Types.StringType.get());
    roundTrip("date", Types.DateType.get());
    roundTrip("timestamp", Types.TimestampType.withoutTimeZone());
    roundTrip("binary", Types.BinaryType.get());
    roundTrip("interval_year_month", Types.IntervalYearType.get());
    roundTrip("interval_day_time", Types.IntervalDayType.get());
  }

  @Test
  void testRoundTripParameterised() {
    // Check both directions so the test lives up to its name; previously only fromGravitino was
    // exercised here.
    roundTrip("char(10)", Types.FixedCharType.of(10));
    roundTrip("varchar(255)", Types.VarCharType.of(255));
    roundTrip("decimal(10,2)", Types.DecimalType.of(10, 2));
  }

  @Test
  void testFromGravitinoExternalType() {
    String raw = "array<string>";
    assertEquals(raw, GlueTypeConverter.fromGravitino(Types.ExternalType.of(raw)));
  }

  @Test
  void testFromGravitinoUnsupportedTypeThrows() {
    assertThrows(
        IllegalArgumentException.class,
        () -> GlueTypeConverter.fromGravitino(Types.NullType.get()));
  }

  // -------------------------------------------------------------------------
  // helpers
  // -------------------------------------------------------------------------

  /**
   * Asserts that {@code glueType} converts to {@code gravitinoType} and that converting
   * {@code gravitinoType} back yields exactly {@code glueType}.
   */
  private static void roundTrip(String glueType, Type gravitinoType) {
    assertEquals(gravitinoType, GlueTypeConverter.toGravitino(glueType));
    assertEquals(glueType, GlueTypeConverter.fromGravitino(gravitinoType));
  }
}
