diqiu50 commented on code in PR #10781:
URL: https://github.com/apache/gravitino/pull/10781#discussion_r3110834049


##########
catalogs/catalog-glue/src/main/java/org/apache/gravitino/catalog/glue/GlueTable.java:
##########
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.glue;
+
+import static org.apache.gravitino.catalog.glue.GlueConstants.INPUT_FORMAT;
+import static org.apache.gravitino.catalog.glue.GlueConstants.LOCATION;
+import static org.apache.gravitino.catalog.glue.GlueConstants.OUTPUT_FORMAT;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_LIB;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_NAME;
+import static org.apache.gravitino.catalog.glue.GlueConstants.SERDE_PARAMETER_PREFIX;
+import static org.apache.gravitino.catalog.glue.GlueConstants.TABLE_TYPE;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import lombok.ToString;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.gravitino.connector.BaseTable;
+import org.apache.gravitino.connector.TableOperations;
+import org.apache.gravitino.meta.AuditInfo;
+import org.apache.gravitino.rel.Column;
+import org.apache.gravitino.rel.expressions.NamedReference;
+import org.apache.gravitino.rel.expressions.distributions.Distribution;
+import org.apache.gravitino.rel.expressions.distributions.Distributions;
+import org.apache.gravitino.rel.expressions.sorts.SortDirection;
+import org.apache.gravitino.rel.expressions.sorts.SortOrder;
+import org.apache.gravitino.rel.expressions.sorts.SortOrders;
+import org.apache.gravitino.rel.expressions.transforms.Transform;
+import org.apache.gravitino.rel.expressions.transforms.Transforms;
+import software.amazon.awssdk.services.glue.model.StorageDescriptor;
+import software.amazon.awssdk.services.glue.model.Table;
+
+/**
+ * Represents an AWS Glue {@link Table} as a Gravitino table.
+ *
+ * <p>All entries in {@code Table.parameters()} pass through intact (including {@code table_type},
+ * {@code metadata_location}, etc.), so downstream tools can correctly identify the table format.
+ * StorageDescriptor fields (location, formats, SerDe) are surfaced as additional properties.
+ */
+@ToString
+public class GlueTable extends BaseTable {
+
+  private GlueTable() {} // private: cannot be instantiated from outside this class
+
+  @Override
+  protected TableOperations newOps() {
+    // Partition operations are not implemented yet (deferred to PR-06), so this always throws.
+    throw new UnsupportedOperationException(
+        "Partition operations are not yet supported for GlueTable");
+  }
+
+  /**
+   * Converts an AWS Glue {@link Table} to a {@link GlueTable}.
+   *
+   * <p>Column assembly:
+   *
+   * <ol>
+   *   <li>Data columns from {@code storageDescriptor.columns()} (Hive-format tables).
+   *   <li>Partition columns from {@code table.partitionKeys()} appended after data columns.
+   * </ol>
+   *
+   * <p>For Iceberg-format tables the StorageDescriptor columns are typically empty; all metadata
+   * (including {@code table_type=ICEBERG} and {@code metadata_location}) is in {@code
+   * table.parameters()} and passes through as-is.
+   *
+   * @param glueTable the Glue Table returned by the AWS SDK
+   * @return a populated {@link GlueTable}
+   */
+  public static GlueTable fromGlueTable(Table glueTable) {
+    StorageDescriptor sd = glueTable.storageDescriptor();
+
+    // --- Columns ---
+    List<Column> columns = new ArrayList<>();
+    if (sd != null && sd.hasColumns()) {
+      for (software.amazon.awssdk.services.glue.model.Column c : sd.columns()) {
+        columns.add(GlueColumn.fromGlueColumn(c));
+      }
+    }
+    List<String> partitionColNames = new ArrayList<>();
+    if (glueTable.hasPartitionKeys()) {
+      for (software.amazon.awssdk.services.glue.model.Column pk : glueTable.partitionKeys()) {
+        columns.add(GlueColumn.fromGlueColumn(pk));
+        partitionColNames.add(pk.name());
+      }
+    }
+
+    // --- Partitioning ---
+    Transform[] partitioning =
+        partitionColNames.stream().map(Transforms::identity).toArray(Transform[]::new);
+
+    // --- Distribution (bucket) ---
+    Distribution distribution = Distributions.NONE;
+    Integer numBuckets = sd != null ? sd.numberOfBuckets() : null;
+    if (sd != null && sd.hasBucketColumns() && numBuckets != null && numBuckets > 0) {
+      distribution =
+          Distributions.hash(

Review Comment:
   Yes



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to