Re: [PR] Flink: Supports specifying comment for iceberg fields in create table and addcolumn syntax using flinksql [iceberg]

via GitHub Mon, 04 Mar 2024 09:18:13 -0800


stevenzwu commented on code in PR #9606:
URL: https://github.com/apache/iceberg/pull/9606#discussion_r1511504695



##########
flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/FlinkSchemaUtil.java:
##########
@@ -64,26 +68,59 @@ public static Schema convert(TableSchema schema) {
     RowType root = (RowType) schemaType;
     Type converted = root.accept(new FlinkTypeToType(root));
 
-    Schema iSchema = new Schema(converted.asStructType().fields());
-    return freshIdentifierFieldIds(iSchema, schema);
+    Schema icebergSchema = new Schema(converted.asStructType().fields());
+    if (schema.getPrimaryKey().isPresent()) {
+      return freshIdentifierFieldIds(icebergSchema, 
schema.getPrimaryKey().get().getColumns());
+    } else {
+      return icebergSchema;
+    }
   }
 
-  private static Schema freshIdentifierFieldIds(Schema iSchema, TableSchema 
schema) {
+  /** Convert the flink table schema to apache iceberg schema with column 
comment. */
+  public static Schema convert(ResolvedSchema flinkSchema) {
+    List<Column> tableColumns = flinkSchema.getColumns();
+    // copy from org.apache.flink.table.api.Schema#toRowDataType
+    DataTypes.Field[] fields =
+        tableColumns.stream()
+            .map(
+                column -> {
+                  if (column.getComment().isPresent()) {
+                    return DataTypes.FIELD(
+                        column.getName(), column.getDataType(), 
column.getComment().get());
+                  } else {
+                    return DataTypes.FIELD(column.getName(), 
column.getDataType());
+                  }
+                })
+            .toArray(DataTypes.Field[]::new);
+
+    LogicalType schemaType = DataTypes.ROW(fields).notNull().getLogicalType();
+    Preconditions.checkArgument(
+        schemaType instanceof RowType, "Schema logical type should be 
RowType.");
+
+    RowType root = (RowType) schemaType;
+    Type converted = root.accept(new FlinkTypeToType(root));
+    Schema icebergSchema = new Schema(converted.asStructType().fields());
+    if (flinkSchema.getPrimaryKey().isPresent()) {
+      return freshIdentifierFieldIds(icebergSchema, 
flinkSchema.getPrimaryKey().get().getColumns());
+    } else {
+      return icebergSchema;
+    }
+  }
+
+  private static Schema freshIdentifierFieldIds(Schema icebergSchema, 
List<String> primaryKeys) {
     // Locate the identifier field id list.
     Set<Integer> identifierFieldIds = Sets.newHashSet();
-    if (schema.getPrimaryKey().isPresent()) {
-      for (String column : schema.getPrimaryKey().get().getColumns()) {
-        Types.NestedField field = iSchema.findField(column);
-        Preconditions.checkNotNull(
-            field,
-            "Cannot find field ID for the primary key column %s in schema %s",
-            column,
-            iSchema);
-        identifierFieldIds.add(field.fieldId());
-      }
+    for (String primaryKey : primaryKeys) {
+      Types.NestedField field = icebergSchema.findField(primaryKey);
+      Preconditions.checkNotNull(
+          field,
+          "Cannot find field ID for the primary key column %s in Schema %s",

Review Comment:
   nit: in the error message, keep "schema" lowercase as it was before this change (Schema -> schema)



##########
flink/v1.18/flink/src/main/java/org/apache/iceberg/flink/FlinkSchemaUtil.java:
##########
@@ -64,26 +68,59 @@ public static Schema convert(TableSchema schema) {
     RowType root = (RowType) schemaType;
     Type converted = root.accept(new FlinkTypeToType(root));
 
-    Schema iSchema = new Schema(converted.asStructType().fields());
-    return freshIdentifierFieldIds(iSchema, schema);
+    Schema icebergSchema = new Schema(converted.asStructType().fields());
+    if (schema.getPrimaryKey().isPresent()) {
+      return freshIdentifierFieldIds(icebergSchema, 
schema.getPrimaryKey().get().getColumns());
+    } else {
+      return icebergSchema;
+    }
   }
 
-  private static Schema freshIdentifierFieldIds(Schema iSchema, TableSchema 
schema) {
+  /** Convert the flink table schema to apache iceberg schema with column 
comment. */
+  public static Schema convert(ResolvedSchema flinkSchema) {
+    List<Column> tableColumns = flinkSchema.getColumns();
+    // copy from org.apache.flink.table.api.Schema#toRowDataType
+    DataTypes.Field[] fields =
+        tableColumns.stream()
+            .map(
+                column -> {
+                  if (column.getComment().isPresent()) {
+                    return DataTypes.FIELD(
+                        column.getName(), column.getDataType(), 
column.getComment().get());
+                  } else {
+                    return DataTypes.FIELD(column.getName(), 
column.getDataType());
+                  }
+                })
+            .toArray(DataTypes.Field[]::new);
+
+    LogicalType schemaType = DataTypes.ROW(fields).notNull().getLogicalType();
+    Preconditions.checkArgument(
+        schemaType instanceof RowType, "Schema logical type should be 
RowType.");

Review Comment:
   nit: in the error message, write "row type" instead of the class name (RowType -> row type)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Re: [PR] Flink: Supports specifying comment for iceberg fields in create table and addcolumn syntax using flinksql [iceberg]

Reply via email to