Re: [PR] Introduces the new IcebergSink based on the new V2 Flink Sink Abstraction [iceberg]

via GitHub Sat, 17 Aug 2024 21:33:25 -0700


stevenzwu commented on code in PR #10179:
URL: https://github.com/apache/iceberg/pull/10179#discussion_r1720888195



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offers different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.table = table;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    return new IcebergSinkWriter(
+        tableSupplier.get().name(),
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()
+        .transform(
+            suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + 
"-pre-commit-topology"),

Review Comment:
   Why is this still `uidPrefix` rather than `uidSuffix`?



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offers different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.table = table;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    return new IcebergSinkWriter(
+        tableSupplier.get().name(),
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()
+        .transform(
+            suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + 
"-pre-commit-topology"),
+            typeInformation,
+            new IcebergWriteAggregator(tableLoader))
+        .uid(suffixIfNotNull(uidPrefix, "pre-commit-topology"))
+        .setParallelism(1)
+        .setMaxParallelism(1)
+        // global forces all output records send to subtask 0 of the 
downstream committer operator.
+        // This is to ensure commit only happen in one committer subtask.
+        // Once upstream Flink provides the capability of setting committer 
operator
+        // parallelism to 1, this can be removed.
+        .global();
+  }
+
+  @Override
+  public SimpleVersionedSerializer<WriteResult> getWriteResultSerializer() {
+    return new WriteResultSerializer();
+  }
+
+  public static class Builder {
+    private TableLoader tableLoader;
+    private String uidSuffix = null;
+    private Function<String, DataStream<RowData>> inputCreator = null;
+    private TableSchema tableSchema;
+    private SerializableTable table;
+    private final Map<String, String> writeOptions = Maps.newHashMap();
+    private final Map<String, String> snapshotSummary = Maps.newHashMap();
+    private ReadableConfig readableConfig = new Configuration();
+    private List<String> equalityFieldColumns = null;
+
+    private Builder() {}
+
+    private Builder forRowData(DataStream<RowData> newRowDataInput) {
+      this.inputCreator = ignored -> newRowDataInput;
+      return this;
+    }
+
+    private Builder forRow(DataStream<Row> input, TableSchema 
inputTableSchema) {
+      RowType rowType = (RowType) 
inputTableSchema.toRowDataType().getLogicalType();
+      DataType[] fieldDataTypes = inputTableSchema.getFieldDataTypes();
+
+      DataFormatConverters.RowConverter rowConverter =
+          new DataFormatConverters.RowConverter(fieldDataTypes);
+      return forMapperOutputType(
+              input, rowConverter::toInternal, 
FlinkCompatibilityUtil.toTypeInfo(rowType))
+          .tableSchema(inputTableSchema);
+    }
+
+    private <T> Builder forMapperOutputType(
+        DataStream<T> input, MapFunction<T, RowData> mapper, 
TypeInformation<RowData> outputType) {
+      this.inputCreator =
+          newUidSuffix -> {
+            // Input stream order is crucial for some situation(e.g. in cdc 
case). Therefore, we
+            // need to set the parallelism of map operator same as its input 
to keep map operator
+            // chaining its input, and avoid rebalanced by default.
+            SingleOutputStreamOperator<RowData> inputStream =
+                input.map(mapper, 
outputType).setParallelism(input.getParallelism());
+            if (newUidSuffix != null) {
+              inputStream.name(operatorName(newUidSuffix)).uid(newUidSuffix + 
"-mapper");

Review Comment:
   This should just be `"Sink pre-write mapper:" + uidSuffix`.



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offer different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming{@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.table = table;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    return new IcebergSinkWriter(
+        tableSupplier.get().name(),
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()
+        .transform(
+            suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + 
"-pre-commit-topology"),
+            typeInformation,
+            new IcebergWriteAggregator(tableLoader))
+        .uid(suffixIfNotNull(uidPrefix, "pre-commit-topology"))

Review Comment:
   if we want to be consistent with the framework operator naming convention, 
it could be just `.uid("Sink pre-commit aggregator: " + uidPrefix)`
   
   I don't know why we need `suffixIfNotNull(...)`.



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,811 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.sink.committer.IcebergCommittable;
+import org.apache.iceberg.flink.sink.committer.IcebergCommittableSerializer;
+import org.apache.iceberg.flink.sink.committer.IcebergCommitter;
+import org.apache.iceberg.flink.sink.committer.IcebergWriteAggregator;
+import org.apache.iceberg.flink.sink.committer.WriteResultSerializer;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offer different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data files, and
+ *       generates a {@link IcebergCommittable} to store the {@link
+ *       org.apache.iceberg.io.WriteResult}
+ *   <li>{@link SupportsPreCommitTopology} which we use to to place the {@link
+ *       IcebergWriteAggregator} which merges the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.DeltaManifests}
+ *   <li>{@link IcebergCommitter} which stores the incoming {@link
+ *       org.apache.iceberg.flink.sink.DeltaManifests}s in state for recovery, 
and commits them to
+ *       the Iceberg table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private final Table initTable;
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+
+  private IcebergSink(
+      Table initTable,
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      boolean upsertMode,
+      FileFormat dataFileFormat,
+      long targetDataFileSize,
+      String branch,
+      boolean overwriteMode,
+      int workerPoolSize) {
+    this.initTable = initTable;
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.upsertMode = upsertMode;
+    this.dataFileFormat = dataFileFormat;
+    this.targetDataFileSize = targetDataFileSize;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.workerPoolSize = workerPoolSize;
+    this.table = table;
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), 
initTable.name());
+    return new IcebergSinkWriter(
+        tableSupplier,
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), 
initTable.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()

Review Comment:
   @rodmeneses what error/exception are you seeing after removing the first 
`global`?



##########
flink/v1.19/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergCommitter.java:
##########
@@ -0,0 +1,1445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static 
org.apache.iceberg.flink.sink.ManifestOutputFileFactory.FLINK_MANIFEST_LOCATION;
+import static 
org.apache.iceberg.flink.sink.SinkTestUtil.extractAndAssertCommittableSummary;
+import static 
org.apache.iceberg.flink.sink.SinkTestUtil.extractAndAssertCommittableWithLineage;
+import static 
org.apache.iceberg.flink.sink.SinkTestUtil.transformsToStreamElement;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assumptions.assumeThat;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.api.common.TaskInfo;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
+import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.runtime.jobgraph.OperatorID;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableSummary;
+import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage;
+import org.apache.flink.streaming.api.connector.sink2.SinkV2Assertions;
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
+import 
org.apache.flink.streaming.runtime.operators.sink.CommitterOperatorFactory;
+import org.apache.flink.streaming.runtime.streamrecord.StreamElement;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.streaming.runtime.tasks.StreamTask;
+import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
+import org.apache.flink.table.data.RowData;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.GenericManifestFile;
+import org.apache.iceberg.ManifestContent;
+import org.apache.iceberg.ManifestFile;
+import org.apache.iceberg.Metrics;
+import org.apache.iceberg.Parameter;
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotRef;
+import org.apache.iceberg.SnapshotSummary;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.TestBase;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.SimpleDataUtil;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.TestHelpers;
+import org.apache.iceberg.io.FileAppenderFactory;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.TestTemplate;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@ExtendWith(ParameterizedTestExtension.class)
+class TestIcebergCommitter extends TestBase {
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestIcebergCommitter.class);
+  public static final String OPERATOR_ID = "flink-sink";
+  @TempDir File temporaryFolder;
+
+  @TempDir File flinkManifestFolder;
+
+  private Table table;
+
+  private TableLoader tableLoader;
+
+  @Parameter(index = 1)
+  private Boolean isStreamingMode;
+
+  @Parameter(index = 2)
+  private String branch;
+
+  private final String jobId = "jobId";
+  private final long dataFIleRowCount = 5L;
+
+  private final TestCommittableMessageTypeSerializer 
committableMessageTypeSerializer =
+      new TestCommittableMessageTypeSerializer();
+
+  private final DataFile dataFileTest1 =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-1.parquet")
+          .withFileSizeInBytes(0)
+          .withMetrics(
+              new Metrics(
+                  dataFIleRowCount,
+                  null, // no column sizes
+                  ImmutableMap.of(1, 5L), // value count
+                  ImmutableMap.of(1, 0L), // null count
+                  null,
+                  ImmutableMap.of(1, longToBuffer(0L)), // lower bounds
+                  ImmutableMap.of(1, longToBuffer(4L)) // upper bounds
+                  ))
+          .build();
+
+  private final DataFile dataFileTest2 =
+      DataFiles.builder(PartitionSpec.unpartitioned())
+          .withPath("/path/to/data-2.parquet")
+          .withFileSizeInBytes(0)
+          .withMetrics(
+              new Metrics(
+                  dataFIleRowCount,
+                  null, // no column sizes
+                  ImmutableMap.of(1, 5L), // value count
+                  ImmutableMap.of(1, 0L), // null count
+                  null,
+                  ImmutableMap.of(1, longToBuffer(0L)), // lower bounds
+                  ImmutableMap.of(1, longToBuffer(4L)) // upper bounds
+                  ))
+          .build();
+
+  @SuppressWarnings("checkstyle:NestedForDepth")
+  @Parameters(name = "formatVersion={0} isStreaming={1}, branch={2}")
+  protected static List<Object> parameters() {
+    List<Object> parameters = Lists.newArrayList();
+    for (Boolean isStreamingMode : new Boolean[] {true, false}) {
+      for (int formatVersion : new int[] {1, 2}) {
+        parameters.add(new Object[] {formatVersion, isStreamingMode, 
SnapshotRef.MAIN_BRANCH});
+        parameters.add(new Object[] {formatVersion, isStreamingMode, 
"test-branch"});
+      }
+    }
+    return parameters;
+  }
+
+  @BeforeEach
+  public void before() throws Exception {
+    String warehouse = temporaryFolder.getAbsolutePath();
+
+    String tablePath = warehouse.concat("/test");
+    assertThat(new File(tablePath).mkdir()).as("Should create the table path 
correctly.").isTrue();
+
+    Map<String, String> props =
+        ImmutableMap.of(
+            TableProperties.FORMAT_VERSION,
+            String.valueOf(formatVersion),
+            FLINK_MANIFEST_LOCATION,
+            flinkManifestFolder.getAbsolutePath(),
+            IcebergCommitter.MAX_CONTINUOUS_EMPTY_COMMITS,
+            "1");
+    table = SimpleDataUtil.createTable(tablePath, props, false);
+    tableLoader = TableLoader.fromHadoopTable(tablePath);
+  }
+
+  @TestTemplate
+  public void testCommitTxnWithoutDataFiles() throws Exception {
+    IcebergCommitter committer = getCommitter();
+    SimpleDataUtil.assertTableRows(table, Lists.newArrayList(), branch);
+    assertSnapshotSize(0);
+    assertMaxCommittedCheckpointId(jobId, -1);
+
+    for (long i = 1; i <= 3; i++) {
+      Committer.CommitRequest<IcebergCommittable> commitRequest =
+          buildCommitRequestFor(jobId, i, Lists.newArrayList());
+      committer.commit(Lists.newArrayList(commitRequest));
+      assertMaxCommittedCheckpointId(jobId, i);
+      assertSnapshotSize((int) i);
+    }
+  }
+
+  @TestTemplate
+  public void testMxContinuousEmptyCommits() throws Exception {
+    
table.updateProperties().set(IcebergCommitter.MAX_CONTINUOUS_EMPTY_COMMITS, 
"3").commit();
+    IcebergCommitter committer = getCommitter();
+    for (int i = 1; i <= 9; i++) {
+      Committer.CommitRequest<IcebergCommittable> commitRequest =
+          buildCommitRequestFor(jobId, i, Lists.newArrayList());
+      committer.commit(Lists.newArrayList(commitRequest));
+      assertFlinkManifests(0);
+      assertSnapshotSize(i / 3);
+    }
+  }
+
+  @TestTemplate
+  public void testCommitTxn() throws Exception {
+    IcebergCommitter committer = getCommitter();
+    assertSnapshotSize(0);
+    List<RowData> rows = Lists.newArrayListWithExpectedSize(3);
+    for (int i = 1; i <= 3; i++) {
+      RowData rowData = SimpleDataUtil.createRowData(i, "hello" + i);
+      DataFile dataFile = writeDataFile("data-" + i, 
ImmutableList.of(rowData));
+      rows.add(rowData);
+      WriteResult writeResult = of(dataFile);
+      Committer.CommitRequest<IcebergCommittable> commitRequest =
+          buildCommitRequestFor(jobId, i, Lists.newArrayList(writeResult));
+      committer.commit(Lists.newArrayList(commitRequest));
+      assertFlinkManifests(0);
+      SimpleDataUtil.assertTableRows(table, ImmutableList.copyOf(rows), 
branch);
+      assertSnapshotSize(i);
+      assertMaxCommittedCheckpointId(jobId, i);
+      Map<String, String> summary = SimpleDataUtil.latestSnapshot(table, 
branch).summary();
+      assertThat(summary)
+          .containsEntry("flink.test", 
"org.apache.iceberg.flink.sink.TestIcebergCommitter")
+          .containsEntry("added-data-files", "1")
+          .containsEntry("flink.operator-id", OPERATOR_ID)
+          .containsEntry("flink.job-id", "jobId");
+    }
+  }
+
+  @TestTemplate
+  public void testOrderedEventsBetweenCheckpoints() throws Exception {
+    // It's possible that two checkpoints happen in the following orders:
+    //   1. snapshotState for checkpoint#1;
+    //   2. snapshotState for checkpoint#2;
+    //   3. notifyCheckpointComplete for checkpoint#1;
+    //   4. notifyCheckpointComplete for checkpoint#2;
+
+    long timestamp = 0;
+    try (OneInputStreamOperatorTestHarness<
+            CommittableMessage<IcebergCommittable>, 
CommittableMessage<IcebergCommittable>>
+        harness = getTestHarness()) {
+
+      harness.open();
+
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      RowData row1 = SimpleDataUtil.createRowData(1, "hello");
+      DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row1));
+
+      processElement(jobId, 1, harness, 1, OPERATOR_ID, dataFile1);
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      // 1. snapshotState for checkpoint#1
+      long firstCheckpointId = 1;
+      harness.snapshot(firstCheckpointId, ++timestamp);
+      assertFlinkManifests(1);
+
+      RowData row2 = SimpleDataUtil.createRowData(2, "world");
+      DataFile dataFile2 = writeDataFile("data-2", ImmutableList.of(row2));
+      processElement(jobId, 2, harness, 1, OPERATOR_ID, dataFile2);
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      // 2. snapshotState for checkpoint#2
+      long secondCheckpointId = 2;
+      OperatorSubtaskState snapshot = harness.snapshot(secondCheckpointId, 
++timestamp);
+      assertFlinkManifests(2);
+
+      // 3. notifyCheckpointComplete for checkpoint#1
+      harness.notifyOfCompletedCheckpoint(firstCheckpointId);
+      SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1), branch);
+      assertMaxCommittedCheckpointId(jobId, firstCheckpointId);
+      assertFlinkManifests(1);
+
+      // 4. notifyCheckpointComplete for checkpoint#2
+      harness.notifyOfCompletedCheckpoint(secondCheckpointId);
+      SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2), 
branch);
+      assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
+      assertFlinkManifests(0);
+    }
+  }
+
+  @TestTemplate
+  public void testDisorderedEventsBetweenCheckpoints() throws Exception {
+    // It's possible that two checkpoints happen in the following orders:
+    //   1. snapshotState for checkpoint#1;
+    //   2. snapshotState for checkpoint#2;
+    //   3. notifyCheckpointComplete for checkpoint#2;
+    //   4. notifyCheckpointComplete for checkpoint#1;
+
+    long timestamp = 0;
+    try (OneInputStreamOperatorTestHarness<
+            CommittableMessage<IcebergCommittable>, 
CommittableMessage<IcebergCommittable>>
+        harness = getTestHarness()) {
+
+      harness.open();
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      RowData row1 = SimpleDataUtil.createRowData(1, "hello");
+      DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row1));
+
+      processElement(jobId, 1, harness, 1, OPERATOR_ID, dataFile1);
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      // 1. snapshotState for checkpoint#1
+      long firstCheckpointId = 1;
+      harness.snapshot(firstCheckpointId, ++timestamp);
+      assertFlinkManifests(1);
+
+      RowData row2 = SimpleDataUtil.createRowData(2, "world");
+      DataFile dataFile2 = writeDataFile("data-2", ImmutableList.of(row2));
+      processElement(jobId, 2, harness, 1, OPERATOR_ID, dataFile2);
+      assertMaxCommittedCheckpointId(jobId, -1L);
+
+      // 2. snapshotState for checkpoint#2
+      long secondCheckpointId = 2;
+      harness.snapshot(secondCheckpointId, ++timestamp);
+      assertFlinkManifests(2);
+
+      // 3. notifyCheckpointComplete for checkpoint#2
+      harness.notifyOfCompletedCheckpoint(secondCheckpointId);
+      SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2), 
branch);
+      assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
+      assertFlinkManifests(0);
+
+      // 4. notifyCheckpointComplete for checkpoint#1
+      harness.notifyOfCompletedCheckpoint(firstCheckpointId);
+      SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2), 
branch);
+      assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
+      assertFlinkManifests(0);
+    }
+  }
+
+  @TestTemplate
+  public void testSingleCommit() throws Exception {
+    try (OneInputStreamOperatorTestHarness<
+            CommittableMessage<IcebergCommittable>, 
CommittableMessage<IcebergCommittable>>
+        testHarness = getTestHarness()) {
+      testHarness.open();
+
+      long checkpointId = 1;
+
+      RowData row1 = SimpleDataUtil.createRowData(1, "hello1");
+      DataFile dataFile1 = writeDataFile("data-1-1", ImmutableList.of(row1));
+      CommittableSummary<IcebergCommittable> committableSummary =
+          processElement(jobId, checkpointId, testHarness, 1, OPERATOR_ID, 
dataFile1);
+
+      // Trigger commit
+      testHarness.notifyOfCompletedCheckpoint(checkpointId);
+
+      assertSnapshotSize(1);
+      assertMaxCommittedCheckpointId(jobId, 1L);
+
+      List<StreamElement> output = 
transformsToStreamElement(testHarness.getOutput());
+
+      SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1), branch);
+      
SinkV2Assertions.assertThat(extractAndAssertCommittableSummary(output.get(0)))
+          
.hasFailedCommittables(committableSummary.getNumberOfFailedCommittables())
+          .hasOverallCommittables(committableSummary.getNumberOfCommittables())
+          .hasPendingCommittables(0);
+
+      
SinkV2Assertions.assertThat(extractAndAssertCommittableWithLineage(output.get(1)))
+          .hasSubtaskId(0)
+          .hasCheckpointId(checkpointId);
+    }
+
+    table.refresh();
+    Snapshot currentSnapshot = table.snapshot(branch);
+
+    assertThat(currentSnapshot.summary())
+        .containsEntry(SnapshotSummary.TOTAL_RECORDS_PROP, "1")
+        .containsEntry(SnapshotSummary.TOTAL_DATA_FILES_PROP, "1");
+  }
+
+  /** The data was not committed in the previous job. */
+  @TestTemplate
+  public void testStateRestoreFromPreJob() throws Exception {

Review Comment:
   The names `testStateRestoreFromPreJob` and `testStateRestoreFromPreJob2` are not 
   meaningful. They should be something like `testStateRestoreFromPreJobWithCommitted` and 
   `testStateRestoreFromPreJobWithUncommitted`.
   
   Re-creating this comment to surface it again.



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offer different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming{@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.table = table;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    return new IcebergSinkWriter(
+        tableSupplier.get().name(),
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()
+        .transform(
+            suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + 
"-pre-commit-topology"),
+            typeInformation,
+            new IcebergWriteAggregator(tableLoader))
+        .uid(suffixIfNotNull(uidPrefix, "pre-commit-topology"))
+        .setParallelism(1)
+        .setMaxParallelism(1)
+        // global forces all output records send to subtask 0 of the 
downstream committer operator.
+        // This is to ensure commit only happen in one committer subtask.
+        // Once upstream Flink provides the capability of setting committer 
operator
+        // parallelism to 1, this can be removed.
+        .global();
+  }
+
+  @Override
+  public SimpleVersionedSerializer<WriteResult> getWriteResultSerializer() {
+    return new WriteResultSerializer();
+  }
+
+  public static class Builder {
+    private TableLoader tableLoader;
+    private String uidSuffix = null;
+    private Function<String, DataStream<RowData>> inputCreator = null;
+    private TableSchema tableSchema;
+    private SerializableTable table;
+    private final Map<String, String> writeOptions = Maps.newHashMap();
+    private final Map<String, String> snapshotSummary = Maps.newHashMap();
+    private ReadableConfig readableConfig = new Configuration();
+    private List<String> equalityFieldColumns = null;
+
+    private Builder() {}
+
+    private Builder forRowData(DataStream<RowData> newRowDataInput) {
+      this.inputCreator = ignored -> newRowDataInput;
+      return this;
+    }
+
+    private Builder forRow(DataStream<Row> input, TableSchema 
inputTableSchema) {
+      RowType rowType = (RowType) 
inputTableSchema.toRowDataType().getLogicalType();
+      DataType[] fieldDataTypes = inputTableSchema.getFieldDataTypes();
+
+      DataFormatConverters.RowConverter rowConverter =
+          new DataFormatConverters.RowConverter(fieldDataTypes);
+      return forMapperOutputType(
+              input, rowConverter::toInternal, 
FlinkCompatibilityUtil.toTypeInfo(rowType))
+          .tableSchema(inputTableSchema);
+    }
+
+    private <T> Builder forMapperOutputType(
+        DataStream<T> input, MapFunction<T, RowData> mapper, 
TypeInformation<RowData> outputType) {
+      this.inputCreator =
+          newUidSuffix -> {
+            // Input stream order is crucial for some situation(e.g. in cdc 
case). Therefore, we
+            // need to set the parallelism of map operator same as its input 
to keep map operator
+            // chaining its input, and avoid rebalanced by default.
+            SingleOutputStreamOperator<RowData> inputStream =
+                input.map(mapper, 
outputType).setParallelism(input.getParallelism());
+            if (newUidSuffix != null) {
+              inputStream.name(operatorName(newUidSuffix)).uid(newUidSuffix + 
"-mapper");
+            }
+            return inputStream;
+          };
+      return this;
+    }
+
+    /**
+     * This iceberg {@link SerializableTable} instance is used for 
initializing {@link
+     * IcebergStreamWriter} which will write all the records into {@link 
DataFile}s and emit them to
+     * downstream operator. Providing a table would avoid so many table 
loading from each separate
+     * task.
+     *
+     * @param newTable the loaded iceberg table instance.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder table(Table newTable) {
+      this.table = (SerializableTable) SerializableTable.copyOf(newTable);
+      return this;
+    }
+
+    /**
+     * The table loader is used for loading tables in {@link
+     * org.apache.iceberg.flink.sink.IcebergCommitter} lazily, we need this 
loader because {@link
+     * Table} is not serializable and could not just use the loaded table from 
Builder#table in the
+     * remote task manager.
+     *
+     * @param newTableLoader to load iceberg table inside tasks.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder tableLoader(TableLoader newTableLoader) {
+      this.tableLoader = newTableLoader;
+      return this;
+    }
+
+    TableLoader tableLoader() {
+      return tableLoader;
+    }
+
+    /**
+     * Set the write properties for IcebergSink. View the supported properties 
in {@link
+     * FlinkWriteOptions}
+     */
+    public Builder set(String property, String value) {
+      writeOptions.put(property, value);
+      return this;
+    }
+
+    /**
+     * Set the write properties for IcebergSink. View the supported properties 
in {@link
+     * FlinkWriteOptions}
+     */
+    public Builder setAll(Map<String, String> properties) {
+      writeOptions.putAll(properties);
+      return this;
+    }
+
+    public Builder tableSchema(TableSchema newTableSchema) {
+      this.tableSchema = newTableSchema;
+      return this;
+    }
+
+    public Builder overwrite(boolean newOverwrite) {
+      writeOptions.put(FlinkWriteOptions.OVERWRITE_MODE.key(), 
Boolean.toString(newOverwrite));
+      return this;
+    }
+
+    public Builder flinkConf(ReadableConfig config) {
+      this.readableConfig = config;
+      return this;
+    }
+
+    /**
+     * Configure the write {@link DistributionMode} that the IcebergSink will 
use. Currently, flink
+     * support {@link DistributionMode#NONE} and {@link DistributionMode#HASH}.
+     *
+     * @param mode to specify the write distribution mode.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder distributionMode(DistributionMode mode) {
+      Preconditions.checkArgument(
+          !DistributionMode.RANGE.equals(mode),
+          "Flink does not support 'range' write distribution mode now.");
+      if (mode != null) {
+        writeOptions.put(FlinkWriteOptions.DISTRIBUTION_MODE.key(), 
mode.modeName());
+      }
+      return this;
+    }
+
+    /**
+     * Configuring the write parallel number for iceberg stream writer.
+     *
+     * @param newWriteParallelism the number of parallel iceberg stream writer.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder writeParallelism(int newWriteParallelism) {
+      writeOptions.put(
+          FlinkWriteOptions.WRITE_PARALLELISM.key(), 
Integer.toString(newWriteParallelism));
+      return this;
+    }
+
+    /**
+     * All INSERT/UPDATE_AFTER events from input stream will be transformed to 
UPSERT events, which
+     * means it will DELETE the old records and then INSERT the new records. 
In partitioned table,
+     * the partition fields should be a subset of equality fields, otherwise 
the old row that
+     * located in partition-A could not be deleted by the new row that located 
in partition-B.
+     *
+     * @param enabled indicate whether it should transform all 
INSERT/UPDATE_AFTER events to UPSERT.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder upsert(boolean enabled) {
+      writeOptions.put(FlinkWriteOptions.WRITE_UPSERT_ENABLED.key(), 
Boolean.toString(enabled));
+      return this;
+    }
+
+    /**
+     * Configuring the equality field columns for iceberg table that accept 
CDC or UPSERT events.
+     *
+     * @param columns defines the iceberg table's key.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder equalityFieldColumns(List<String> columns) {
+      this.equalityFieldColumns = columns;
+      return this;
+    }
+
+    /**
+     * Set the uid suffix for IcebergSink operators. Note that IcebergSink 
internally consists of
+     * multiple operators (like writer, committer, aggregator). Actual 
operator uid will be
+     * prepended with a prefix like "Sink Committer: uidSuffix".
+     *
+     * <p>Flink auto generates operator uid if not set explicitly. It is a 
recommended <a
+     * 
href="https://ci.apache.org/projects/flink/flink-docs-master/docs/ops/production_ready/";>
+     * best-practice to set uid for all operators</a> before deploying to 
production. Flink has an
+     * option to {@code pipeline.auto-generate-uid=false} to disable 
auto-generation and force
+     * explicit setting of all operator uid.
+     *
+     * <p>Be careful with setting this for an existing job, because now we are 
changing the operator
+     * uid from an auto-generated one to this new value. When deploying the 
change with a
+     * checkpoint, Flink won't be able to restore the previous IcebergSink 
operator state (more
+     * specifically the committer operator state). You need to use {@code 
--allowNonRestoredState}
+     * to ignore the previous sink state. During restore IcebergSink state is 
used to check if last
+     * commit was actually successful or not. {@code --allowNonRestoredState} 
can lead to data loss
+     * if the Iceberg commit failed in the last completed checkpoint.
+     *
+     * @param newSuffix suffix for Flink sink operator uid and name
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder uidSuffix(String newSuffix) {
+      this.uidSuffix = newSuffix;
+      return this;
+    }
+
+    public Builder snapshotProperties(Map<String, String> properties) {
+      snapshotSummary.putAll(properties);
+      return this;
+    }
+
+    public Builder setSnapshotProperty(String property, String value) {
+      snapshotSummary.put(property, value);
+      return this;
+    }
+
+    public Builder toBranch(String branch) {
+      writeOptions.put(FlinkWriteOptions.BRANCH.key(), branch);
+      return this;
+    }
+
+    private String operatorName(String suffix) {
+      return uidSuffix != null ? uidSuffix + "-" + suffix : suffix;
+    }
+
+    IcebergSink build() {
+
+      Preconditions.checkArgument(
+          inputCreator != null,
+          "Please use forRowData() or forMapperOutputType() to initialize the 
input DataStream.");
+      Preconditions.checkNotNull(tableLoader(), "Table loader shouldn't be 
null");
+
+      // Set the table if it is not yet set in the builder, so we can do the 
equalityId checks
+      this.table = checkAndGetTable(tableLoader(), table);
+
+      // Init the `flinkWriteConf` here, so we can do the checks
+      FlinkWriteConf flinkWriteConf = new FlinkWriteConf(table, writeOptions, 
readableConfig);
+
+      Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval();
+      SerializableSupplier<Table> tableSupplier;
+      if (tableRefreshInterval != null) {
+        tableSupplier = new CachingTableSupplier(table, tableLoader(), 
tableRefreshInterval);
+      } else {
+        SerializableTable serializableTable = (SerializableTable) 
SerializableTable.copyOf(table);

Review Comment:
   This line (the `SerializableTable.copyOf(table)` copy and cast) is not needed, since the builder's `table` field is already a `SerializableTable`.



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offer different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming{@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader;
+  private final Map<String, String> snapshotProperties;
+  private final String uidPrefix;
+  private final String sinkId;
+  private final Map<String, String> writeProperties;
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf;
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode;
+  private final FileFormat dataFileFormat;
+  private final long targetDataFileSize;
+  private final String branch;
+  private final boolean overwriteMode;
+  private final int workerPoolSize;
+
+  private final Table table;
+  private final List<String> equalityFieldColumns = null;
+
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    this.tableLoader = tableLoader;
+    this.snapshotProperties = snapshotProperties;
+    this.uidPrefix = uidPrefix;
+    this.writeProperties = writeProperties;
+    this.flinkRowType = flinkRowType;
+    this.tableSupplier = tableSupplier;
+    this.flinkWriteConf = flinkWriteConf;
+    this.equalityFieldIds = equalityFieldIds;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.table = table;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+    // We generate a random UUID every time when a sink is created.
+    // This is used to separate files generated by different sinks writing the 
same table.
+    // Also used to generate the aggregator operator name
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory taskWriterFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics metrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    return new IcebergSinkWriter(
+        tableSupplier.get().name(),
+        taskWriterFactory,
+        metrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext 
context) {
+    IcebergFilesCommitterMetrics metrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader, branch, snapshotProperties, overwriteMode, 
workerPoolSize, sinkId, metrics);
+  }
+
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> 
getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> 
inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> 
addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> typeInformation =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+
+    // global forces all output records send to subtask 0 of the downstream 
committer operator.
+    // This is to ensure commit only happen in one committer subtask.
+    // Once upstream Flink provides the capability of setting committer 
operator
+    // parallelism to 1, this can be removed.
+    return writeResults
+        .global()
+        .transform(
+            suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + 
"-pre-commit-topology"),
+            typeInformation,
+            new IcebergWriteAggregator(tableLoader))
+        .uid(suffixIfNotNull(uidPrefix, "pre-commit-topology"))
+        .setParallelism(1)
+        .setMaxParallelism(1)
+        // global forces all output records send to subtask 0 of the 
downstream committer operator.
+        // This is to ensure commit only happen in one committer subtask.
+        // Once upstream Flink provides the capability of setting committer 
operator
+        // parallelism to 1, this can be removed.
+        .global();
+  }
+
+  @Override
+  public SimpleVersionedSerializer<WriteResult> getWriteResultSerializer() {
+    return new WriteResultSerializer();
+  }
+
+  public static class Builder {
+    private TableLoader tableLoader;
+    private String uidSuffix = null;
+    private Function<String, DataStream<RowData>> inputCreator = null;
+    private TableSchema tableSchema;
+    private SerializableTable table;
+    private final Map<String, String> writeOptions = Maps.newHashMap();
+    private final Map<String, String> snapshotSummary = Maps.newHashMap();
+    private ReadableConfig readableConfig = new Configuration();
+    private List<String> equalityFieldColumns = null;
+
+    private Builder() {}
+
+    private Builder forRowData(DataStream<RowData> newRowDataInput) {
+      this.inputCreator = ignored -> newRowDataInput;
+      return this;
+    }
+
+    private Builder forRow(DataStream<Row> input, TableSchema 
inputTableSchema) {
+      RowType rowType = (RowType) 
inputTableSchema.toRowDataType().getLogicalType();
+      DataType[] fieldDataTypes = inputTableSchema.getFieldDataTypes();
+
+      DataFormatConverters.RowConverter rowConverter =
+          new DataFormatConverters.RowConverter(fieldDataTypes);
+      return forMapperOutputType(
+              input, rowConverter::toInternal, 
FlinkCompatibilityUtil.toTypeInfo(rowType))
+          .tableSchema(inputTableSchema);
+    }
+
+    private <T> Builder forMapperOutputType(
+        DataStream<T> input, MapFunction<T, RowData> mapper, 
TypeInformation<RowData> outputType) {
+      this.inputCreator =
+          newUidSuffix -> {
+            // Input stream order is crucial for some situation(e.g. in cdc 
case). Therefore, we
+            // need to set the parallelism of map operator same as its input 
to keep map operator
+            // chaining its input, and avoid rebalanced by default.
+            SingleOutputStreamOperator<RowData> inputStream =
+                input.map(mapper, 
outputType).setParallelism(input.getParallelism());
+            if (newUidSuffix != null) {
+              inputStream.name(operatorName(newUidSuffix)).uid(newUidSuffix + 
"-mapper");
+            }
+            return inputStream;
+          };
+      return this;
+    }
+
+    /**
+     * This iceberg {@link SerializableTable} instance is used for 
initializing {@link
+     * IcebergStreamWriter} which will write all the records into {@link 
DataFile}s and emit them to
+     * downstream operator. Providing a table would avoid so many table 
loading from each separate
+     * task.
+     *
+     * @param newTable the loaded iceberg table instance.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder table(Table newTable) {
+      this.table = (SerializableTable) SerializableTable.copyOf(newTable);
+      return this;
+    }
+
+    /**
+     * The table loader is used for loading tables in {@link
+     * org.apache.iceberg.flink.sink.IcebergCommitter} lazily, we need this 
loader because {@link
+     * Table} is not serializable and could not just use the loaded table from 
Builder#table in the
+     * remote task manager.
+     *
+     * @param newTableLoader to load iceberg table inside tasks.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder tableLoader(TableLoader newTableLoader) {
+      this.tableLoader = newTableLoader;
+      return this;
+    }
+
+    TableLoader tableLoader() {
+      return tableLoader;
+    }
+
+    /**
+     * Set the write properties for IcebergSink. View the supported properties 
in {@link
+     * FlinkWriteOptions}
+     */
+    public Builder set(String property, String value) {
+      writeOptions.put(property, value);
+      return this;
+    }
+
+    /**
+     * Set the write properties for IcebergSink. View the supported properties 
in {@link
+     * FlinkWriteOptions}
+     */
+    public Builder setAll(Map<String, String> properties) {
+      writeOptions.putAll(properties);
+      return this;
+    }
+
+    public Builder tableSchema(TableSchema newTableSchema) {
+      this.tableSchema = newTableSchema;
+      return this;
+    }
+
+    public Builder overwrite(boolean newOverwrite) {
+      writeOptions.put(FlinkWriteOptions.OVERWRITE_MODE.key(), 
Boolean.toString(newOverwrite));
+      return this;
+    }
+
+    public Builder flinkConf(ReadableConfig config) {
+      this.readableConfig = config;
+      return this;
+    }
+
+    /**
+     * Configure the write {@link DistributionMode} that the IcebergSink will 
use. Currently, flink
+     * support {@link DistributionMode#NONE} and {@link DistributionMode#HASH}.
+     *
+     * @param mode to specify the write distribution mode.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder distributionMode(DistributionMode mode) {
+      Preconditions.checkArgument(
+          !DistributionMode.RANGE.equals(mode),
+          "Flink does not support 'range' write distribution mode now.");
+      if (mode != null) {
+        writeOptions.put(FlinkWriteOptions.DISTRIBUTION_MODE.key(), 
mode.modeName());
+      }
+      return this;
+    }
+
+    /**
+     * Configuring the write parallel number for iceberg stream writer.
+     *
+     * @param newWriteParallelism the number of parallel iceberg stream writer.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder writeParallelism(int newWriteParallelism) {
+      writeOptions.put(
+          FlinkWriteOptions.WRITE_PARALLELISM.key(), 
Integer.toString(newWriteParallelism));
+      return this;
+    }
+
+    /**
+     * All INSERT/UPDATE_AFTER events from input stream will be transformed to 
UPSERT events, which
+     * means it will DELETE the old records and then INSERT the new records. 
In partitioned table,
+     * the partition fields should be a subset of equality fields, otherwise 
the old row that
+     * located in partition-A could not be deleted by the new row that located 
in partition-B.
+     *
+     * @param enabled indicate whether it should transform all 
INSERT/UPDATE_AFTER events to UPSERT.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder upsert(boolean enabled) {
+      writeOptions.put(FlinkWriteOptions.WRITE_UPSERT_ENABLED.key(), 
Boolean.toString(enabled));
+      return this;
+    }
+
+    /**
+     * Configuring the equality field columns for iceberg table that accept 
CDC or UPSERT events.
+     *
+     * @param columns defines the iceberg table's key.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder equalityFieldColumns(List<String> columns) {
+      this.equalityFieldColumns = columns;
+      return this;
+    }
+
+    /**
+     * Set the uid suffix for IcebergSink operators. Note that IcebergSink 
internally consists of
+     * multiple operators (like writer, committer, aggregator). Actual 
operator uid will be
+     * prepended with a prefix like "Sink Committer: uidSuffix".
+     *
+     * <p>Flink auto generates operator uid if not set explicitly. It is a 
recommended <a
+     * 
href="https://ci.apache.org/projects/flink/flink-docs-master/docs/ops/production_ready/">
+     * best-practice to set uid for all operators</a> before deploying to 
production. Flink has an
+     * option to {@code pipeline.auto-generate-uid=false} to disable 
auto-generation and force
+     * explicit setting of all operator uid.
+     *
+     * <p>Be careful with setting this for an existing job, because now we are 
changing the operator
+     * uid from an auto-generated one to this new value. When deploying the 
change with a
+     * checkpoint, Flink won't be able to restore the previous IcebergSink 
operator state (more
+     * specifically the committer operator state). You need to use {@code 
--allowNonRestoredState}
+     * to ignore the previous sink state. During restore IcebergSink state is 
used to check if last
+     * commit was actually successful or not. {@code --allowNonRestoredState} 
can lead to data loss
+     * if the Iceberg commit failed in the last completed checkpoint.
+     *
+     * @param newSuffix suffix for Flink sink operator uid and name
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder uidSuffix(String newSuffix) {
+      this.uidSuffix = newSuffix;
+      return this;
+    }
+
+    public Builder snapshotProperties(Map<String, String> properties) {
+      snapshotSummary.putAll(properties);
+      return this;
+    }
+
+    public Builder setSnapshotProperty(String property, String value) {
+      snapshotSummary.put(property, value);
+      return this;
+    }
+
+    public Builder toBranch(String branch) {
+      writeOptions.put(FlinkWriteOptions.BRANCH.key(), branch);
+      return this;
+    }
+
+    private String operatorName(String suffix) {
+      return uidSuffix != null ? uidSuffix + "-" + suffix : suffix;
+    }
+
+    IcebergSink build() {
+
+      Preconditions.checkArgument(
+          inputCreator != null,
+          "Please use forRowData() or forMapperOutputType() to initialize the 
input DataStream.");
+      Preconditions.checkNotNull(tableLoader(), "Table loader shouldn't be 
null");
+
+      // Set the table if it is not yet set in the builder, so we can do the 
equalityId checks
+      this.table = checkAndGetTable(tableLoader(), table);
+
+      // Init the `flinkWriteConf` here, so we can do the checks
+      FlinkWriteConf flinkWriteConf = new FlinkWriteConf(table, writeOptions, 
readableConfig);
+
+      Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval();
+      SerializableSupplier<Table> tableSupplier;
+      if (tableRefreshInterval != null) {
+        tableSupplier = new CachingTableSupplier(table, tableLoader(), 
tableRefreshInterval);
+      } else {
+        SerializableTable serializableTable = (SerializableTable) 
SerializableTable.copyOf(table);
+        tableSupplier = () -> serializableTable;
+      }
+
+      boolean overwriteMode = flinkWriteConf.overwriteMode();
+
+      // Validate the equality fields and partition fields if we enable the 
upsert mode.
+      List<Integer> equalityFieldIds =
+          SinkUtil.checkAndGetEqualityFieldIds(table, equalityFieldColumns);
+
+      if (flinkWriteConf.upsertMode()) {
+        Preconditions.checkState(
+            !overwriteMode,
+            "OVERWRITE mode shouldn't be enable when configuring to use UPSERT 
data stream.");
+        Preconditions.checkState(
+            !equalityFieldIds.isEmpty(),
+            "Equality field columns shouldn't be empty when configuring to use 
UPSERT data stream.");
+        if (!table.spec().isUnpartitioned()) {
+          for (PartitionField partitionField : table.spec().fields()) {
+            Preconditions.checkState(
+                equalityFieldIds.contains(partitionField.sourceId()),
+                "In UPSERT mode, partition field '%s' should be included in 
equality fields: '%s'",
+                partitionField,
+                equalityFieldColumns);
+          }
+        }
+      }
+
+      return new IcebergSink(
+          tableLoader,
+          table,
+          snapshotSummary,
+          Optional.ofNullable(uidSuffix).orElse(""),
+          writeProperties(table, flinkWriteConf.dataFileFormat(), 
flinkWriteConf),
+          toFlinkRowType(table.schema(), tableSchema),
+          tableSupplier,
+          flinkWriteConf,
+          equalityFieldIds,
+          flinkWriteConf.branch(),
+          overwriteMode);
+    }
+
+    /**
+     * Append the iceberg sink operators to write records to iceberg table.
+     *
+     * @return {@link DataStreamSink} for sink.
+     */
+    public DataStreamSink<RowData> append() {
+      IcebergSink sink = build();
+      DataStream<RowData> rowDataInput = inputCreator.apply(uidSuffix);
+      DataStreamSink<RowData> rowDataDataStreamSink =

Review Comment:
   Please add a comment explaining that the V2 sink framework applies the uid set here to the
   framework-created operators such as the writer and committer, e.g. `"Sink writer: <uidSuffix>"`.



##########
flink/v1.19/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergSink.java:
##########
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.flink.sink;
+
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION;
+import static org.apache.iceberg.TableProperties.AVRO_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION;
+import static org.apache.iceberg.TableProperties.ORC_COMPRESSION_STRATEGY;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION;
+import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION_LEVEL;
+import static org.apache.iceberg.TableProperties.WRITE_DISTRIBUTION_MODE;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.function.Function;
+import org.apache.flink.annotation.Experimental;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.sink2.Committer;
+import org.apache.flink.api.connector.sink2.CommitterInitContext;
+import org.apache.flink.api.connector.sink2.Sink;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.SupportsCommitter;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.core.io.SimpleVersionedSerializer;
+import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
+import 
org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPostCommitTopology;
+import 
org.apache.flink.streaming.api.connector.sink2.SupportsPreCommitTopology;
+import org.apache.flink.streaming.api.connector.sink2.SupportsPreWriteTopology;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.util.DataFormatConverters;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.RowType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DistributionMode;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.flink.FlinkSchemaUtil;
+import org.apache.iceberg.flink.FlinkWriteConf;
+import org.apache.iceberg.flink.FlinkWriteOptions;
+import org.apache.iceberg.flink.TableLoader;
+import org.apache.iceberg.flink.util.FlinkCompatibilityUtil;
+import org.apache.iceberg.io.WriteResult;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.util.SerializableSupplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Flink v2 sink offer different hooks to insert custom topologies into the 
sink. We will use the
+ * following:
+ *
+ * <ul>
+ *   <li>{@link SupportsPreWriteTopology} which redistributes the data to the 
writers based on the
+ *       {@link DistributionMode}
+ *   <li>{@link org.apache.flink.api.connector.sink2.SinkWriter} which writes 
data/delete files, and
+ *       generates the {@link org.apache.iceberg.io.WriteResult} objects for 
the files
+ *   <li>{@link SupportsPreCommitTopology} which we use to place the {@link
+ *       org.apache.iceberg.flink.sink.IcebergWriteAggregator} which merges 
the individual {@link
+ *       org.apache.flink.api.connector.sink2.SinkWriter}'s {@link
+ *       org.apache.iceberg.io.WriteResult}s to a single {@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}
+ *   <li>{@link org.apache.iceberg.flink.sink.IcebergCommitter} which commits 
the incoming{@link
+ *       org.apache.iceberg.flink.sink.IcebergCommittable}s to the Iceberg 
table
+ *   <li>{@link SupportsPostCommitTopology} we could use for incremental 
compaction later. This is
+ *       not implemented yet.
+ * </ul>
+ *
+ * The job graph looks like below:
+ *
+ * <pre>{@code
+ *                            Flink sink
+ *               
+-----------------------------------------------------------------------------------+
+ *               |                                                             
                      |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ * | Map 1 | ==> | | writer 1 |                               | committer 1 | 
---> | post commit 1 | |
+ * +-------+     | +----------+                               +-------------+  
    +---------------+ |
+ *               |             \                             /                
\                      |
+ *               |              \                           /                  
\                     |
+ *               |               \                         /                   
 \                    |
+ * +-------+     | +----------+   \ +-------------------+ /   +-------------+  
  \ +---------------+ |
+ * | Map 2 | ==> | | writer 2 | --->| commit aggregator |     | committer 2 |  
    | post commit 2 | |
+ * +-------+     | +----------+     +-------------------+     +-------------+  
    +---------------+ |
+ *               |                                             Commit only on  
                      |
+ *               |                                             committer 1     
                      |
+ *               
+-----------------------------------------------------------------------------------+
+ * }</pre>
+ */
+@Experimental
+public class IcebergSink
+    implements Sink<RowData>,
+        SupportsPreWriteTopology<RowData>,
+        SupportsCommitter<IcebergCommittable>,
+        SupportsPreCommitTopology<WriteResult, IcebergCommittable>,
+        SupportsPostCommitTopology<IcebergCommittable> {
+  private static final Logger LOG = LoggerFactory.getLogger(IcebergSink.class);
+  private final TableLoader tableLoader; // handed to the committer and to the pre-commit aggregator
+  private final Map<String, String> snapshotProperties; // passed to IcebergCommitter
+  private final String uidPrefix; // prefix used for the pre-commit operator name and uid
+  private final String sinkId; // random UUID string generated in the constructor
+  private final Map<String, String> writeProperties; // passed to RowDataTaskWriterFactory
+  private final RowType flinkRowType;
+  private final SerializableSupplier<Table> tableSupplier;
+  private final transient FlinkWriteConf flinkWriteConf; // transient; the constructor copies upsertMode, dataFileFormat, targetDataFileSize and workerPoolSize out of it
+  private final List<Integer> equalityFieldIds;
+  private final boolean upsertMode; // flinkWriteConf.upsertMode()
+  private final FileFormat dataFileFormat; // flinkWriteConf.dataFileFormat()
+  private final long targetDataFileSize; // flinkWriteConf.targetDataFileSize()
+  private final String branch; // passed to IcebergCommitter
+  private final boolean overwriteMode; // passed to IcebergCommitter
+  private final int workerPoolSize; // flinkWriteConf.workerPoolSize()
+
+  private final Table table; // used for table.name() in metric groups and operator names
+  private final List<String> equalityFieldColumns = null; // NOTE(review): always null and not read in the visible code - confirm it can be removed
+
+  /**
+   * Builds the sink from already-resolved configuration. Values needed at runtime are read from
+   * {@code flinkWriteConf} here and kept in dedicated fields, since the conf field itself is
+   * transient.
+   */
+  private IcebergSink(
+      TableLoader tableLoader,
+      Table table,
+      Map<String, String> snapshotProperties,
+      String uidPrefix,
+      Map<String, String> writeProperties,
+      RowType flinkRowType,
+      SerializableSupplier<Table> tableSupplier,
+      FlinkWriteConf flinkWriteConf,
+      List<Integer> equalityFieldIds,
+      String branch,
+      boolean overwriteMode) {
+    // Table access.
+    this.table = table;
+    this.tableLoader = tableLoader;
+    this.tableSupplier = tableSupplier;
+
+    // Write and commit settings supplied by the caller.
+    this.flinkRowType = flinkRowType;
+    this.equalityFieldIds = equalityFieldIds;
+    this.writeProperties = writeProperties;
+    this.snapshotProperties = snapshotProperties;
+    this.branch = branch;
+    this.overwriteMode = overwriteMode;
+    this.uidPrefix = uidPrefix;
+
+    // Settings derived from the write configuration.
+    this.flinkWriteConf = flinkWriteConf;
+    this.upsertMode = flinkWriteConf.upsertMode();
+    this.dataFileFormat = flinkWriteConf.dataFileFormat();
+    this.targetDataFileSize = flinkWriteConf.targetDataFileSize();
+    this.workerPoolSize = flinkWriteConf.workerPoolSize();
+
+    // A fresh random UUID is generated for every sink instance. It separates the files produced
+    // by different sinks writing to the same table and is also part of the aggregator operator
+    // name.
+    this.sinkId = UUID.randomUUID().toString();
+  }
+
+  /**
+   * Creates the writer for one subtask: a task writer factory configured from this sink's write
+   * settings, plus writer metrics registered under the table name.
+   */
+  @Override
+  public SinkWriter<RowData> createWriter(InitContext context) {
+    RowDataTaskWriterFactory writerFactory =
+        new RowDataTaskWriterFactory(
+            tableSupplier,
+            flinkRowType,
+            targetDataFileSize,
+            dataFileFormat,
+            writeProperties,
+            equalityFieldIds,
+            upsertMode);
+    IcebergStreamWriterMetrics writerMetrics =
+        new IcebergStreamWriterMetrics(context.metricGroup(), table.name());
+    // NOTE(review): the metrics use table.name() while the writer uses the supplier's table
+    // name; confirm whether both are meant to be the same value.
+    String suppliedTableName = tableSupplier.get().name();
+    return new IcebergSinkWriter(
+        suppliedTableName,
+        writerFactory,
+        writerMetrics,
+        context.getSubtaskId(),
+        context.getAttemptNumber());
+  }
+
+  /** Creates the committer together with its metrics, registered under the table name. */
+  @Override
+  public Committer<IcebergCommittable> createCommitter(CommitterInitContext context) {
+    IcebergFilesCommitterMetrics committerMetrics =
+        new IcebergFilesCommitterMetrics(context.metricGroup(), table.name());
+    return new IcebergCommitter(
+        tableLoader,
+        branch,
+        snapshotProperties,
+        overwriteMode,
+        workerPoolSize,
+        sinkId,
+        committerMetrics);
+  }
+
+  /** Returns a new serializer for {@link IcebergCommittable} on every call. */
+  @Override
+  public SimpleVersionedSerializer<IcebergCommittable> getCommittableSerializer() {
+    return new IcebergCommittableSerializer();
+  }
+
+  /** Currently a no-op: nothing is attached after the committer. */
+  @Override
+  public void addPostCommitTopology(
+      DataStream<CommittableMessage<IcebergCommittable>> committables) {
+    // TODO Support small file compaction
+  }
+
+  /** Applies the sink's data distribution to the input before it reaches the writers. */
+  @Override
+  public DataStream<RowData> addPreWriteTopology(DataStream<RowData> inputDataStream) {
+    return distributeDataStream(inputDataStream);
+  }
+
+  /**
+   * Inserts a single-parallelism {@link IcebergWriteAggregator} between the writers and the
+   * committer and routes its output to one committer subtask.
+   */
+  @Override
+  public DataStream<CommittableMessage<IcebergCommittable>> addPreCommitTopology(
+      DataStream<CommittableMessage<WriteResult>> writeResults) {
+    TypeInformation<CommittableMessage<IcebergCommittable>> committableTypeInfo =
+        CommittableMessageTypeInfo.of(this::getCommittableSerializer);
+    String aggregatorName =
+        suffixIfNotNull(uidPrefix, table.name() + "-" + sinkId + "-pre-commit-topology");
+    // NOTE(review): unlike the operator name, the uid does not include sinkId; confirm it stays
+    // unique when several sinks without a uid prefix are used in one job.
+    String aggregatorUid = suffixIfNotNull(uidPrefix, "pre-commit-topology");
+
+    return writeResults
+        // Send every write result to the single aggregator subtask.
+        .global()
+        .transform(aggregatorName, committableTypeInfo, new IcebergWriteAggregator(tableLoader))
+        .uid(aggregatorUid)
+        .setParallelism(1)
+        .setMaxParallelism(1)
+        // global forces all output records send to subtask 0 of the downstream committer operator.
+        // This is to ensure commit only happen in one committer subtask.
+        // Once upstream Flink provides the capability of setting committer operator
+        // parallelism to 1, this can be removed.
+        .global();
+  }
+
+  /** Returns a new serializer for {@link WriteResult} on every call. */
+  @Override
+  public SimpleVersionedSerializer<WriteResult> getWriteResultSerializer() {
+    return new WriteResultSerializer();
+  }
+
+  public static class Builder {
+    private TableLoader tableLoader;
+    private String uidSuffix = null; // set through uidSuffix(String)
+    private Function<String, DataStream<RowData>> inputCreator = null; // takes a uid suffix (may be ignored) and returns the RowData input stream
+    private TableSchema tableSchema;
+    private SerializableTable table; // serializable copy created in table(Table)
+    private final Map<String, String> writeOptions = Maps.newHashMap(); // write options; several setters store FlinkWriteOptions keys here
+    private final Map<String, String> snapshotSummary = Maps.newHashMap(); // filled by snapshotProperties and setSnapshotProperty
+    private ReadableConfig readableConfig = new Configuration(); // replaced by flinkConf(ReadableConfig)
+    private List<String> equalityFieldColumns = null;
+
+    private Builder() {} // private: cannot be instantiated outside the enclosing class
+
+    /** Uses the given {@link RowData} stream as the sink input as-is; the uid suffix is ignored. */
+    private Builder forRowData(DataStream<RowData> newRowDataInput) {
+      this.inputCreator = unusedUidSuffix -> newRowDataInput;
+      return this;
+    }
+
+    /**
+     * Uses a {@link Row} stream as the sink input by converting every row to {@link RowData}
+     * according to the given schema, and records that schema on the builder.
+     */
+    private Builder forRow(DataStream<Row> input, TableSchema inputTableSchema) {
+      RowType inputRowType = (RowType) inputTableSchema.toRowDataType().getLogicalType();
+      DataFormatConverters.RowConverter converter =
+          new DataFormatConverters.RowConverter(inputTableSchema.getFieldDataTypes());
+
+      return forMapperOutputType(
+              input, converter::toInternal, FlinkCompatibilityUtil.toTypeInfo(inputRowType))
+          .tableSchema(inputTableSchema);
+    }
+
+    /**
+     * Uses {@code input} mapped through {@code mapper} as the sink input. The map operator is only
+     * created when the input creator is invoked; it is named and given a uid only when a non-null
+     * uid suffix is supplied.
+     */
+    private <T> Builder forMapperOutputType(
+        DataStream<T> input, MapFunction<T, RowData> mapper, TypeInformation<RowData> outputType) {
+      this.inputCreator =
+          suffix -> {
+            // The order of the input stream matters in some situations (e.g. CDC). The map
+            // operator therefore gets the same parallelism as its input, so that it stays chained
+            // to the input and no default rebalance is inserted.
+            SingleOutputStreamOperator<RowData> mapped =
+                input.map(mapper, outputType).setParallelism(input.getParallelism());
+            if (suffix == null) {
+              return mapped;
+            }
+
+            mapped.name(operatorName(suffix)).uid(suffix + "-mapper");
+            return mapped;
+          };
+      return this;
+    }
+
+    /**
+     * This iceberg {@link SerializableTable} instance is used for 
initializing {@link
+     * IcebergStreamWriter} which will write all the records into {@link 
DataFile}s and emit them to
+     * downstream operator. Providing a table would avoid so many table 
loading from each separate
+     * task.
+     *
+     * @param newTable the loaded iceberg table instance.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder table(Table newTable) {
+      this.table = (SerializableTable) SerializableTable.copyOf(newTable);
+      return this;
+    }
+
+    /**
+     * Sets the table loader, which is used for loading tables lazily in {@link
+     * org.apache.iceberg.flink.sink.IcebergCommitter}. The loader is needed because {@link Table}
+     * is not serializable, so the table given to Builder#table cannot simply be used in the remote
+     * task manager.
+     *
+     * @param newTableLoader to load iceberg table inside tasks.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder tableLoader(TableLoader newTableLoader) {
+      this.tableLoader = newTableLoader;
+      return this;
+    }
+
+    /** Returns the table loader configured on this builder, or null if none has been set. */
+    TableLoader tableLoader() {
+      return this.tableLoader;
+    }
+
+    /**
+     * Sets a single write property for IcebergSink. The supported properties are listed in {@link
+     * FlinkWriteOptions}.
+     *
+     * @param property the write option key.
+     * @param value the write option value.
+     * @return this builder.
+     */
+    public Builder set(String property, String value) {
+      this.writeOptions.put(property, value);
+      return this;
+    }
+
+    /**
+     * Sets several write properties for IcebergSink at once. The supported properties are listed
+     * in {@link FlinkWriteOptions}.
+     *
+     * @param properties the write options to add; existing keys are overwritten.
+     * @return this builder.
+     */
+    public Builder setAll(Map<String, String> properties) {
+      this.writeOptions.putAll(properties);
+      return this;
+    }
+
+    /**
+     * Sets the Flink table schema on this builder.
+     *
+     * @param newTableSchema the schema to use.
+     * @return this builder.
+     */
+    public Builder tableSchema(TableSchema newTableSchema) {
+      this.tableSchema = newTableSchema;
+      return this;
+    }
+
+    public Builder overwrite(boolean newOverwrite) {
+      writeOptions.put(FlinkWriteOptions.OVERWRITE_MODE.key(), 
Boolean.toString(newOverwrite));
+      return this;
+    }
+
+    /**
+     * Replaces the Flink configuration held by this builder.
+     *
+     * @param config the configuration to use.
+     * @return this builder.
+     */
+    public Builder flinkConf(ReadableConfig config) {
+      this.readableConfig = config;
+      return this;
+    }
+
+    /**
+     * Configure the write {@link DistributionMode} that the IcebergSink will 
use. Currently, flink
+     * support {@link DistributionMode#NONE} and {@link DistributionMode#HASH}.
+     *
+     * @param mode to specify the write distribution mode.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder distributionMode(DistributionMode mode) {
+      Preconditions.checkArgument(
+          !DistributionMode.RANGE.equals(mode),
+          "Flink does not support 'range' write distribution mode now.");
+      if (mode != null) {
+        writeOptions.put(FlinkWriteOptions.DISTRIBUTION_MODE.key(), 
mode.modeName());
+      }
+      return this;
+    }
+
+    /**
+     * Configuring the write parallel number for iceberg stream writer.
+     *
+     * @param newWriteParallelism the number of parallel iceberg stream writer.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder writeParallelism(int newWriteParallelism) {
+      writeOptions.put(
+          FlinkWriteOptions.WRITE_PARALLELISM.key(), 
Integer.toString(newWriteParallelism));
+      return this;
+    }
+
+    /**
+     * All INSERT/UPDATE_AFTER events from input stream will be transformed to 
UPSERT events, which
+     * means it will DELETE the old records and then INSERT the new records. 
In partitioned table,
+     * the partition fields should be a subset of equality fields, otherwise 
the old row that
+     * located in partition-A could not be deleted by the new row that located 
in partition-B.
+     *
+     * @param enabled indicate whether it should transform all 
INSERT/UPDATE_AFTER events to UPSERT.
+     * @return {@link IcebergSink.Builder} to connect the iceberg table.
+     */
+    public Builder upsert(boolean enabled) {
+      writeOptions.put(FlinkWriteOptions.WRITE_UPSERT_ENABLED.key(), 
Boolean.toString(enabled));
+      return this;
+    }
+
+    /**
+     * Configures the equality field columns for an iceberg table that accepts CDC or UPSERT
+     * events.
+     *
+     * @param columns defines the iceberg table's key.
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder equalityFieldColumns(List<String> columns) {
+      // The list is stored as passed in; no copy is made.
+      this.equalityFieldColumns = columns;
+      return this;
+    }
+
+    /**
+     * Sets the uid suffix for the IcebergSink operators. Note that IcebergSink internally consists
+     * of multiple operators (like writer, committer, aggregator). The actual operator uid will be
+     * prepended with a prefix like "Sink Committer: uidSuffix".
+     *
+     * <p>Flink auto-generates the operator uid when it is not set explicitly. Setting a uid for
+     * all operators before deploying to production is a recommended <a
+     * href="https://ci.apache.org/projects/flink/flink-docs-master/docs/ops/production_ready/">
+     * best practice</a>. Flink also has the option {@code pipeline.auto-generate-uid=false} to
+     * disable auto-generation and force all operator uids to be set explicitly.
+     *
+     * <p>Be careful when setting this for an existing job, because it changes the operator uid
+     * from an auto-generated one to this new value. When the change is deployed with a checkpoint,
+     * Flink won't be able to restore the previous IcebergSink operator state (more specifically
+     * the committer operator state). You need to use {@code --allowNonRestoredState} to ignore the
+     * previous sink state. During restore, the IcebergSink state is used to check whether the last
+     * commit was actually successful or not, so {@code --allowNonRestoredState} can lead to data
+     * loss if the Iceberg commit failed in the last completed checkpoint.
+     *
+     * @param newSuffix suffix for Flink sink operator uid and name
+     * @return {@link Builder} to connect the iceberg table.
+     */
+    public Builder uidSuffix(String newSuffix) {
+      this.uidSuffix = newSuffix;
+      return this;
+    }
+
+    /**
+     * Adds all given entries to the snapshot summary properties held by this builder.
+     *
+     * @param properties the properties to add; existing keys are overwritten.
+     * @return this builder.
+     */
+    public Builder snapshotProperties(Map<String, String> properties) {
+      this.snapshotSummary.putAll(properties);
+      return this;
+    }
+
+    /**
+     * Adds a single entry to the snapshot summary properties held by this builder.
+     *
+     * @param property the property key.
+     * @param value the property value.
+     * @return this builder.
+     */
+    public Builder setSnapshotProperty(String property, String value) {
+      this.snapshotSummary.put(property, value);
+      return this;
+    }
+
+    public Builder toBranch(String branch) {
+      writeOptions.put(FlinkWriteOptions.BRANCH.key(), branch);
+      return this;
+    }
+
+    private String operatorName(String suffix) {

Review Comment:
   This seems wrong. It probably should be:
   
   ```
   private String operatorName(String prefix) {
     return uidSuffix != null ? prefix + "-" + uidSuffix : prefix;
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Introduces the new IcebergSink based on the new V2 Flink Sink Abstraction [iceberg]

Reply via email to