laskoviymishka commented on code in PR #982:
URL: https://github.com/apache/iceberg-go/pull/982#discussion_r3202392992


##########
table/commit_retry_test.go:
##########
@@ -266,3 +307,244 @@ func TestReadRetryConfig_ClampsNegativeProperties(t 
*testing.T) {
        assert.Equal(t, uint(CommitMaxRetryWaitMsDefault), cfg.maxWaitMs)
        assert.Equal(t, uint(CommitTotalRetryTimeoutMsDefault), 
cfg.totalTimeoutMs)
 }
+
+// ---------------------------------------------------------------------------
+// Fix 5 — mandatory WriteFileIO check at top of doCommit
+// ---------------------------------------------------------------------------
+
+// TestDoCommit_NonWriteFileIOReturnsError verifies that doCommit fails
+// immediately when the table's file system does not implement WriteFileIO.
+// A silent skip would reuse the stale manifest list — exactly the bug
+// this PR was designed to fix.
+func TestDoCommit_NonWriteFileIOReturnsError(t *testing.T) {
+       cat := &flakyCatalog{}
+       schema := iceberg.NewSchema(0,
+               iceberg.NestedField{ID: 1, Name: "id", Type: 
iceberg.PrimitiveTypes.Int64, Required: true},
+       )
+       meta, err := NewMetadata(schema, iceberg.UnpartitionedSpec, 
UnsortedSortOrder, "file:///tmp/rotest",
+               iceberg.Properties{PropertyFormatVersion: "2"})
+       require.NoError(t, err)
+       cat.metadata = meta
+
+       tbl := New(
+               Identifier{"db", "ro-test"},
+               meta, "file:///tmp/rotest/v1.metadata.json",
+               func(context.Context) (iceio.IO, error) { return readOnlyIO{}, 
nil },
+               cat,
+       )
+
+       _, doErr := tbl.doCommit(t.Context(), nil, nil)
+       require.Error(t, doErr, "doCommit must fail when FS does not implement 
WriteFileIO")
+       assert.Contains(t, doErr.Error(), "WriteFileIO",
+               "error message must mention WriteFileIO so callers understand 
the requirement")
+       assert.Equal(t, int32(0), cat.attempts.Load(),
+               "CommitTable must not be called when FS check fails")
+}
+
+// ---------------------------------------------------------------------------
+// Fix 6 — orphan cleanup via defer
+// ---------------------------------------------------------------------------
+
+// newOCCTable creates a table that uses the given wfs for its FS and the given
+// catalog for commits. meta should include retry-config properties so that
+// doCommit's retry loop allows at least one retry.
+func newOCCTable(t *testing.T, meta Metadata, wfs iceio.WriteFileIO, cat 
CatalogIO) *Table {
+       t.Helper()
+
+       return New(
+               Identifier{"db", "occ-cleanup-test"},
+               meta,
+               "mem://default/table-location/metadata/v1.metadata.json",
+               func(context.Context) (iceio.IO, error) { return wfs, nil },
+               cat,
+       )
+}
+
+// newMemIOWithRetryMeta creates a test memIO and a matching table Metadata 
that
+// includes retry-config properties, so doCommit's retry loop allows retries.
+// The location matches createTestTransactionWithMemIO so they share the same
+// memIO for writing manifest files.
+func newMemIOWithRetryMeta(t *testing.T, spec iceberg.PartitionSpec) (*memIO, 
Metadata) {
+       t.Helper()
+       wfs := newMemIO(1<<20, nil)
+       schema := simpleSchema()
+       meta, err := NewMetadata(schema, &spec, UnsortedSortOrder, 
"mem://default/table-location",
+               iceberg.Properties{
+                       CommitNumRetriesKey:          "3",
+                       CommitMinRetryWaitMsKey:      "1",
+                       CommitMaxRetryWaitMsKey:      "2",
+                       CommitTotalRetryTimeoutMsKey: "60000",
+               })
+       require.NoError(t, err, "new metadata")
+
+       return wfs, meta
+}
+
+// TestDoCommit_OrphanCleanedOnSuccess verifies that manifest-list files
+// orphaned by OCC retries are removed after a successful commit. These files
+// are written during rebuild and must not leak on the happy path.
+func TestDoCommit_OrphanCleanedOnSuccess(t *testing.T) {
+       spec := iceberg.NewPartitionSpec()
+       wfs, meta := newMemIOWithRetryMeta(t, spec)
+
+       // Build a transaction from the retry-enabled meta and commit via the 
producer.
+       tbl := newOCCTable(t, meta, wfs, nil)
+       txn := tbl.NewTransaction()
+       sp := newFastAppendFilesProducer(OpAppend, txn, wfs, nil, nil)
+       sp.appendDataFile(newTestDataFile(t, spec, 
"mem://default/table-location/data/f.parquet", nil))
+
+       updates, reqs, err := sp.commit(context.Background())
+       require.NoError(t, err)
+       addSnap := updates[0].(*addSnapshotUpdate)
+       originalManifestList := addSnap.Snapshot.ManifestList
+
+       // Pre-populate the original manifest list path in the IO so that the
+       // defer's wfs.Remove call has a concrete entry to delete.
+       wfs.files[originalManifestList] = []byte("placeholder")
+
+       // Catalog: fail once with ErrCommitFailed (triggers rebuild that 
orphans
+       // originalManifestList), then succeed.
+       cat := &sequentialCatalog{
+               metadata: meta,
+               errs:     []error{ErrCommitFailed},
+       }
+       tbl = newOCCTable(t, meta, wfs, cat)
+
+       _, err = tbl.doCommit(t.Context(), updates, reqs, 
withCommitBranch(MainBranch))
+       require.NoError(t, err, "doCommit must succeed on the second attempt")
+
+       _, stillExists := wfs.files[originalManifestList]

Review Comment:
   [Important] The orphan tests don't actually exercise a real write→delete 
cycle. `memIO.Create` returns a `limitedWriteCloser` whose contents are not 
persisted into `wfs.files`, so the rebuilt manifest list path is never present 
in `wfs.files` to begin with. The test pre-populates the *original* path with a 
placeholder (line 403) and asserts it was removed — but a regression where the 
cleanup loop deleted the wrong path (e.g., the *committed* manifest list 
instead of the orphan) would slip through, because the committed path was never 
persisted either. Two ways to close this: (a) make `memIO.Create` persist on 
`Close()` so the test exercises the real flow; or (b) add `assert.NotEqual(t, 
originalManifestList, cat.metadata.CurrentSnapshot().ManifestList)` plus a 
positive assertion on the live path's existence. Same applies to the other 
three `TestDoCommit_Orphan*` tests.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to