laskoviymishka commented on code in PR #983:
URL: https://github.com/apache/iceberg-go/pull/983#discussion_r3188460747
##########
table/conflict_validation.go:
##########
@@ -426,6 +426,89 @@ func validateNoConflictingDataFiles(ctx *conflictContext,
filter iceberg.Boolean
return validateAddedDataFilesMatchingFilter(ctx, filter)
}
+// validateNoConflictingDataFilesInPartitions is like
+// validateNoConflictingDataFiles but scoped to specific partition
+// tuples derived from equality-delete files. It only flags concurrent
+// data files whose partition tuple matches one of the provided tuples,
+// avoiding false conflicts when a concurrent append lands in a
+// completely different partition.
+//
+// When any provided partition tuple is empty (the table is
+// unpartitioned or the delete file covers all partitions), the check
+// falls back to AlwaysTrue — the equality delete could affect any row.
+//
+// Under IsolationSnapshot this validator is a no-op.
+func validateNoConflictingDataFilesInPartitions(ctx *conflictContext,
eqDeletePartitions []map[int]any, level IsolationLevel) error {
+ if level != IsolationSerializable {
+ return nil
+ }
+
+ if len(ctx.concurrent) == 0 || len(eqDeletePartitions) == 0 {
+ return nil
+ }
+
+ // If any eq-delete file is unpartitioned (empty tuple), the delete
+ // could affect any row — fall back to the conservative AlwaysTrue
check.
+ for _, p := range eqDeletePartitions {
+ if len(p) == 0 {
+ return validateAddedDataFilesMatchingFilter(ctx,
iceberg.AlwaysTrue{})
+ }
+ }
+
+ // Build a set of partition tuple keys for O(1) lookup.
+ partSet := make(map[string]struct{}, len(eqDeletePartitions))
+ for _, p := range eqDeletePartitions {
+ partSet[partitionTupleKey(p)] = struct{}{}
+ }
+
+ for _, snap := range ctx.concurrent {
+ manifests, err := snap.Manifests(ctx.fs)
+ if err != nil {
+ return fmt.Errorf("loading manifests for concurrent
snapshot %d: %w", snap.SnapshotID, err)
+ }
+ for _, mf := range manifests {
+ if mf.ManifestContent() != iceberg.ManifestContentData {
+ continue
+ }
+ entries, err := mf.FetchEntries(ctx.fs, false)
+ if err != nil {
+ return fmt.Errorf("reading entries from
manifest %s: %w", mf.FilePath(), err)
+ }
+ for _, e := range entries {
+ if e.Status() != iceberg.EntryStatusADDED ||
e.SnapshotID() != snap.SnapshotID {
+ continue
+ }
+ if _, ok :=
partSet[partitionTupleKey(e.DataFile().Partition())]; ok {
+ return fmt.Errorf("%w: snapshot %d
added data file %s in eq-delete partition",
+ ErrConflictingDataFiles,
snap.SnapshotID, e.DataFile().FilePath())
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+// partitionTupleKey returns a deterministic string key for a partition
+// tuple map. Keys are sorted by field id so maps with the same content
+// always produce the same string.
+func partitionTupleKey(p map[int]any) string {
+ if len(p) == 0 {
+ return ""
+ }
+ keys := make([]int, 0, len(p))
+ for k := range p {
+ keys = append(keys, k)
+ }
+ sort.Ints(keys)
+ var buf []byte
+ for _, k := range keys {
+ buf = fmt.Appendf(buf, "%d=%v;", k, p[k])
Review Comment:
`fmt.Appendf(buf, "%d=%v;", k, p[k])` is pretty fragile as an equality oracle
here. `map[int]any` partition values can land as several Go types depending on
construction path — `uuid.UUID` (= `[16]byte`) vs raw `[16]byte`,
`iceberg.DecimalLiteral` vs `*big.Rat`, `time.Time` with monotonic-clock
suffix, etc., see `convertAvroValueToIcebergType` in `manifest.go:1756`. Same
logical value, different `%v`, missed match. Also `=` and `;` aren't escaped,
so a string value `"1=a;2=b"` collides with a different two-field tuple.
A silent miss here is worse than the bug being fixed — we'd accept a commit
Java would reject. I'd either build an `Or(And(...))` partition expression and
call the existing `validateAddedDataFilesMatchingFilter` (which uses
`iceberg.Literal` comparison and handles all of these), or normalize each known
type to a canonical hashable form before keying.
##########
table/conflict_validation.go:
##########
@@ -426,6 +426,89 @@ func validateNoConflictingDataFiles(ctx *conflictContext,
filter iceberg.Boolean
return validateAddedDataFilesMatchingFilter(ctx, filter)
}
+// validateNoConflictingDataFilesInPartitions is like
+// validateNoConflictingDataFiles but scoped to specific partition
+// tuples derived from equality-delete files. It only flags concurrent
+// data files whose partition tuple matches one of the provided tuples,
+// avoiding false conflicts when a concurrent append lands in a
+// completely different partition.
+//
+// When any provided partition tuple is empty (the table is
+// unpartitioned or the delete file covers all partitions), the check
+// falls back to AlwaysTrue — the equality delete could affect any row.
+//
+// Under IsolationSnapshot this validator is a no-op.
+func validateNoConflictingDataFilesInPartitions(ctx *conflictContext,
eqDeletePartitions []map[int]any, level IsolationLevel) error {
+ if level != IsolationSerializable {
+ return nil
+ }
+
+ if len(ctx.concurrent) == 0 || len(eqDeletePartitions) == 0 {
+ return nil
+ }
+
+ // If any eq-delete file is unpartitioned (empty tuple), the delete
+ // could affect any row — fall back to the conservative AlwaysTrue
check.
+ for _, p := range eqDeletePartitions {
+ if len(p) == 0 {
+ return validateAddedDataFilesMatchingFilter(ctx,
iceberg.AlwaysTrue{})
+ }
+ }
+
+ // Build a set of partition tuple keys for O(1) lookup.
+ partSet := make(map[string]struct{}, len(eqDeletePartitions))
+ for _, p := range eqDeletePartitions {
+ partSet[partitionTupleKey(p)] = struct{}{}
+ }
+
+ for _, snap := range ctx.concurrent {
+ manifests, err := snap.Manifests(ctx.fs)
+ if err != nil {
+ return fmt.Errorf("loading manifests for concurrent
snapshot %d: %w", snap.SnapshotID, err)
+ }
+ for _, mf := range manifests {
+ if mf.ManifestContent() != iceberg.ManifestContentData {
+ continue
+ }
+ entries, err := mf.FetchEntries(ctx.fs, false)
+ if err != nil {
+ return fmt.Errorf("reading entries from
manifest %s: %w", mf.FilePath(), err)
+ }
+ for _, e := range entries {
+ if e.Status() != iceberg.EntryStatusADDED ||
e.SnapshotID() != snap.SnapshotID {
+ continue
+ }
+ if _, ok :=
partSet[partitionTupleKey(e.DataFile().Partition())]; ok {
Review Comment:
The lookup compares only `(field_id, value)` and never consults
`mf.PartitionSpecID()`. After a partition-spec evolution (renamed field,
identity → bucket, day → hour), the same logical row gets written under a
different tuple — eq-delete bound to spec A might be `{1: "hot"}` while
concurrent data under spec B is `{1: 3}` (bucketed) or `{2: "hot"}` (renamed).
Tuples never match, real conflict missed.
The sibling helper at lines 343-434 handles this via
`buildPartitionProjection(specID, ...)` keyed per spec id. I'd at minimum key
on `(specID, tuple)` and use `mf.PartitionSpecID()` plus the eq-delete file's
`SpecID()`. Routing through `validateAddedDataFilesMatchingFilter` with an
OR-of-equalities filter gets it for free.
##########
table/conflict_validation.go:
##########
@@ -426,6 +426,89 @@ func validateNoConflictingDataFiles(ctx *conflictContext,
filter iceberg.Boolean
return validateAddedDataFilesMatchingFilter(ctx, filter)
}
+// validateNoConflictingDataFilesInPartitions is like
+// validateNoConflictingDataFiles but scoped to specific partition
+// tuples derived from equality-delete files. It only flags concurrent
+// data files whose partition tuple matches one of the provided tuples,
+// avoiding false conflicts when a concurrent append lands in a
+// completely different partition.
+//
+// When any provided partition tuple is empty (the table is
+// unpartitioned or the delete file covers all partitions), the check
+// falls back to AlwaysTrue — the equality delete could affect any row.
+//
+// Under IsolationSnapshot this validator is a no-op.
+func validateNoConflictingDataFilesInPartitions(ctx *conflictContext,
eqDeletePartitions []map[int]any, level IsolationLevel) error {
Review Comment:
Bigger-picture: I'd consider folding this into
`validateAddedDataFilesMatchingFilter` rather than adding a sibling. Derive a
`BooleanExpression` from `eqDeletePartitions` like `Or(And(field_a == v_a,
field_b == v_b), ...)` and pass it to `validateNoConflictingDataFiles(ctx,
filter, level)`. That helper already does per-spec projection (spec-evolution
correct), manifest-summary pruning (no full-manifest scan when summaries can't
match), and type-aware partition evaluation via `iceberg.Literal` (no
UUID/decimal/binary issues).
Resolves the comments on `partitionTupleKey` and spec-id awareness at the
same time, and matches the pattern documented in this file's preamble ("there
is one code path that pruning semantics flow through") and Java's
`MergingSnapshotProducer.validateNoNewDataFiles`.
##########
table/partition_conflict_test.go:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// partitionTupleKey
+// ---------------------------------------------------------------------------
+
+func TestPartitionTupleKey_Empty(t *testing.T) {
+ assert.Equal(t, "", partitionTupleKey(map[int]any{}))
+ assert.Equal(t, "", partitionTupleKey(nil))
+}
+
+func TestPartitionTupleKey_SingleField(t *testing.T) {
+ assert.Equal(t, "1=hello;", partitionTupleKey(map[int]any{1: "hello"}))
+}
+
+func TestPartitionTupleKey_MultipleFieldsOrderedByID(t *testing.T) {
+ // Keys must be sorted by field ID, so insertion order must not matter.
+ p1 := map[int]any{1: "a", 3: "c", 2: "b"}
+ p2 := map[int]any{3: "c", 1: "a", 2: "b"}
+ assert.Equal(t, partitionTupleKey(p1), partitionTupleKey(p2),
+ "same content, different insertion order must produce identical
keys")
+ assert.Equal(t, "1=a;2=b;3=c;", partitionTupleKey(p1))
+}
+
+func TestPartitionTupleKey_DifferentValuesProduceDifferentKeys(t *testing.T) {
+ pA := map[int]any{1: "us-east-1"}
+ pB := map[int]any{1: "eu-west-1"}
+ assert.NotEqual(t, partitionTupleKey(pA), partitionTupleKey(pB))
+}
+
+// ---------------------------------------------------------------------------
+// validateNoConflictingDataFilesInPartitions — short-circuit paths
+// ---------------------------------------------------------------------------
+
+// TestValidateNoConflictingDataFilesInPartitions_SnapshotIsolationIsNoOp
+// verifies that the validator is a no-op under snapshot isolation, matching
+// the behaviour of validateNoConflictingDataFiles.
+func TestValidateNoConflictingDataFilesInPartitions_SnapshotIsolationIsNoOp(t
*testing.T) {
+ head := int64(1)
+ meta := newConflictTestMetadata(t, &head)
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ partitions := []map[int]any{{1: "us-east-1"}}
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
partitions, IsolationSnapshot))
+}
+
+// TestValidateNoConflictingDataFilesInPartitions_EmptyInputs verifies that
+// the validator short-circuits when there are no concurrent snapshots or no
+// eq-delete partitions, without touching the filesystem.
+func TestValidateNoConflictingDataFilesInPartitions_EmptyInputs(t *testing.T) {
+ head := int64(1)
+ meta := newConflictTestMetadata(t, &head)
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ // No concurrent snapshots — must not read any manifests.
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{{1: "us-east-1"}}, IsolationSerializable))
+
+ // No eq-delete partitions — nothing to validate.
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx, nil,
IsolationSerializable))
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{}, IsolationSerializable))
+}
+
+// TestValidateNoConflictingDataFilesInPartitions_UnpartitionedFallsBack
+// verifies that a single unpartitioned eq-delete (empty tuple) causes the
+// validator to fall back to the conservative AlwaysTrue path. Because the
+// conflictContext has no concurrent snapshots (fs = nil, concurrent = nil),
+// the AlwaysTrue fallback itself returns nil — this test just ensures we
+// reach and exercise that code path without panicking.
+func TestValidateNoConflictingDataFilesInPartitions_UnpartitionedFallsBack(t
*testing.T) {
+ head := int64(1)
+ // Build metadata with a concurrent snapshot by using two different
base/current metas.
+ meta := newConflictTestMetadata(t, &head)
+
+ // writerBase has no snapshot; current has head snapshot → head is
concurrent.
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ // Empty partition tuple → unpartitioned eq-delete → must fall back to
AlwaysTrue.
+ // With no concurrent snapshots in ctx the AlwaysTrue path returns nil
(no-op).
+ emptyPartition := map[int]any{}
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{emptyPartition}, IsolationSerializable))
+}
Review Comment:
The #978 reproducer (`TestBugRepro_RowDeltaFalseConflictDifferentPartition`
from the issue body) is the test that would fail on `main` and pass on this
branch — it's the regression guard for the actual contract of this PR, and it
isn't here.
A few I'd want at minimum:
* the #978 reproducer as-is — different-partition allowed
* same-partition rejected — without this, a future change to
`partitionTupleKey` could silently degrade the validator into a no-op with no
signal
* UUID or decimal partition column, same-partition rejected — would fail
today against `%v` keying, that's the point
* spec evolution — eq-delete bound to spec A, concurrent data under spec B
(renamed field or transform change), will fail today
* a genuinely-unpartitioned table replacing the trivial-pass test above
The harness exists — `conflict_validation_test.go` builds metadata with
concurrent snapshots and `row_delta_test.go` builds eq-delete files. They just
need to be wired together.
##########
table/conflict_validation.go:
##########
@@ -426,6 +426,89 @@ func validateNoConflictingDataFiles(ctx *conflictContext,
filter iceberg.Boolean
return validateAddedDataFilesMatchingFilter(ctx, filter)
}
+// validateNoConflictingDataFilesInPartitions is like
+// validateNoConflictingDataFiles but scoped to specific partition
+// tuples derived from equality-delete files. It only flags concurrent
+// data files whose partition tuple matches one of the provided tuples,
+// avoiding false conflicts when a concurrent append lands in a
+// completely different partition.
+//
+// When any provided partition tuple is empty (the table is
+// unpartitioned or the delete file covers all partitions), the check
+// falls back to AlwaysTrue — the equality delete could affect any row.
+//
+// Under IsolationSnapshot this validator is a no-op.
+func validateNoConflictingDataFilesInPartitions(ctx *conflictContext,
eqDeletePartitions []map[int]any, level IsolationLevel) error {
+ if level != IsolationSerializable {
+ return nil
+ }
+
+ if len(ctx.concurrent) == 0 || len(eqDeletePartitions) == 0 {
+ return nil
+ }
+
+ // If any eq-delete file is unpartitioned (empty tuple), the delete
+ // could affect any row — fall back to the conservative AlwaysTrue
check.
+ for _, p := range eqDeletePartitions {
+ if len(p) == 0 {
Review Comment:
I'd hoist this fallback to the caller. The function name says
"InPartitions", but a single empty input element silently turns it into
AlwaysTrue across the whole table — leaky. Also "empty per-file partition
tuple" is a noisy proxy for "unpartitioned table": an eq-delete file with an
unset partition map on a *partitioned* table (easy to do via
`NewDataFileBuilder` without partition data) silently nullifies the
optimization. The right signal is the spec itself.
Move the decision into `RowDelta.validate`: if the table's only spec is
unpartitioned, call the AlwaysTrue version directly; otherwise the
partition-scoped one. Both paths become explicit.
##########
table/conflict_validation.go:
##########
@@ -426,6 +426,89 @@ func validateNoConflictingDataFiles(ctx *conflictContext,
filter iceberg.Boolean
return validateAddedDataFilesMatchingFilter(ctx, filter)
}
+// validateNoConflictingDataFilesInPartitions is like
+// validateNoConflictingDataFiles but scoped to specific partition
+// tuples derived from equality-delete files. It only flags concurrent
+// data files whose partition tuple matches one of the provided tuples,
+// avoiding false conflicts when a concurrent append lands in a
+// completely different partition.
+//
+// When any provided partition tuple is empty (the table is
+// unpartitioned or the delete file covers all partitions), the check
+// falls back to AlwaysTrue — the equality delete could affect any row.
+//
+// Under IsolationSnapshot this validator is a no-op.
+func validateNoConflictingDataFilesInPartitions(ctx *conflictContext,
eqDeletePartitions []map[int]any, level IsolationLevel) error {
+ if level != IsolationSerializable {
+ return nil
+ }
+
+ if len(ctx.concurrent) == 0 || len(eqDeletePartitions) == 0 {
+ return nil
+ }
+
+ // If any eq-delete file is unpartitioned (empty tuple), the delete
+ // could affect any row — fall back to the conservative AlwaysTrue
check.
+ for _, p := range eqDeletePartitions {
+ if len(p) == 0 {
+ return validateAddedDataFilesMatchingFilter(ctx,
iceberg.AlwaysTrue{})
+ }
+ }
+
+ // Build a set of partition tuple keys for O(1) lookup.
+ partSet := make(map[string]struct{}, len(eqDeletePartitions))
+ for _, p := range eqDeletePartitions {
+ partSet[partitionTupleKey(p)] = struct{}{}
+ }
+
+ for _, snap := range ctx.concurrent {
+ manifests, err := snap.Manifests(ctx.fs)
+ if err != nil {
+ return fmt.Errorf("loading manifests for concurrent
snapshot %d: %w", snap.SnapshotID, err)
+ }
+ for _, mf := range manifests {
+ if mf.ManifestContent() != iceberg.ManifestContentData {
+ continue
+ }
+ entries, err := mf.FetchEntries(ctx.fs, false)
+ if err != nil {
+ return fmt.Errorf("reading entries from
manifest %s: %w", mf.FilePath(), err)
+ }
+ for _, e := range entries {
+ if e.Status() != iceberg.EntryStatusADDED ||
e.SnapshotID() != snap.SnapshotID {
+ continue
+ }
+ if _, ok :=
partSet[partitionTupleKey(e.DataFile().Partition())]; ok {
+ return fmt.Errorf("%w: snapshot %d
added data file %s in eq-delete partition",
Review Comment:
The sibling on line 404 is `"snapshot %d added data file %s matching filter
%s"` — includes the filter that triggered the match. This one says "in
eq-delete partition" without saying which one. With multiple eq-delete files
spanning multiple partitions, an operator triaging this can't tell which one
conflicted without reading manifests by hand.
```go
return fmt.Errorf("%w: snapshot %d added data file %s in partition %v overlapping eq-delete",
	ErrConflictingDataFiles, snap.SnapshotID, e.DataFile().FilePath(), e.DataFile().Partition())
```
```
##########
table/partition_conflict_test.go:
##########
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// partitionTupleKey
+// ---------------------------------------------------------------------------
+
+func TestPartitionTupleKey_Empty(t *testing.T) {
+ assert.Equal(t, "", partitionTupleKey(map[int]any{}))
+ assert.Equal(t, "", partitionTupleKey(nil))
+}
+
+func TestPartitionTupleKey_SingleField(t *testing.T) {
+ assert.Equal(t, "1=hello;", partitionTupleKey(map[int]any{1: "hello"}))
+}
+
+func TestPartitionTupleKey_MultipleFieldsOrderedByID(t *testing.T) {
+ // Keys must be sorted by field ID, so insertion order must not matter.
+ p1 := map[int]any{1: "a", 3: "c", 2: "b"}
+ p2 := map[int]any{3: "c", 1: "a", 2: "b"}
+ assert.Equal(t, partitionTupleKey(p1), partitionTupleKey(p2),
+ "same content, different insertion order must produce identical
keys")
+ assert.Equal(t, "1=a;2=b;3=c;", partitionTupleKey(p1))
+}
+
+func TestPartitionTupleKey_DifferentValuesProduceDifferentKeys(t *testing.T) {
+ pA := map[int]any{1: "us-east-1"}
+ pB := map[int]any{1: "eu-west-1"}
+ assert.NotEqual(t, partitionTupleKey(pA), partitionTupleKey(pB))
+}
+
+// ---------------------------------------------------------------------------
+// validateNoConflictingDataFilesInPartitions — short-circuit paths
+// ---------------------------------------------------------------------------
+
+// TestValidateNoConflictingDataFilesInPartitions_SnapshotIsolationIsNoOp
+// verifies that the validator is a no-op under snapshot isolation, matching
+// the behaviour of validateNoConflictingDataFiles.
+func TestValidateNoConflictingDataFilesInPartitions_SnapshotIsolationIsNoOp(t
*testing.T) {
+ head := int64(1)
+ meta := newConflictTestMetadata(t, &head)
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ partitions := []map[int]any{{1: "us-east-1"}}
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
partitions, IsolationSnapshot))
+}
+
+// TestValidateNoConflictingDataFilesInPartitions_EmptyInputs verifies that
+// the validator short-circuits when there are no concurrent snapshots or no
+// eq-delete partitions, without touching the filesystem.
+func TestValidateNoConflictingDataFilesInPartitions_EmptyInputs(t *testing.T) {
+ head := int64(1)
+ meta := newConflictTestMetadata(t, &head)
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ // No concurrent snapshots — must not read any manifests.
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{{1: "us-east-1"}}, IsolationSerializable))
+
+ // No eq-delete partitions — nothing to validate.
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx, nil,
IsolationSerializable))
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{}, IsolationSerializable))
+}
+
+// TestValidateNoConflictingDataFilesInPartitions_UnpartitionedFallsBack
+// verifies that a single unpartitioned eq-delete (empty tuple) causes the
+// validator to fall back to the conservative AlwaysTrue path. Because the
+// conflictContext has no concurrent snapshots (fs = nil, concurrent = nil),
+// the AlwaysTrue fallback itself returns nil — this test just ensures we
+// reach and exercise that code path without panicking.
+func TestValidateNoConflictingDataFilesInPartitions_UnpartitionedFallsBack(t
*testing.T) {
+ head := int64(1)
+ // Build metadata with a concurrent snapshot by using two different
base/current metas.
+ meta := newConflictTestMetadata(t, &head)
+
+ // writerBase has no snapshot; current has head snapshot → head is
concurrent.
+ ctx, err := newConflictContext(meta, meta, MainBranch, nil, true)
+ require.NoError(t, err)
+
+ // Empty partition tuple → unpartitioned eq-delete → must fall back to
AlwaysTrue.
+ // With no concurrent snapshots in ctx the AlwaysTrue path returns nil
(no-op).
+ emptyPartition := map[int]any{}
+ require.NoError(t, validateNoConflictingDataFilesInPartitions(ctx,
[]map[int]any{emptyPartition}, IsolationSerializable))
Review Comment:
This test doesn't actually exercise the fallback. `newConflictContext(meta,
meta, ...)` produces `ctx.concurrent = []` because base and current point at
the same head, so the validator short-circuits at `len(ctx.concurrent) == 0`
before reaching the empty-tuple loop. The comment in the test admits it: *"With
no concurrent snapshots in ctx the AlwaysTrue path returns nil (no-op)"* — it'd
still pass if we deleted the fallback entirely.
I'd build a context with a real concurrent snapshot (mirror
`TestNewConflictContext_WriterHasNoBranchView` — `base =
newConflictTestMetadata(t, nil)`, `current = newConflictTestMetadata(t,
&head)`), then assert the fallback is reached, either by injecting a fake
`iceio.IO` and observing the manifest fetch, or by an end-to-end fixture where
AlwaysTrue would correctly flag a conflict the partition-scoped check would
miss.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]