kevinjqliu commented on code in PR #332: URL: https://github.com/apache/iceberg-go/pull/332#discussion_r1997684054
########## manifest.go: ########## @@ -146,25 +158,66 @@ type fallbackManifestFileV1 struct { AddedSnapshotID *int64 `avro:"added_snapshot_id"` } -func (f *fallbackManifestFileV1) toManifest() *manifestFileV1 { - f.manifestFileV1.AddedSnapshotID = *f.AddedSnapshotID +func (f *fallbackManifestFileV1) toFile() *manifestFile { + if f.AddedSnapshotID == nil { + f.manifestFileV1.AddedSnapshotID = -1 + } - return &f.manifestFileV1 + return f.manifestFileV1.toFile() } type manifestFileV1 struct { - Path string `avro:"manifest_path"` - Len int64 `avro:"manifest_length"` - SpecID int32 `avro:"partition_spec_id"` - AddedSnapshotID int64 `avro:"added_snapshot_id"` - AddedFilesCount *int32 `avro:"added_files_count"` - ExistingFilesCount *int32 `avro:"existing_files_count"` - DeletedFilesCount *int32 `avro:"deleted_files_count"` - AddedRowsCount *int64 `avro:"added_rows_count"` - ExistingRowsCount *int64 `avro:"existing_rows_count"` - DeletedRowsCount *int64 `avro:"deleted_rows_count"` - PartitionList *[]FieldSummary `avro:"partitions"` - Key []byte `avro:"key_metadata"` + manifestFile + AddedFilesCount *int32 `avro:"added_files_count"` + ExistingFilesCount *int32 `avro:"existing_files_count"` + DeletedFilesCount *int32 `avro:"deleted_files_count"` + AddedRowsCount *int64 `avro:"added_rows_count"` + ExistingRowsCount *int64 `avro:"existing_rows_count"` + DeletedRowsCount *int64 `avro:"deleted_rows_count"` +} + +func (m *manifestFileV1) toFile() *manifestFile { + m.manifestFile.version = 1 + m.Content = ManifestContentData + m.SeqNumber, m.MinSeqNumber = 0, 0 Review Comment: should this be 0 or -1? 
0 should represent the INITIAL_SEQUENCE_NUMBER; -1 should represent unassigned. ########## manifest.go: ########## @@ -593,42 +624,29 @@ func ReadManifestList(in io.Reader) ([]ManifestFile, error) { return nil, err } - var fallbackAddedSnapshot bool - for _, f := range sc.(*avro.RecordSchema).Fields() { - if f.Name() == "added_snapshot_id" { - if f.Type().Type() == avro.Union { - fallbackAddedSnapshot = true - } - - break - } + version, err := strconv.Atoi(string(dec.Metadata()["format-version"])) + if err != nil { + return nil, fmt.Errorf("invalid format-version: %w", err) } - out := make([]ManifestFile, 0) - for dec.HasNext() { - var file ManifestFile - if string(dec.Metadata()["format-version"]) == "2" { - file = &manifestFileV2{} - } else { - if fallbackAddedSnapshot { - file = &fallbackManifestFileV1{} - } else { - file = &manifestFileV1{} - } - } - - if err := dec.Decode(file); err != nil { - return nil, err - } + if version == 1 { + for _, f := range sc.(*avro.RecordSchema).Fields() { + if f.Name() == "added_snapshot_id" { + if f.Type().Type() == avro.Union { + return decodeManifestsWithFallback[*fallbackManifestFileV1](dec) + } Review Comment: is there a corresponding test for this part? 
########## manifest.go: ########## @@ -146,25 +158,66 @@ type fallbackManifestFileV1 struct { AddedSnapshotID *int64 `avro:"added_snapshot_id"` } -func (f *fallbackManifestFileV1) toManifest() *manifestFileV1 { - f.manifestFileV1.AddedSnapshotID = *f.AddedSnapshotID +func (f *fallbackManifestFileV1) toFile() *manifestFile { + if f.AddedSnapshotID == nil { + f.manifestFileV1.AddedSnapshotID = -1 + } - return &f.manifestFileV1 + return f.manifestFileV1.toFile() } type manifestFileV1 struct { - Path string `avro:"manifest_path"` - Len int64 `avro:"manifest_length"` - SpecID int32 `avro:"partition_spec_id"` - AddedSnapshotID int64 `avro:"added_snapshot_id"` - AddedFilesCount *int32 `avro:"added_files_count"` - ExistingFilesCount *int32 `avro:"existing_files_count"` - DeletedFilesCount *int32 `avro:"deleted_files_count"` - AddedRowsCount *int64 `avro:"added_rows_count"` - ExistingRowsCount *int64 `avro:"existing_rows_count"` - DeletedRowsCount *int64 `avro:"deleted_rows_count"` - PartitionList *[]FieldSummary `avro:"partitions"` - Key []byte `avro:"key_metadata"` + manifestFile + AddedFilesCount *int32 `avro:"added_files_count"` + ExistingFilesCount *int32 `avro:"existing_files_count"` + DeletedFilesCount *int32 `avro:"deleted_files_count"` + AddedRowsCount *int64 `avro:"added_rows_count"` + ExistingRowsCount *int64 `avro:"existing_rows_count"` + DeletedRowsCount *int64 `avro:"deleted_rows_count"` +} + +func (m *manifestFileV1) toFile() *manifestFile { + m.manifestFile.version = 1 + m.Content = ManifestContentData + m.SeqNumber, m.MinSeqNumber = 0, 0 Review Comment: In `NewManifestFile`, V1 table's seqNum is null -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org