zeroshade commented on code in PR #495:
URL: https://github.com/apache/iceberg-go/pull/495#discussion_r2231536234


##########
table/metadata.go:
##########
@@ -257,21 +261,32 @@ func (b *MetadataBuilder) currentSnapshot() *Snapshot {
        return s
 }
 
-func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID 
int, initial bool) (*MetadataBuilder, error) {
+func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) (*MetadataBuilder, 
error) {
+       newLastColumnID := schema.HighestFieldID()
        if newLastColumnID < b.lastColumnId {
                return nil, fmt.Errorf("%w: newLastColumnID %d, must be >= %d", 
iceberg.ErrInvalidArgument, newLastColumnID, b.lastColumnId)
        }
 
-       var schemas []*iceberg.Schema
-       if initial {
-               schemas = []*iceberg.Schema{schema}
-       } else {
-               schemas = append(b.schemaList, schema)
+       newSchemaID := b.reuseOrCreateNewSchemaID(schema)
+       _, err := b.GetSchemaByID(newSchemaID)
+       schemaFound := err == nil
+
+       if schemaFound {

Review Comment:
   Just do `if _, err := b.GetSchemaByID(newSchemaID); err == nil {` instead



##########
table/metadata.go:
##########
@@ -149,7 +149,7 @@ type MetadataBuilder struct {
        schemaList         []*iceberg.Schema
        currentSchemaID    int
        specs              []iceberg.PartitionSpec
-       defaultSpecID      int
+       defaultSpecID      *int

Review Comment:
   why a pointer?



##########
table/metadata.go:
##########
@@ -610,7 +621,12 @@ func (b *MetadataBuilder) SetLastUpdatedMS() 
*MetadataBuilder {
        return b
 }
 
-func (b *MetadataBuilder) buildCommonMetadata() *commonMetadata {
+func (b *MetadataBuilder) buildCommonMetadata() (*commonMetadata, error) {
+       if b.defaultSpecID == nil {
+               return nil, errors.New("no default spec ID specified")
+       }

Review Comment:
   doesn't the iceberg spec say that the default specID is just 0?



##########
schema.go:
##########
@@ -78,6 +78,10 @@ func NewSchemaWithIdentifiers(id int, identifierIDs []int, 
fields ...NestedField
        return s
 }
 
+func (s *Schema) WithID(id int) {
+       s.ID = id
+}

Review Comment:
   because the ID is a public field, we don't need this method unless the 
intent is that it would return a *copy* of the schema but with the new id.
   
   A user can just do `s.ID = id` themselves rather than calling `s.WithID(id)`



##########
table/updates.go:
##########
@@ -166,25 +166,21 @@ func (u *upgradeFormatVersionUpdate) Apply(builder 
*MetadataBuilder) error {
 
 type addSchemaUpdate struct {
        baseUpdate
-       Schema       *iceberg.Schema `json:"schema"`
-       LastColumnID int             `json:"last-column-id"`
-       initial      bool
+       Schema  *iceberg.Schema `json:"schema"`
+       initial bool
 }
 
 // NewAddSchemaUpdate creates a new update that adds the given schema and last 
column ID to
-// the table metadata. If the initial flag is set to true, the schema is 
considered the initial
-// schema of the table, and all previously added schemas in the metadata 
builder are removed.
-func NewAddSchemaUpdate(schema *iceberg.Schema, lastColumnID int, initial 
bool) Update {
+// the table metadata.
+func NewAddSchemaUpdate(schema *iceberg.Schema) *addSchemaUpdate {
        return &addSchemaUpdate{
-               baseUpdate:   baseUpdate{ActionName: UpdateAddSchema},
-               Schema:       schema,
-               LastColumnID: lastColumnID,
-               initial:      initial,
+               baseUpdate: baseUpdate{ActionName: UpdateAddSchema},
+               Schema:     schema,
        }
 }

Review Comment:
   don't we still need the last column ID?



##########
table/metadata.go:
##########
@@ -257,21 +261,32 @@ func (b *MetadataBuilder) currentSnapshot() *Snapshot {
        return s
 }
 
-func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID 
int, initial bool) (*MetadataBuilder, error) {
+func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) (*MetadataBuilder, 
error) {
+       newLastColumnID := schema.HighestFieldID()
        if newLastColumnID < b.lastColumnId {
                return nil, fmt.Errorf("%w: newLastColumnID %d, must be >= %d", 
iceberg.ErrInvalidArgument, newLastColumnID, b.lastColumnId)
        }
 
-       var schemas []*iceberg.Schema
-       if initial {
-               schemas = []*iceberg.Schema{schema}
-       } else {
-               schemas = append(b.schemaList, schema)
+       newSchemaID := b.reuseOrCreateNewSchemaID(schema)
+       _, err := b.GetSchemaByID(newSchemaID)
+       schemaFound := err == nil
+
+       if schemaFound {
+               if b.lastAddedSchemaID == nil || *b.lastAddedSchemaID != 
newSchemaID {
+                       b.updates = append(b.updates, 
NewAddSchemaUpdate(schema))
+                       b.lastAddedSchemaID = &newSchemaID
+               }
+
+               return b, nil
        }
 
-       b.lastColumnId = newLastColumnID
-       b.schemaList = schemas
-       b.updates = append(b.updates, NewAddSchemaUpdate(schema, 
newLastColumnID, initial))
+       b.lastColumnId = max(b.lastColumnId, schema.HighestFieldID())
+
+       schema.WithID(newSchemaID)

Review Comment:
   `schema.ID = newSchemaID` no need for the `WithID` method



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to