xixipi-lining commented on code in PR #431:
URL: https://github.com/apache/iceberg-go/pull/431#discussion_r2238324514


##########
table/update_schema.go:
##########
@@ -0,0 +1,834 @@
+package table
+
+import (
+       "encoding/json"
+       "errors"
+       "fmt"
+       "strings"
+
+       "github.com/apache/iceberg-go"
+)
+
+// UpdateSchema accumulates operations and validates on Build/Commit
+type UpdateSchema struct {
+       txn          *Transaction
+       schema       *iceberg.Schema
+       lastColumnID int
+
+       operations []SchemaOperation
+       errors     []error // Collect validation errors
+
+       allowIncompatibleChanges bool
+       identifierFields         map[string]struct{}
+       caseSensitive            bool
+}
+
+// Operation represents a deferred schema operation
+type SchemaOperation interface {
+       Apply(builder *schemaBuilder) error
+       Validate(schema *iceberg.Schema, settings *validationSettings) error
+       String() string
+}
+
+// Validation settings passed to operations
+type validationSettings struct {
+       allowIncompatibleChanges bool
+       caseSensitive            bool
+}
+
+type schemaBuilder struct {
+       deletes      map[int]struct{}
+       updates      map[int]*iceberg.NestedField
+       adds         map[int][]*iceberg.NestedField
+       moves        map[int][]moveReq
+       lastColumnID int
+       baseSchema   *iceberg.Schema
+       settings     *validationSettings
+}
+
+// Operation Types
+// Each schema update operation is represented by a struct that implements the 
SchemaOperation interface
+// and has a Validate and Apply method.
+// The Validate method validates the operation and returns an error if the 
operation is invalid.
+// The Apply method applies the operation to the schema builder.
+type addColumnOp struct {
+       path           []string
+       required       bool
+       dataType       iceberg.Type
+       doc            string
+       initialDefault any
+}
+
+type updateColumnOp struct {
+       path    []string
+       updates ColumnUpdate
+}
+
+type deleteColumnOp struct {
+       path []string
+}
+
+type moveColumnOp struct {
+       columnToMove    []string
+       referenceColumn []string
+       op              moveOp
+}
+
+type ColumnUpdate struct {
+       Type     iceberg.Optional[iceberg.Type] // nil means no change
+       Doc      iceberg.Optional[string]       // nil means no change
+       Default  any                            // nil means no change
+       Required iceberg.Optional[bool]         // nil means no change
+}
+
+// Move operation
+type moveOp string
+
+const (
+       OpFirst  moveOp = "first"
+       OpBefore moveOp = "before"
+       OpAfter  moveOp = "after"
+)
+
+type moveReq struct {
+       fieldID      int
+       otherFieldID int // 0 when opFirst
+       op           moveOp
+}
+
+func NewUpdateSchema(txn *Transaction, s *iceberg.Schema, lastColumnID int) 
*UpdateSchema {
+       return &UpdateSchema{
+               txn:                      txn,
+               schema:                   s,
+               lastColumnID:             lastColumnID,
+               operations:               make([]SchemaOperation, 0),
+               errors:                   make([]error, 0),
+               allowIncompatibleChanges: false,
+               identifierFields:         make(map[string]struct{}),
+               caseSensitive:            true,
+       }
+}
+
+// AllowIncompatibleChanges permits incompatible schema changes
+func (us *UpdateSchema) AllowIncompatibleChanges() *UpdateSchema {
+       us.allowIncompatibleChanges = true
+       return us
+}
+
+// SetCaseSensitive controls case sensitivity for field lookups
+func (us *UpdateSchema) SetCaseSensitive(caseSensitive bool) *UpdateSchema {
+       us.caseSensitive = caseSensitive
+       return us
+}
+
+// AddColumn queues an add column operation - validation deferred until Build 
is called
+func (us *UpdateSchema) AddColumn(path []string, required bool, dataType 
iceberg.Type, doc string, initialDefault any) *UpdateSchema {
+       op := &addColumnOp{
+               path:           path,
+               required:       required,
+               dataType:       dataType,
+               doc:            doc,
+               initialDefault: initialDefault,
+       }
+       us.operations = append(us.operations, op)
+       return us
+}
+
+// UpdateColumn queues an update column operation - validation deferred until 
Build is called
+func (us *UpdateSchema) UpdateColumn(path []string, updates ColumnUpdate) 
*UpdateSchema {
+       op := &updateColumnOp{
+               path:    path,
+               updates: updates,
+       }
+       us.operations = append(us.operations, op)
+       return us
+}
+
+// DeleteColumn queues a delete column operation - validation deferred until 
Build is called
+func (us *UpdateSchema) DeleteColumn(path []string) *UpdateSchema {
+       op := &deleteColumnOp{
+               path: path,
+       }
+       us.operations = append(us.operations, op)
+       return us
+}
+
+// Move queues a move column operation - validation deferred until Build is 
called
+func (us *UpdateSchema) Move(columnToMove, referenceColumn []string, op 
moveOp) *UpdateSchema {
+       moveOp := &moveColumnOp{
+               columnToMove:    columnToMove,
+               referenceColumn: referenceColumn,
+               op:              op,
+       }
+       us.operations = append(us.operations, moveOp)
+       return us
+}
+
+/// Operation Methods
+
+// Reset clears all queued operations and errors - validation deferred until 
Build is called
+func (us *UpdateSchema) Reset() *UpdateSchema {
+       us.operations = us.operations[:0]
+       us.errors = us.errors[:0]
+       return us
+}
+
+// RemoveLastOperation removes the most recently added operation
+func (us *UpdateSchema) RemoveLastOperation() *UpdateSchema {

Review Comment:
   I'm a little confused — when exactly do we need to use the Reset and 
RemoveLastOperation methods? Exposing the internal op queues doesn't seem to 
offer much benefit. Maybe a simpler interface would be sufficient?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to