zeroshade commented on code in PR #495:
URL: https://github.com/apache/iceberg-go/pull/495#discussion_r2231536234
##########
table/metadata.go:
##########
@@ -257,21 +261,32 @@ func (b *MetadataBuilder) currentSnapshot() *Snapshot {
return s
}
-func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID
int, initial bool) (*MetadataBuilder, error) {
+func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) (*MetadataBuilder,
error) {
+ newLastColumnID := schema.HighestFieldID()
if newLastColumnID < b.lastColumnId {
return nil, fmt.Errorf("%w: newLastColumnID %d, must be >= %d",
iceberg.ErrInvalidArgument, newLastColumnID, b.lastColumnId)
}
- var schemas []*iceberg.Schema
- if initial {
- schemas = []*iceberg.Schema{schema}
- } else {
- schemas = append(b.schemaList, schema)
+ newSchemaID := b.reuseOrCreateNewSchemaID(schema)
+ _, err := b.GetSchemaByID(newSchemaID)
+ schemaFound := err == nil
+
+ if schemaFound {
Review Comment:
Just do `if _, err := b.GetSchemaByID(newSchemaID); err == nil {` instead
##########
table/metadata.go:
##########
@@ -149,7 +149,7 @@ type MetadataBuilder struct {
schemaList []*iceberg.Schema
currentSchemaID int
specs []iceberg.PartitionSpec
- defaultSpecID int
+ defaultSpecID *int
Review Comment:
why a pointer?
##########
table/metadata.go:
##########
@@ -610,7 +621,12 @@ func (b *MetadataBuilder) SetLastUpdatedMS()
*MetadataBuilder {
return b
}
-func (b *MetadataBuilder) buildCommonMetadata() *commonMetadata {
+func (b *MetadataBuilder) buildCommonMetadata() (*commonMetadata, error) {
+ if b.defaultSpecID == nil {
+ return nil, errors.New("no default spec ID specified")
+ }
Review Comment:
doesn't the iceberg spec say that the default specID is just 0?
##########
schema.go:
##########
@@ -78,6 +78,10 @@ func NewSchemaWithIdentifiers(id int, identifierIDs []int,
fields ...NestedField
return s
}
+func (s *Schema) WithID(id int) {
+ s.ID = id
+}
Review Comment:
because the ID is a public field, we don't need this method unless the
intent is that it would return a *copy* of the schema but with the new id.
A user can just do `s.ID = id` themselves rather than calling `s.WithID(id)`
##########
table/updates.go:
##########
@@ -166,25 +166,21 @@ func (u *upgradeFormatVersionUpdate) Apply(builder
*MetadataBuilder) error {
type addSchemaUpdate struct {
baseUpdate
- Schema *iceberg.Schema `json:"schema"`
- LastColumnID int `json:"last-column-id"`
- initial bool
+ Schema *iceberg.Schema `json:"schema"`
+ initial bool
}
// NewAddSchemaUpdate creates a new update that adds the given schema and last
column ID to
-// the table metadata. If the initial flag is set to true, the schema is
considered the initial
-// schema of the table, and all previously added schemas in the metadata
builder are removed.
-func NewAddSchemaUpdate(schema *iceberg.Schema, lastColumnID int, initial
bool) Update {
+// the table metadata.
+func NewAddSchemaUpdate(schema *iceberg.Schema) *addSchemaUpdate {
return &addSchemaUpdate{
- baseUpdate: baseUpdate{ActionName: UpdateAddSchema},
- Schema: schema,
- LastColumnID: lastColumnID,
- initial: initial,
+ baseUpdate: baseUpdate{ActionName: UpdateAddSchema},
+ Schema: schema,
}
}
Review Comment:
don't we still need the last column ID?
##########
table/metadata.go:
##########
@@ -257,21 +261,32 @@ func (b *MetadataBuilder) currentSnapshot() *Snapshot {
return s
}
-func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID
int, initial bool) (*MetadataBuilder, error) {
+func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) (*MetadataBuilder,
error) {
+ newLastColumnID := schema.HighestFieldID()
if newLastColumnID < b.lastColumnId {
return nil, fmt.Errorf("%w: newLastColumnID %d, must be >= %d",
iceberg.ErrInvalidArgument, newLastColumnID, b.lastColumnId)
}
- var schemas []*iceberg.Schema
- if initial {
- schemas = []*iceberg.Schema{schema}
- } else {
- schemas = append(b.schemaList, schema)
+ newSchemaID := b.reuseOrCreateNewSchemaID(schema)
+ _, err := b.GetSchemaByID(newSchemaID)
+ schemaFound := err == nil
+
+ if schemaFound {
+ if b.lastAddedSchemaID == nil || *b.lastAddedSchemaID !=
newSchemaID {
+ b.updates = append(b.updates,
NewAddSchemaUpdate(schema))
+ b.lastAddedSchemaID = &newSchemaID
+ }
+
+ return b, nil
}
- b.lastColumnId = newLastColumnID
- b.schemaList = schemas
- b.updates = append(b.updates, NewAddSchemaUpdate(schema,
newLastColumnID, initial))
+ b.lastColumnId = max(b.lastColumnId, schema.HighestFieldID())
+
+ schema.WithID(newSchemaID)
Review Comment:
`schema.ID = newSchemaID` no need for the `WithID` method
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]