liurenjie1024 commented on code in PR #491:
URL: https://github.com/apache/iceberg-rust/pull/491#discussion_r1704049202


##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -117,22 +144,312 @@ pub struct UnboundPartitionField {
 }
 
 /// Unbound partition spec can be built without a schema and later bound to a 
schema.
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, 
Builder)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default)]
 #[serde(rename_all = "kebab-case")]
-#[builder(setter(prefix = "with"))]
 pub struct UnboundPartitionSpec {
     /// Identifier for PartitionSpec
-    #[builder(default, setter(strip_option))]
     pub spec_id: Option<i32>,
     /// Details of the partition spec
-    #[builder(setter(each(name = "with_unbound_partition_field")))]
     pub fields: Vec<UnboundPartitionField>,
 }
 
 impl UnboundPartitionSpec {
     /// Create unbound partition spec builer
-    pub fn builder() -> UnboundPartitionSpecBuilder {
-        UnboundPartitionSpecBuilder::default()
+    pub fn builder() -> PartitionSpecBuilder {
+        PartitionSpecBuilder::default()
+    }
+}
+
+/// Create valid partition specs for a given schema.
+#[derive(Debug, Default)]
+pub struct PartitionSpecBuilder {
+    spec_id: Option<i32>,
+    last_assigned_field_id: i32,
+    fields: Vec<UnboundPartitionField>,
+}
+
+impl PartitionSpecBuilder {

Review Comment:
   I don't think it's a good idea to mix unbound and bound partition spec 
builder in one struct, which makes  the api confusing.



##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -117,22 +144,312 @@ pub struct UnboundPartitionField {
 }
 
 /// Unbound partition spec can be built without a schema and later bound to a 
schema.
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, 
Builder)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default)]
 #[serde(rename_all = "kebab-case")]
-#[builder(setter(prefix = "with"))]
 pub struct UnboundPartitionSpec {
     /// Identifier for PartitionSpec
-    #[builder(default, setter(strip_option))]
     pub spec_id: Option<i32>,
     /// Details of the partition spec
-    #[builder(setter(each(name = "with_unbound_partition_field")))]
     pub fields: Vec<UnboundPartitionField>,
 }
 
 impl UnboundPartitionSpec {
     /// Create unbound partition spec builer
-    pub fn builder() -> UnboundPartitionSpecBuilder {
-        UnboundPartitionSpecBuilder::default()
+    pub fn builder() -> PartitionSpecBuilder {
+        PartitionSpecBuilder::default()
+    }
+}
+
+/// Create valid partition specs for a given schema.
+#[derive(Debug, Default)]
+pub struct PartitionSpecBuilder {
+    spec_id: Option<i32>,
+    last_assigned_field_id: i32,
+    fields: Vec<UnboundPartitionField>,
+}
+
+impl PartitionSpecBuilder {
+    pub(crate) const UNPARTITIONED_LAST_ASSIGNED_ID: i32 = 999;
+    // Default partition spec id is only used for building `PartitionSpec`.
+    // When building unbound partition specs, the spec id is not set by 
default.
+    pub(crate) const DEFAULT_PARTITION_SPEC_ID: i32 = 0;
+
+    /// Create a new partition spec builder with the given schema.
+    pub fn new() -> Self {
+        Self {
+            spec_id: None,
+            fields: vec![],
+            last_assigned_field_id: Self::UNPARTITIONED_LAST_ASSIGNED_ID,
+        }
+    }
+
+    /// Set the last assigned field id for the partition spec.
+    /// This is useful when re-binding partition specs.
+    pub fn with_last_assigned_field_id(mut self, last_assigned_field_id: i32) 
-> Self {

Review Comment:
   Sorry, this is confusing to me. Could you elaborate when this method would 
be useful?



##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -117,22 +144,312 @@ pub struct UnboundPartitionField {
 }
 
 /// Unbound partition spec can be built without a schema and later bound to a 
schema.
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, 
Builder)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default)]
 #[serde(rename_all = "kebab-case")]
-#[builder(setter(prefix = "with"))]
 pub struct UnboundPartitionSpec {
     /// Identifier for PartitionSpec
-    #[builder(default, setter(strip_option))]
     pub spec_id: Option<i32>,
     /// Details of the partition spec
-    #[builder(setter(each(name = "with_unbound_partition_field")))]
     pub fields: Vec<UnboundPartitionField>,
 }
 
 impl UnboundPartitionSpec {
     /// Create unbound partition spec builer
-    pub fn builder() -> UnboundPartitionSpecBuilder {
-        UnboundPartitionSpecBuilder::default()
+    pub fn builder() -> PartitionSpecBuilder {
+        PartitionSpecBuilder::default()
+    }
+}
+
+/// Create valid partition specs for a given schema.
+#[derive(Debug, Default)]
+pub struct PartitionSpecBuilder {
+    spec_id: Option<i32>,
+    last_assigned_field_id: i32,
+    fields: Vec<UnboundPartitionField>,
+}
+
+impl PartitionSpecBuilder {
+    pub(crate) const UNPARTITIONED_LAST_ASSIGNED_ID: i32 = 999;
+    // Default partition spec id is only used for building `PartitionSpec`.
+    // When building unbound partition specs, the spec id is not set by 
default.
+    pub(crate) const DEFAULT_PARTITION_SPEC_ID: i32 = 0;
+
+    /// Create a new partition spec builder with the given schema.
+    pub fn new() -> Self {
+        Self {
+            spec_id: None,
+            fields: vec![],
+            last_assigned_field_id: Self::UNPARTITIONED_LAST_ASSIGNED_ID,
+        }
+    }
+
+    /// Set the last assigned field id for the partition spec.
+    /// This is useful when re-binding partition specs.
+    pub fn with_last_assigned_field_id(mut self, last_assigned_field_id: i32) 
-> Self {
+        self.last_assigned_field_id = last_assigned_field_id;
+        self
+    }
+
+    /// Set the spec id for the partition spec.
+    pub fn with_spec_id(mut self, spec_id: i32) -> Self {
+        self.spec_id = Some(spec_id);
+        self
+    }
+
+    /// Add a new partition field to the partition spec.
+    pub fn with_partition_field(mut self, field: PartitionField) -> 
Result<Self> {
+        self.check_name_set_and_unique(&field.name)?;
+        self.check_for_redundant_partitions(field.source_id, 
&field.transform)?;
+        self.check_partition_id_unique(field.field_id)?;
+
+        self.fields.push(field.into_unbound());
+        Ok(self)
+    }
+
+    /// Add a new partition field to the partition spec.
+    /// Field ID is auto assigned if not set on `build()`.
+    /// On `build_unbound()` the field ID is not auto assigned.
+    pub fn with_unbound_partition_field(mut self, field: 
UnboundPartitionField) -> Result<Self> {
+        self.check_name_set_and_unique(&field.name)?;
+        self.check_for_redundant_partitions(field.source_id, 
&field.transform)?;
+        if let Some(partition_id) = field.partition_id {
+            self.check_partition_id_unique(partition_id)?;
+        }
+
+        self.fields.push(field);
+        Ok(self)
+    }
+
+    /// Add multiple unbound partition fields to the partition spec.
+    /// Field IDs are auto assigned if not set.
+    pub fn with_unbound_fields(
+        self,
+        fields: impl IntoIterator<Item = UnboundPartitionField>,
+    ) -> Result<Self> {
+        let mut builder = self;
+        for field in fields {
+            builder = builder.with_unbound_partition_field(field)?;
+        }
+        Ok(builder)
+    }
+
+    /// Add multiple partition fields to the partition spec.
+    pub fn with_fields(self, fields: impl IntoIterator<Item = PartitionField>) 
-> Result<Self> {
+        let mut builder = self;
+        for field in fields {
+            builder = builder.with_partition_field(field)?;
+        }
+        Ok(builder)
+    }
+
+    /// Build the unbound partition spec.
+    pub fn build_unbound(self) -> UnboundPartitionSpec {
+        UnboundPartitionSpec {
+            spec_id: self.spec_id,
+            fields: self.fields,
+        }
+    }
+
+    /// Build a bound partition spec with the given schema.
+    pub fn build(self, schema: &Schema) -> Result<PartitionSpec> {
+        let fields = Self::set_field_ids(self.fields, 
self.last_assigned_field_id);
+        for field in &fields {
+            Self::check_name_does_not_collide_with_schema(field, schema)?;
+            Self::check_transform_compatibility(field, schema)?;
+        }
+        Ok(PartitionSpec {
+            spec_id: self.spec_id.unwrap_or(Self::DEFAULT_PARTITION_SPEC_ID),
+            fields,
+        })
+    }
+
+    /// Build a partition spec without validating it against a schema.
+    /// This can lead to invalid partition specs. Use with caution.
+    pub fn build_unchecked(self) -> PartitionSpec {

Review Comment:
   Do we need this method for now? I have concerns making such a method public 
without seeing an actual use case.



##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -117,22 +144,312 @@ pub struct UnboundPartitionField {
 }
 
 /// Unbound partition spec can be built without a schema and later bound to a 
schema.
-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, 
Builder)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default)]
 #[serde(rename_all = "kebab-case")]
-#[builder(setter(prefix = "with"))]
 pub struct UnboundPartitionSpec {
     /// Identifier for PartitionSpec
-    #[builder(default, setter(strip_option))]
     pub spec_id: Option<i32>,
     /// Details of the partition spec
-    #[builder(setter(each(name = "with_unbound_partition_field")))]
     pub fields: Vec<UnboundPartitionField>,
 }
 
 impl UnboundPartitionSpec {
     /// Create unbound partition spec builer
-    pub fn builder() -> UnboundPartitionSpecBuilder {
-        UnboundPartitionSpecBuilder::default()
+    pub fn builder() -> PartitionSpecBuilder {
+        PartitionSpecBuilder::default()
+    }
+}
+
+/// Create valid partition specs for a given schema.
+#[derive(Debug, Default)]
+pub struct PartitionSpecBuilder {
+    spec_id: Option<i32>,
+    last_assigned_field_id: i32,
+    fields: Vec<UnboundPartitionField>,
+}
+
+impl PartitionSpecBuilder {
+    pub(crate) const UNPARTITIONED_LAST_ASSIGNED_ID: i32 = 999;
+    // Default partition spec id is only used for building `PartitionSpec`.
+    // When building unbound partition specs, the spec id is not set by 
default.
+    pub(crate) const DEFAULT_PARTITION_SPEC_ID: i32 = 0;
+
+    /// Create a new partition spec builder with the given schema.
+    pub fn new() -> Self {
+        Self {
+            spec_id: None,
+            fields: vec![],
+            last_assigned_field_id: Self::UNPARTITIONED_LAST_ASSIGNED_ID,
+        }
+    }
+
+    /// Set the last assigned field id for the partition spec.
+    /// This is useful when re-binding partition specs.
+    pub fn with_last_assigned_field_id(mut self, last_assigned_field_id: i32) 
-> Self {
+        self.last_assigned_field_id = last_assigned_field_id;
+        self
+    }
+
+    /// Set the spec id for the partition spec.
+    pub fn with_spec_id(mut self, spec_id: i32) -> Self {
+        self.spec_id = Some(spec_id);
+        self
+    }
+
+    /// Add a new partition field to the partition spec.
+    pub fn with_partition_field(mut self, field: PartitionField) -> 
Result<Self> {

Review Comment:
   This method doesn't seem quite useful to me. For example, the user need to 
lookup schema for get `source_id`, and they need to guess a `partition_id`? 
Maybe a more useful would be similar to the java version:
   
   ```
   pub fn with_partition_field(mut self, source_name: impl IntoString, 
target_name: impl IntoString, tran: Transform) -> Result<Self> {
    ...
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to