liurenjie1024 commented on code in PR #1625:
URL: https://github.com/apache/iceberg-rust/pull/1625#discussion_r2297706225


##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -176,6 +176,38 @@ impl PartitionSpec {
     }
 }
 
+/// A partition key represents a specific partition in a table, containing the 
partition spec,
+/// schema, and the actual partition values.
+#[derive(Clone, Debug)]
+pub struct PartitionKey {
+    /// The partition spec that contains the partition fields.
+    spec: PartitionSpec,
+    /// The schema to which the partition spec is bound.
+    schema: SchemaRef,
+    /// Partition fields' values in struct.
+    data: Struct,
+}
+
+impl PartitionKey {
+    /// Creates a new partition key with the given spec, schema, and data.
+    pub fn new(spec: PartitionSpec, schema: SchemaRef, data: Struct) -> Self {
+        Self { spec, schema, data }
+    }
+
+    /// Generates a partition path based on the partition values.
+    pub fn to_path(&self) -> String {
+        self.spec.partition_to_path(&self.data, self.schema.clone())
+    }
+}
+
+/// Checks if a partition key is effectively none.
+pub fn partition_key_is_none(partition_key: Option<&PartitionKey>) -> bool {

Review Comment:
   Does this have to be `pub`?



##########
crates/iceberg/src/writer/file_writer/location_generator.rs:
##########
@@ -21,14 +21,24 @@ use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
 
 use crate::Result;
-use crate::spec::{DataFileFormat, TableMetadata};
+use crate::spec::{DataFileFormat, PartitionKey, TableMetadata, 
partition_key_is_none};
 
 /// `LocationGenerator` used to generate the location of data file.
 pub trait LocationGenerator: Clone + Send + 'static {
-    /// Generate an absolute path for the given file name.
-    /// e.g.
-    /// For file name "part-00000.parquet", the generated location maybe 
"/table/data/part-00000.parquet"
-    fn generate_location(&self, file_name: &str) -> String;
+    /// Generate an absolute path for the given file name that includes the 
partition path.
+    ///
+    /// # Arguments
+    ///
+    /// * `partition_key` - The partition key of the file. If None, generate a 
non-partitioned path.
+    /// * `file_name` - The name of the file
+    ///
+    /// # Returns
+    ///
+    /// An absolute path that includes the partition path, e.g.,
+    /// "/table/data/id=1/name=alice/part-00000.parquet"
+    /// or non-partitioned path:
+    /// "/table/data/part-00000.parquet"
+    fn generate_location(&self, partition_key: Option<PartitionKey>, 
file_name: &str) -> String;

Review Comment:
   This doesn't have to be breaking for user, you are providing a new method to 
trait.



##########
crates/iceberg/src/spec/partition.rs:
##########
@@ -176,6 +176,38 @@ impl PartitionSpec {
     }
 }
 
+/// A partition key represents a specific partition in a table, containing the 
partition spec,
+/// schema, and the actual partition values.
+#[derive(Clone, Debug)]
+pub struct PartitionKey {
+    /// The partition spec that contains the partition fields.
+    spec: PartitionSpec,
+    /// The schema to which the partition spec is bound.
+    schema: SchemaRef,
+    /// Partition fields' values in struct.
+    data: Struct,
+}
+
+impl PartitionKey {
+    /// Creates a new partition key with the given spec, schema, and data.
+    pub fn new(spec: PartitionSpec, schema: SchemaRef, data: Struct) -> Self {
+        Self { spec, schema, data }
+    }
+
+    /// Generates a partition path based on the partition values.
+    pub fn to_path(&self) -> String {
+        self.spec.partition_to_path(&self.data, self.schema.clone())
+    }
+}
+
+/// Checks if a partition key is effectively none.
+pub fn partition_key_is_none(partition_key: Option<&PartitionKey>) -> bool {

Review Comment:
   Why not put it in `PartitionKey`?



##########
crates/iceberg/src/writer/file_writer/location_generator.rs:
##########
@@ -217,7 +249,82 @@ pub(crate) mod test {
         let location_generator =
             
super::DefaultLocationGenerator::new(table_metadata.clone()).unwrap();
         let location =
-            
location_generator.generate_location(&file_name_genertaor.generate_file_name());
+            location_generator.generate_location(None, 
&file_name_generator.generate_file_name());
         assert_eq!(location, "s3://data.db/data_3/part-00003-test.parquet");
     }
+
+    #[test]
+    fn test_location_generate_with_partition() {
+        // Create a schema with two fields: id (int) and name (string)
+        let schema = Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_fields(vec![
+                    NestedField::required(1, "id", 
Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::required(2, "name", 
Type::Primitive(PrimitiveType::String)).into(),
+                ])
+                .build()
+                .unwrap(),
+        );
+
+        // Create a partition spec with both fields
+        let partition_spec = PartitionSpec::builder(schema.clone())
+            .add_partition_field("id", "id", Transform::Identity)
+            .unwrap()
+            .add_partition_field("name", "name", Transform::Identity)
+            .unwrap()
+            .build()
+            .unwrap();
+
+        // Create partition data with values
+        let partition_data =
+            Struct::from_iter([Some(Literal::int(42)), 
Some(Literal::string("alice"))]);
+
+        // Create a partition key
+        let partition_key = PartitionKey::new(partition_spec, schema, 
partition_data);
+
+        // Test with MockLocationGenerator
+        let mock_location_gen = 
MockLocationGenerator::new("/base/path".to_string());
+        let file_name = "data-00000.parquet";
+        let location = 
mock_location_gen.generate_location(Some(partition_key.clone()), file_name);
+        assert_eq!(
+            location,
+            "/base/path/id=42/name=\"alice\"/data-00000.parquet"
+        );
+
+        // Create a table metadata for DefaultLocationGenerator
+        let table_metadata = TableMetadata {
+            format_version: FormatVersion::V2,
+            table_uuid: 
Uuid::parse_str("fb072c92-a02b-11e9-ae9c-1bb7bc9eca94").unwrap(),
+            location: "s3://data.db/table".to_string(),
+            last_updated_ms: 1515100955770,
+            last_column_id: 2,
+            schemas: HashMap::new(),
+            current_schema_id: 1,
+            partition_specs: HashMap::new(),
+            default_spec: PartitionSpec::unpartition_spec().into(),
+            default_partition_type: StructType::new(vec![]),
+            last_partition_id: 1000,
+            default_sort_order_id: 0,
+            sort_orders: HashMap::from_iter(vec![]),
+            snapshots: HashMap::default(),
+            current_snapshot_id: None,
+            last_sequence_number: 1,
+            properties: HashMap::new(),
+            snapshot_log: Vec::new(),
+            metadata_log: vec![],
+            refs: HashMap::new(),
+            statistics: HashMap::new(),
+            partition_statistics: HashMap::new(),
+            encryption_keys: HashMap::new(),
+        };
+
+        // Test with DefaultLocationGenerator
+        let default_location_gen = 
super::DefaultLocationGenerator::new(table_metadata).unwrap();
+        let location = 
default_location_gen.generate_location(Some(partition_key), file_name);
+        assert_eq!(
+            location,
+            "s3://data.db/table/data/id=42/name=\"alice\"/data-00000.parquet"

Review Comment:
   THis is incorrect, it should be url encoded.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to