a-agmon commented on issue #338: URL: https://github.com/apache/iceberg-rust/issues/338#issuecomment-2092089365
> Can you share the metadata JSON? I don't think the field ID resolution is being applied, described in issue #353. `added_data_files_count` is the old name since in V2 it also included delete files. The name should not matter and should be corrected as in #354. Thanks @Fokko and @zeodtr. Here is the metadata.json (I had to anonymize a few things). It was generated by Trino (via its DBT connector). The referenced manifest list entry follows after it. ``` { "format-version" : 2, "table-uuid" : "aa3b9ef5-c067-4a08-8e9e-8a061d6c64e1", "location" : "s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8", "last-sequence-number" : 1, "last-updated-ms" : 1714657784414, "last-column-id" : 12, "current-schema-id" : 0, "schemas" : [ { "type" : "struct", "schema-id" : 0, "fields" : [ { "id" : 1, "name" : "row_id", "required" : false, "type" : "string" }, { "id" : 2, "name" : "dt", "required" : false, "type" : "date" }, { "id" : 3, "name" : "sub_acc_id", "required" : false, "type" : "string" }, { "id" : 4, "name" : "master_acc_id", "required" : false, "type" : "string" } ........... 
shortened , { "id" : 12, "name" : "total_quantity", "required" : false, "type" : "long" } ] } ], "default-spec-id" : 0, "partition-specs" : [ { "spec-id" : 0, "fields" : [ { "name" : "dt_month", "transform" : "month", "source-id" : 2, "field-id" : 1000 } ] } ], "last-partition-id" : 1000, "default-sort-order-id" : 0, "sort-orders" : [ { "order-id" : 0, "fields" : [ ] } ], "properties" : { "write.format.default" : "PARQUET", "write.parquet.compression-codec" : "zstd" }, "current-snapshot-id" : 6685531058123427778, "refs" : { "main" : { "snapshot-id" : 6685531058123427778, "type" : "branch" } }, "snapshots" : [ { "sequence-number" : 1, "snapshot-id" : 6685531058123427778, "timestamp-ms" : 1714657783993, "summary" : { "operation" : "append", "trino_query_id" : "20240502_134523_18961_n5bhn", "added-data-files" : "503", "added-records" : "176720085", "added-files-size" : "3221301527", "changed-partition-count" : "32", "total-records" : "176720085", "total-files-size" : "3221301527", "total-data-files" : "503", "total-delete-files" : "0", "total-position-deletes" : "0", "total-equality-deletes" : "0" }, "manifest-list" : "s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/snap-6685531058123427778-1-2584bfc0-abb3-4257-a61c-0498a8e29dc4.avro", "schema-id" : 0 } ], "statistics" : [ { "snapshot-id" : 6685531058123427778, "statistics-path" : "s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/20240502_134523_18961_n5bhn-748b8bf4-ef54-4882-8d7f-2f6b9edbe85b.stats", "file-size-in-bytes" : 294179, "file-footer-size-in-bytes" : 2418, "blob-metadata" : [ { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 1 ], "properties" : { "ndv" : "172900302" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 2 ], "properties" : { "ndv" : "944" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 
6685531058123427778, "sequence-number" : 1, "fields" : [ 3 ], "properties" : { "ndv" : "47786" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 4 ], "properties" : { "ndv" : "47513" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 5 ], "properties" : { "ndv" : "195336" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 6 ], "properties" : { "ndv" : "171901" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 7 ], "properties" : { "ndv" : "7237440" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 8 ], "properties" : { "ndv" : "4" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 9 ], "properties" : { "ndv" : "42995" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 10 ], "properties" : { "ndv" : "47504" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 11 ], "properties" : { "ndv" : "10" } }, { "type" : "apache-datasketches-theta-v1", "snapshot-id" : 6685531058123427778, "sequence-number" : 1, "fields" : [ 12 ], "properties" : { "ndv" : "583133" } } ] } ], "partition-statistics" : [ ], "snapshot-log" : [ { "timestamp-ms" : 1714657783993, "snapshot-id" : 6685531058123427778 } ], "metadata-log" : [ { "timestamp-ms" : 1714657783993, "metadata-file" : "s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/00000-52cc6f6a-259c-4ca8-ba89-cd3a99b9eedb.metadata.json" } ] } ``` The referenced manifest ``` { 
"manifest_path":"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/2584bfc0-abb3-4257-a61c-0498a8e29dc4-m0.avro", "manifest_length":98246, "partition_spec_id":0, "content":0, "sequence_number":1, "min_sequence_number":1, "added_snapshot_id":6685531058123427778, "added_files_count":503, "existing_files_count":0, "deleted_files_count":0, "added_rows_count":176720085, "existing_rows_count":0, "deleted_rows_count":0, "partitions":{ "array":[ { "contains_null":false, "contains_nan":{ "boolean":false }, "lower_bound":{ "bytes":"m\u0002\u0000\u0000" }, "upper_bound":{ "bytes":"\u0002\u0000\u0000" } } ] } } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org