a-agmon commented on issue #338:
URL: https://github.com/apache/iceberg-rust/issues/338#issuecomment-2092089365

   > Can you share the metadata JSON? I don't think the field ID resolution is
   > being applied, described in issue #353. `added_data_files_count` is the old
   > name since in V2 it also included delete files. The name should not matter and
   > should be corrected as in #354.
   
   Thanks @Fokko and @zeodtr 
   Here is the metadata.json (I had to anonymize a few things). 
   It was generated by Trino (via its dbt connector). The manifest list it
   references follows after the metadata.
   
   ```
   {
     "format-version" : 2,
     "table-uuid" : "aa3b9ef5-c067-4a08-8e9e-8a061d6c64e1",
     "location" : 
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8",
     "last-sequence-number" : 1,
     "last-updated-ms" : 1714657784414,
     "last-column-id" : 12,
     "current-schema-id" : 0,
     "schemas" : [ {
       "type" : "struct",
       "schema-id" : 0,
       "fields" : [ {
         "id" : 1,
         "name" : "row_id",
         "required" : false,
         "type" : "string"
       }, {
         "id" : 2,
         "name" : "dt",
         "required" : false,
         "type" : "date"
       }, {
         "id" : 3,
         "name" : "sub_acc_id",
         "required" : false,
         "type" : "string"
       }, {
         "id" : 4,
         "name" : "master_acc_id",
         "required" : false,
         "type" : "string"
       }
       ........... shortened 
       , {
         "id" : 12,
         "name" : "total_quantity",
         "required" : false,
         "type" : "long"
       } ]
     } ],
     "default-spec-id" : 0,
     "partition-specs" : [ {
       "spec-id" : 0,
       "fields" : [ {
         "name" : "dt_month",
         "transform" : "month",
         "source-id" : 2,
         "field-id" : 1000
       } ]
     } ],
     "last-partition-id" : 1000,
     "default-sort-order-id" : 0,
     "sort-orders" : [ {
       "order-id" : 0,
       "fields" : [ ]
     } ],
     "properties" : {
       "write.format.default" : "PARQUET",
       "write.parquet.compression-codec" : "zstd"
     },
     "current-snapshot-id" : 6685531058123427778,
     "refs" : {
       "main" : {
         "snapshot-id" : 6685531058123427778,
         "type" : "branch"
       }
     },
     "snapshots" : [ {
       "sequence-number" : 1,
       "snapshot-id" : 6685531058123427778,
       "timestamp-ms" : 1714657783993,
       "summary" : {
         "operation" : "append",
         "trino_query_id" : "20240502_134523_18961_n5bhn",
         "added-data-files" : "503",
         "added-records" : "176720085",
         "added-files-size" : "3221301527",
         "changed-partition-count" : "32",
         "total-records" : "176720085",
         "total-files-size" : "3221301527",
         "total-data-files" : "503",
         "total-delete-files" : "0",
         "total-position-deletes" : "0",
         "total-equality-deletes" : "0"
       },
       "manifest-list" : 
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/snap-6685531058123427778-1-2584bfc0-abb3-4257-a61c-0498a8e29dc4.avro",
       "schema-id" : 0
     } ],
     "statistics" : [ {
       "snapshot-id" : 6685531058123427778,
       "statistics-path" : 
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/20240502_134523_18961_n5bhn-748b8bf4-ef54-4882-8d7f-2f6b9edbe85b.stats",
       "file-size-in-bytes" : 294179,
       "file-footer-size-in-bytes" : 2418,
       "blob-metadata" : [ {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 1 ],
         "properties" : {
           "ndv" : "172900302"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 2 ],
         "properties" : {
           "ndv" : "944"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 3 ],
         "properties" : {
           "ndv" : "47786"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 4 ],
         "properties" : {
           "ndv" : "47513"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 5 ],
         "properties" : {
           "ndv" : "195336"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 6 ],
         "properties" : {
           "ndv" : "171901"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 7 ],
         "properties" : {
           "ndv" : "7237440"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 8 ],
         "properties" : {
           "ndv" : "4"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 9 ],
         "properties" : {
           "ndv" : "42995"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 10 ],
         "properties" : {
           "ndv" : "47504"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 11 ],
         "properties" : {
           "ndv" : "10"
         }
       }, {
         "type" : "apache-datasketches-theta-v1",
         "snapshot-id" : 6685531058123427778,
         "sequence-number" : 1,
         "fields" : [ 12 ],
         "properties" : {
           "ndv" : "583133"
         }
       } ]
     } ],
     "partition-statistics" : [ ],
     "snapshot-log" : [ {
       "timestamp-ms" : 1714657783993,
       "snapshot-id" : 6685531058123427778
     } ],
     "metadata-log" : [ {
       "timestamp-ms" : 1714657783993,
       "metadata-file" : 
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/00000-52cc6f6a-259c-4ca8-ba89-cd3a99b9eedb.metadata.json"
     } ]
   }
   ```
   
   The referenced manifest list (the entry pointing to the manifest file):
   ```
   {
      
"manifest_path":"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/2584bfc0-abb3-4257-a61c-0498a8e29dc4-m0.avro",
      "manifest_length":98246,
      "partition_spec_id":0,
      "content":0,
      "sequence_number":1,
      "min_sequence_number":1,
      "added_snapshot_id":6685531058123427778,
      "added_files_count":503,
      "existing_files_count":0,
      "deleted_files_count":0,
      "added_rows_count":176720085,
      "existing_rows_count":0,
      "deleted_rows_count":0,
      "partitions":{
         "array":[
            {
               "contains_null":false,
               "contains_nan":{
                  "boolean":false
               },
               "lower_bound":{
                  "bytes":"m\u0002\u0000\u0000"
               },
               "upper_bound":{
                  "bytes":"\u0002\u0000\u0000"
               }
            }
         ]
      }
   }
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to