wgtmac commented on code in PR #75: URL: https://github.com/apache/iceberg-cpp/pull/75#discussion_r2048052442
########## src/iceberg/json_internal.cc: ########## @@ -602,8 +757,431 @@ Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json) { return std::make_unique<Snapshot>( snapshot_id, parent_snapshot_id, - sequence_number.has_value() ? *sequence_number : kInitialSequenceNumber, - timestamp_ms, manifest_list, std::move(summary), schema_id); + sequence_number.value_or(TableMetadata::kInitialSequenceNumber), timestamp_ms, + manifest_list, std::move(summary), schema_id); +} + +nlohmann::json ToJson(const BlobMetadata& blob_metadata) { + nlohmann::json json; + json[kType] = blob_metadata.type; + json[kSnapshotId] = blob_metadata.source_snapshot_id; + json[kSequenceNumber] = blob_metadata.source_snapshot_sequence_number; + json[kFields] = blob_metadata.fields; + if (!blob_metadata.properties.empty()) { + json[kProperties] = blob_metadata.properties; + } + return json; +} + +Result<BlobMetadata> BlobMetadataFromJson(const nlohmann::json& json) { + BlobMetadata blob_metadata; + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.type, GetJsonValue<std::string>(json, kType)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.source_snapshot_id, + GetJsonValue<int64_t>(json, kSnapshotId)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.source_snapshot_sequence_number, + GetJsonValue<int64_t>(json, kSequenceNumber)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.fields, + GetJsonValue<std::vector<int32_t>>(json, kFields)); + ICEBERG_ASSIGN_OR_RAISE( + blob_metadata.properties, + (GetJsonValueOrDefault<std::unordered_map<std::string, std::string>>(json, + kProperties))); + return blob_metadata; +} + +nlohmann::json ToJson(const StatisticsFile& statistics_file) { + nlohmann::json json; + json[kSnapshotId] = statistics_file.snapshot_id; + json[kStatisticsPath] = statistics_file.path; + json[kFileSizeInBytes] = statistics_file.file_size_in_bytes; + json[kFileFooterSizeInBytes] = statistics_file.file_footer_size_in_bytes; + + nlohmann::json blob_metadata_array = nlohmann::json::array(); + for (const auto& blob_metadata : statistics_file.blob_metadata) { + blob_metadata_array.push_back(ToJson(blob_metadata)); + } + json[kBlobMetadata] = blob_metadata_array; + + return json; +} + +Result<std::unique_ptr<StatisticsFile>> StatisticsFileFromJson( + const nlohmann::json& json) { + auto stats_file = std::make_unique<StatisticsFile>(); + ICEBERG_ASSIGN_OR_RAISE(stats_file->snapshot_id, + GetJsonValue<int64_t>(json, kSnapshotId)); + ICEBERG_ASSIGN_OR_RAISE(stats_file->path, + GetJsonValue<std::string>(json, kStatisticsPath)); + ICEBERG_ASSIGN_OR_RAISE(stats_file->file_size_in_bytes, + GetJsonValue<int64_t>(json, kFileSizeInBytes)); + ICEBERG_ASSIGN_OR_RAISE(stats_file->file_footer_size_in_bytes, + GetJsonValue<int64_t>(json, kFileFooterSizeInBytes)); + + ICEBERG_ASSIGN_OR_RAISE(auto blob_metadata_array, + GetJsonValue<nlohmann::json>(json, kBlobMetadata)); + for (const auto& blob_json : blob_metadata_array) { + ICEBERG_ASSIGN_OR_RAISE(auto blob, BlobMetadataFromJson(blob_json)); + stats_file->blob_metadata.push_back(std::move(blob)); + } + + return stats_file; +} + +nlohmann::json ToJson(const PartitionStatisticsFile& partition_statistics_file) { + nlohmann::json json; + json[kSnapshotId] = partition_statistics_file.snapshot_id; + json[kStatisticsPath] = partition_statistics_file.path; + json[kFileSizeInBytes] = partition_statistics_file.file_size_in_bytes; + return json; +} + +Result<std::unique_ptr<PartitionStatisticsFile>> PartitionStatisticsFileFromJson( + const nlohmann::json& json) { + auto stats_file = std::make_unique<PartitionStatisticsFile>(); + ICEBERG_ASSIGN_OR_RAISE(stats_file->snapshot_id, + GetJsonValue<int64_t>(json, kSnapshotId)); + ICEBERG_ASSIGN_OR_RAISE(stats_file->path, + GetJsonValue<std::string>(json, kStatisticsPath)); + ICEBERG_ASSIGN_OR_RAISE(stats_file->file_size_in_bytes, + GetJsonValue<int64_t>(json, kFileSizeInBytes)); + return stats_file; +} + +nlohmann::json ToJson(const SnapshotLogEntry& snapshot_log_entry) { + nlohmann::json json; + json[kTimestampMs] = UnixMsFromTimePointMs(snapshot_log_entry.timestamp_ms); + json[kSnapshotId] = snapshot_log_entry.snapshot_id; + return json; +} + +Result<SnapshotLogEntry> SnapshotLogEntryFromJson(const nlohmann::json& json) { + SnapshotLogEntry snapshot_log_entry; + ICEBERG_ASSIGN_OR_RAISE( + snapshot_log_entry.timestamp_ms, + GetJsonValue<int64_t>(json, kTimestampMs).and_then(TimePointMsFromUnixMs)); + ICEBERG_ASSIGN_OR_RAISE(snapshot_log_entry.snapshot_id, + GetJsonValue<int64_t>(json, kSnapshotId)); + return snapshot_log_entry; +} + +nlohmann::json ToJson(const MetadataLogEntry& metadata_log_entry) { + nlohmann::json json; + json[kTimestampMs] = UnixMsFromTimePointMs(metadata_log_entry.timestamp_ms); + json[kMetadataFile] = metadata_log_entry.metadata_file; + return json; +} + +Result<MetadataLogEntry> MetadataLogEntryFromJson(const nlohmann::json& json) { + MetadataLogEntry metadata_log_entry; + ICEBERG_ASSIGN_OR_RAISE( + metadata_log_entry.timestamp_ms, + GetJsonValue<int64_t>(json, kTimestampMs).and_then(TimePointMsFromUnixMs)); + ICEBERG_ASSIGN_OR_RAISE(metadata_log_entry.metadata_file, + GetJsonValue<std::string>(json, kMetadataFile)); + return metadata_log_entry; +} + +nlohmann::json ToJson(const TableMetadata& table_metadata) { + nlohmann::json json; + + json[kFormatVersion] = table_metadata.format_version; + json[kTableUuid] = table_metadata.table_uuid; + json[kLocation] = table_metadata.location; + if (table_metadata.format_version > 1) { + json[kLastSequenceNumber] = table_metadata.last_sequence_number; + } + json[kLastUpdatedMs] = UnixMsFromTimePointMs(table_metadata.last_updated_ms); + json[kLastColumnId] = table_metadata.last_column_id; + + // for older readers, continue writing the current schema as "schema". + // this is only needed for v1 because support for schemas and current-schema-id + // is required in v2 and later. + if (table_metadata.format_version == 1) { + for (const auto& schema : table_metadata.schemas) { + if (schema->schema_id() == table_metadata.current_schema_id) { + json[kSchema] = ToJson(*schema); + break; + } + } + } + + // write the current schema ID and schema list + json[kCurrentSchemaId] = table_metadata.current_schema_id; + json[kSchemas] = ToJsonList(table_metadata.schemas); + + // for older readers, continue writing the default spec as "partition-spec" + if (table_metadata.format_version == 1) { + for (const auto& partition_spec : table_metadata.partition_specs) { + if (partition_spec->spec_id() == table_metadata.default_spec_id) { + json[kPartitionSpec] = ToJson(*partition_spec); + break; + } + } + } + + // write the default spec ID and spec list + json[kDefaultSpecId] = table_metadata.default_spec_id; + json[kPartitionSpecs] = ToJsonList(table_metadata.partition_specs); + json[kLastPartitionId] = table_metadata.last_partition_id; + + // write the default order ID and sort order list + json[kDefaultSortOrderId] = table_metadata.default_sort_order_id; + json[kSortOrders] = ToJsonList(table_metadata.sort_orders); + + // write properties map + json[kProperties] = table_metadata.properties; + + if (std::ranges::find_if(table_metadata.snapshots, [&](const auto& snapshot) { + return snapshot->snapshot_id == table_metadata.current_snapshot_id; + }) != table_metadata.snapshots.cend()) { + json[kCurrentSnapshotId] = table_metadata.current_snapshot_id; + } else if (table_metadata.format_version >= kMinNullCurrentSnapshotVersion) { + json[kCurrentSnapshotId] = nullptr; + } else { + json[kCurrentSnapshotId] = TableMetadata::kInvalidSnapshotId; Review Comment: I did this in line 939 above. It can be verified by the test below: ``` TEST(JsonInternalTest, JsonNullTest) { nlohmann::json json; json["json-null"] = nlohmann::json::value_t::null; json["null-ptr"] = nullptr; std::cout << json.dump() << std::endl; } [==========] Running 1 test from 1 test suite. [----------] Global test environment set-up. [----------] 1 test from JsonInternalTest [ RUN ] JsonInternalTest.JsonNullTest {"json-null":null,"null-ptr":null} [ OK ] JsonInternalTest.JsonNullTest (0 ms) [----------] 1 test from JsonInternalTest (0 ms total) [----------] Global test environment tear-down [==========] 1 test from 1 test suite ran. (0 ms total) [ PASSED ] 1 test. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org