zhjwpku commented on code in PR #91: URL: https://github.com/apache/iceberg-cpp/pull/91#discussion_r2083021722
########## src/iceberg/manifest_entry.h: ########## @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <any> +#include <cstdint> +#include <map> +#include <optional> +#include <string> +#include <unordered_map> +#include <vector> + +#include "iceberg/file_format.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +enum class ManifestStatus { + kExisting = 0, + kAdded = 1, + kDeleted = 2, +}; + +/// \brief Get the relative manifest status type from int +ICEBERG_EXPORT constexpr Result<ManifestStatus> ManifestStatusFromInt( + int status) noexcept { + switch (status) { + case 0: + return ManifestStatus::kExisting; + case 1: + return ManifestStatus::kAdded; + case 2: + return ManifestStatus::kDeleted; + default: + return InvalidArgument("Invalid manifest status: {}", status); + } +} + +enum class DataFileContent { + kData = 0, + kPositionDeletes = 1, + kEqualityDeletes = 2, +}; + +/// \brief Get the relative data file content type from int +ICEBERG_EXPORT constexpr Result<DataFileContent> DataFileContentFromInt( + int content) noexcept { + switch (content) { + case 0: + return 
DataFileContent::kData; + case 1: + return DataFileContent::kPositionDeletes; + case 2: + return DataFileContent::kEqualityDeletes; + default: + return InvalidArgument("Invalid data file content: {}", content); + } +} + +/// \brief DataFile carries data file path, partition tuple, metrics, ... +struct ICEBERG_EXPORT DataFile { + /// Field id: 134 + /// Type of content stored by the data file: data, equality deletes, or position + /// deletes (all v1 files are data files) + DataFileContent content; + /// Field id: 100 + /// Full URI for the file with FS scheme + std::string file_path; + /// Field id: 101 + /// File format type, avro, orc, parquet, or puffin + FileFormatType file_format; + /// Field id: 102 + /// Partition data tuple, schema based on the partition spec output using partition + /// field ids for the struct field ids + /// TODO(zhjwpku): use StructLike to represent partition data tuple + std::map<std::string, std::any> partition; + /// Field id: 103 + /// Number of records in this file, or the cardinality of a deletion vector + int64_t record_count = 0; + /// Field id: 104 + /// Total file size in bytes + int64_t file_size_in_bytes = 0; + /// Field id: 108 + /// Key field id: 117 + /// Value field id: 118 + /// Map from column id to the total size on disk of all regions that store the column. + /// Does not include bytes necessary to read other columns, like footers. Leave null for + /// row-oriented formats (Avro) + std::unordered_map<int32_t, int64_t> column_sizes; Review Comment: I was following the Rust implementation, which uses a HashMap. However, if we need to preserve the order of field IDs, let's switch to using `std::map` instead. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org