dongxiao1198 commented on code in PR #216:
URL: https://github.com/apache/iceberg-cpp/pull/216#discussion_r2422443101
##########
src/iceberg/manifest_adapter.h:
##########
@@ -33,34 +41,107 @@ class ICEBERG_EXPORT ManifestAdapter {
public:
ManifestAdapter() = default;
virtual ~ManifestAdapter() = default;
+ virtual Status Init() = 0;
- virtual Status StartAppending() = 0;
- virtual Result<ArrowArray> FinishAppending() = 0;
+ Status StartAppending();
+ Result<ArrowArray*> FinishAppending();
int64_t size() const { return size_; }
+ protected:
+ static Status AppendField(ArrowArray* arrowArray, int64_t value);
+ static Status AppendField(ArrowArray* arrowArray, uint64_t value);
+ static Status AppendField(ArrowArray* arrowArray, double value);
+ static Status AppendField(ArrowArray* arrowArray, std::string_view value);
+ static Status AppendField(ArrowArray* arrowArray,
+ const std::span<const uint8_t>& value);
+
protected:
ArrowArray array_;
+ ArrowSchema schema_; // converted from manifest_schema_ or
manifest_list_schema_
int64_t size_ = 0;
};
// \brief Implemented by different versions with different schemas to
// append a list of `ManifestEntry`s to an `ArrowArray`.
class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
public:
- ManifestEntryAdapter() = default;
- ~ManifestEntryAdapter() override = default;
+ explicit ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec)
+ : partition_spec_(std::move(partition_spec)) {}
+ ~ManifestEntryAdapter() override;
virtual Status Append(const ManifestEntry& entry) = 0;
+
+ const std::shared_ptr<Schema>& schema() const { return manifest_schema_; }
+
+ protected:
+ virtual Result<std::shared_ptr<StructType>> GetManifestEntryStructType();
+
+ /// \brief Init version-specific schema for each version.
+ ///
+ /// \param fields_ids each version of manifest schema has schema, we will
init this
+ /// schema based on the fields_ids.
+ Status InitSchema(const std::unordered_set<int32_t>& fields_ids);
+ Status AppendInternal(const ManifestEntry& entry);
+ Status AppendDataFile(ArrowArray* arrow_array,
+ const std::shared_ptr<StructType>& data_file_type,
+ const std::shared_ptr<DataFile>& file);
+ static Status AppendPartition(ArrowArray* arrow_array,
+ const std::shared_ptr<StructType>&
partition_type,
+ const std::vector<Literal>& partitions);
+ static Status AppendList(ArrowArray* arrow_array,
+ const std::vector<int32_t>& list_value);
+ static Status AppendList(ArrowArray* arrow_array,
+ const std::vector<int64_t>& list_value);
+ static Status AppendMap(ArrowArray* arrow_array,
+ const std::map<int32_t, int64_t>& map_value);
+ static Status AppendMap(ArrowArray* arrow_array,
+ const std::map<int32_t, std::vector<uint8_t>>&
map_value);
+
+ virtual Result<std::optional<int64_t>> GetSequenceNumber(const
ManifestEntry& entry);
+ virtual Result<std::optional<std::string>> GetWrappedReferenceDataFile(
+ const std::shared_ptr<DataFile>& file);
+ virtual Result<std::optional<int64_t>> GetWrappedFirstRowId(
+ const std::shared_ptr<DataFile>& file);
+ virtual Result<std::optional<int64_t>> GetWrappedContentOffset(
+ const std::shared_ptr<DataFile>& file);
+ virtual Result<std::optional<int64_t>> GetWrappedContentSizeInBytes(
+ const std::shared_ptr<DataFile>& file);
+
+ protected:
+ std::shared_ptr<PartitionSpec> partition_spec_;
+ std::shared_ptr<Schema> manifest_schema_;
+ std::unordered_map<std::string, std::string> metadata_;
};
// \brief Implemented by different versions with different schemas to
// append a list of `ManifestFile`s to an `ArrowArray`.
class ICEBERG_EXPORT ManifestFileAdapter : public ManifestAdapter {
public:
ManifestFileAdapter() = default;
- ~ManifestFileAdapter() override = default;
+ ~ManifestFileAdapter() override;
virtual Status Append(const ManifestFile& file) = 0;
+
+ const std::shared_ptr<Schema>& schema() const { return
manifest_list_schema_; }
+
+ protected:
+ /// \brief Init version-specific schema for each version.
+ ///
+ /// \param fields_ids each version of manifest schema has schema, we will
init this
+ /// schema based on the fields_ids.
+ Status InitSchema(const std::unordered_set<int32_t>& fields_ids);
+ Status AppendInternal(const ManifestFile& file);
+ static Status AppendPartitions(ArrowArray* arrow_array,
+ const std::shared_ptr<ListType>&
partition_type,
+ const std::vector<PartitionFieldSummary>&
partitions);
+
+ virtual Result<int64_t> GetSequenceNumber(const ManifestFile& file);
+ virtual Result<int64_t> GetWrappedMinSequenceNumber(const ManifestFile&
file);
+ virtual Result<std::optional<int64_t>> GetWrappedFirstRowId(const
ManifestFile& file);
+
+ protected:
+ std::shared_ptr<Schema> manifest_list_schema_;
+ std::unordered_map<std::string, std::string> metadata_;
Review Comment:
metadata_ has been moved to the base class. manifest_list_schema_ and
manifest_schema_ have different meanings, so they are left here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]