HeartLinked commented on code in PR #216:
URL: https://github.com/apache/iceberg-cpp/pull/216#discussion_r2339497135
##########
src/iceberg/manifest_adapter.h:
##########
@@ -33,34 +40,100 @@ class ICEBERG_EXPORT ManifestAdapter {
public:
ManifestAdapter() = default;
virtual ~ManifestAdapter() = default;
+ virtual Status Init() = 0;
- virtual Status StartAppending() = 0;
- virtual Result<ArrowArray> FinishAppending() = 0;
+ Status StartAppending();
+ Result<ArrowArray> FinishAppending();
int64_t size() const { return size_; }
+ virtual std::shared_ptr<Schema> schema() const = 0;
+
+ protected:
+ Status AppendField(ArrowArray* arrowArray, int64_t value);
+ Status AppendField(ArrowArray* arrowArray, uint64_t value);
Review Comment:
These functions do not depend on any of the class's fields and look like
static methods. Why are they not declared static?
##########
src/iceberg/v1_metadata.cc:
##########
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/v1_metadata.h"
+
+#include "iceberg/manifest_entry.h"
+#include "iceberg/manifest_list.h"
+#include "iceberg/partition_spec.h"
Review Comment:
```suggestion
```
##########
test/manifest_reader_writer_test.cc:
##########
@@ -28,7 +26,12 @@
#include "iceberg/avro/avro_register.h"
#include "iceberg/manifest_entry.h"
#include "iceberg/manifest_list.h"
+#include "iceberg/manifest_reader.h"
+#include "iceberg/manifest_writer.h"
+#include "iceberg/partition_spec.h"
Review Comment:
```suggestion
```
##########
src/iceberg/manifest_adapter.cc:
##########
@@ -0,0 +1,704 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/manifest_adapter.h"
+
+#include "iceberg/manifest_entry.h"
+#include "iceberg/manifest_list.h"
+#include "iceberg/schema.h"
+#include "iceberg/schema_internal.h"
+#include "iceberg/util/macros.h"
+#include "nanoarrow/nanoarrow.h"
+
+namespace {
+static constexpr int64_t kBlockSizeInBytes = 64 * 1024 * 1024L;
+}
+
+namespace iceberg {
+
+Status ManifestAdapter::StartAppending() {
+ if (size_ > 0) {
+ return InvalidArgument("Adapter buffer not empty, cannot start
appending.");
+ }
+ array_ = {};
+ size_ = 0;
+ ArrowError error;
+ auto status = ArrowArrayInitFromSchema(&array_, &schema_, &error);
+ NANOARROW_RETURN_IF_NOT_OK(status, error);
+ status = ArrowArrayStartAppending(&array_);
+ NANOARROW_RETURN_IF_NOT_OK(status, error);
+ return {};
+}
+
+Result<ArrowArray> ManifestAdapter::FinishAppending() {
+ ArrowError error;
+ auto status = ArrowArrayFinishBuildingDefault(&array_, &error);
+ NANOARROW_RETURN_IF_NOT_OK(status, error);
+ return array_;
+}
+
+Status ManifestAdapter::AppendField(ArrowArray* arrowArray, int64_t value) {
+ auto status = ArrowArrayAppendInt(arrowArray, value);
+ NANOARROW_RETURN_IF_FAILED(status);
+ return {};
+}
+
+Status ManifestAdapter::AppendField(ArrowArray* arrowArray, uint64_t value) {
+ auto status = ArrowArrayAppendUInt(arrowArray, value);
+ NANOARROW_RETURN_IF_FAILED(status);
+ return {};
+}
+
+Status ManifestAdapter::AppendField(ArrowArray* arrowArray, double value) {
+ auto status = ArrowArrayAppendDouble(arrowArray, value);
+ NANOARROW_RETURN_IF_FAILED(status);
+ return {};
+}
+
+Status ManifestAdapter::AppendField(ArrowArray* arrowArray, std::string_view
value) {
+ ArrowStringView view(value.data(), value.size());
+ auto status = ArrowArrayAppendString(arrowArray, view);
+ NANOARROW_RETURN_IF_FAILED(status);
+ return {};
+}
+
+Status ManifestAdapter::AppendField(ArrowArray* arrowArray,
+ const std::vector<uint8_t>& value) {
+ ArrowBufferViewData data;
+ data.as_char = reinterpret_cast<const char*>(value.data());
+ ArrowBufferView view(data, value.size());
+ auto status = ArrowArrayAppendBytes(arrowArray, view);
+ NANOARROW_RETURN_IF_FAILED(status);
+ return {};
+}
+
+ManifestEntryAdapter::~ManifestEntryAdapter() {
+ if (is_initialized_) {
+ // arrow::ImportedArrayData::Release() bridge.cc:1478 will release the
+ // internal array, so we have no need to release it here.
+ // ArrowArrayRelease(&array_);
+ ArrowSchemaRelease(&schema_);
+ }
+}
+
+std::shared_ptr<StructType> ManifestEntryAdapter::GetManifestEntryStructType()
{
+ return ManifestEntry::TypeFromPartitionType(std::move(partition_schema_));
+}
+
+Status ManifestEntryAdapter::AppendPartitions(
+ ArrowArray* arrow_array, const std::shared_ptr<StructType>& partition_type,
+ const std::vector<Literal>& partitions) {
+ if (arrow_array->n_children != partition_type->fields().size()) {
+ return InvalidManifest("Partition arrow not match partition type.");
+ }
+ auto fields = partition_type->fields();
+
+ for (const auto& partition : partitions) {
+ for (int32_t i = 0; i < fields.size(); i++) {
+ const auto& field = fields[i];
+ auto array = arrow_array->children[i];
+ if (partition.IsNull()) {
+ auto status = ArrowArrayAppendNull(array, 1);
+ NANOARROW_RETURN_IF_FAILED(status);
+ continue;
+ }
+ switch (field.type()->type_id()) {
+ case TypeId::kBoolean:
+ ICEBERG_RETURN_UNEXPECTED(AppendField(
+ array, static_cast<uint64_t>(
+ std::get<bool>(partition.value()) == true ? 1L :
0L)));
+ break;
+ case TypeId::kInt:
+ ICEBERG_RETURN_UNEXPECTED(AppendField(
+ array,
static_cast<int64_t>(std::get<int32_t>(partition.value()))));
+ break;
+ case TypeId::kLong:
+ ICEBERG_RETURN_UNEXPECTED(
+ AppendField(array, std::get<int64_t>(partition.value())));
+ break;
+ case TypeId::kFloat:
+ ICEBERG_RETURN_UNEXPECTED(AppendField(
+ array, static_cast<double>(std::get<float>(partition.value()))));
+ break;
+ case TypeId::kDouble:
+ ICEBERG_RETURN_UNEXPECTED(
+ AppendField(array, std::get<double>(partition.value())));
+ break;
+ case TypeId::kString:
+ ICEBERG_RETURN_UNEXPECTED(
+ AppendField(array, std::get<std::string>(partition.value())));
+ break;
+ case TypeId::kBinary:
+ ICEBERG_RETURN_UNEXPECTED(
+ AppendField(array,
std::get<std::vector<uint8_t>>(partition.value())));
+ break;
+ case TypeId::kDate:
+ ICEBERG_RETURN_UNEXPECTED(AppendField(
+ array,
static_cast<int64_t>(std::get<int32_t>(partition.value()))));
+ break;
+ case TypeId::kTime:
+ case TypeId::kTimestamp:
+ case TypeId::kTimestampTz:
+ ICEBERG_RETURN_UNEXPECTED(
+ AppendField(array, std::get<int64_t>(partition.value())));
+ break;
+ case TypeId::kDecimal:
+ case TypeId::kUuid:
+ case TypeId::kFixed:
Review Comment:
Are only Decimal and Uuid not supported yet? Shouldn't Fixed be handled the
same way as Binary (using vector<uint8_t>)?
##########
src/iceberg/v1_metadata.cc:
##########
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/v1_metadata.h"
+
+#include "iceberg/manifest_entry.h"
+#include "iceberg/manifest_list.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/schema.h"
+
+namespace iceberg {
+
+Status ManifestEntryAdapterV1::Init() {
+ static std::unordered_set<int32_t> compatible_fields{
+ 0, 1, 2, 100, 101, 102, 103, 104, 105, 108, 109, 110, 137, 125, 128,
131, 132, 140,
+ };
+ // TODO(xiao.dong) schema to json
+ metadata_["schema"] = "{}";
+ // TODO(xiao.dong) partition spec to json
+ metadata_["partition-spec"] = "{}";
+ if (partition_spec_ != nullptr) {
+ metadata_["partition-spec-id"] =
std::to_string(partition_spec_->spec_id());
+ }
+ metadata_["format-version"] = "1";
+ return InitSchema(compatible_fields);
+}
+
+Status ManifestEntryAdapterV1::Append(const iceberg::ManifestEntry& entry) {
+ return AppendInternal(entry);
+}
+
+std::shared_ptr<StructType>
ManifestEntryAdapterV1::GetManifestEntryStructType() {
+ // V1 compatible fields. Official suggestions:
Review Comment:
```suggestion
// 'block_size_in_bytes' (ID 105) is a deprecated field that is REQUIRED
// in the v1 data_file schema for backward compatibility.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]