wgtmac commented on code in PR #299:
URL: https://github.com/apache/iceberg-cpp/pull/299#discussion_r2506139411
##########
src/iceberg/json_internal.cc:
##########
@@ -519,8 +519,7 @@ Result<std::unique_ptr<PartitionField>>
PartitionFieldFromJson(
std::move(transform));
}
-Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
- const std::shared_ptr<Schema>& schema, const nlohmann::json& json) {
+Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(const
nlohmann::json& json) {
Review Comment:
Let's keep the function signature to still accept `const
std::shared_ptr<Schema>& schema` and add a TODO comment below for PartitionSpec
to validate the partition fields with this schema. Otherwise you need to add
this parameter back in the next PR.
##########
src/iceberg/table_scan.cc:
##########
@@ -269,12 +269,17 @@ Result<std::vector<std::shared_ptr<FileScanTask>>>
DataTableScan::PlanFiles() co
std::vector<std::shared_ptr<FileScanTask>> tasks;
ICEBERG_ASSIGN_OR_RAISE(auto partition_spec,
context_.table_metadata->PartitionSpec());
- auto partition_schema = partition_spec->schema();
+ // auto partition_schema = context_.table_metadata->Schema().value();
+
+ // Get the table schema and partition type
+ ICEBERG_ASSIGN_OR_RAISE(auto table_schema,
context_.table_metadata->Schema());
+ ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
+ partition_spec->PartitionType(table_schema));
Review Comment:
```suggestion
// Get the current schema and partition type
ICEBERG_ASSIGN_OR_RAISE(auto current_schema,
context_.table_metadata->Schema());
ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
partition_spec->PartitionType(current_schema));
```
##########
src/iceberg/partition_spec.cc:
##########
@@ -48,19 +48,17 @@ PartitionSpec::PartitionSpec(std::shared_ptr<Schema>
schema, int32_t spec_id,
const std::shared_ptr<PartitionSpec>& PartitionSpec::Unpartitioned() {
static const std::shared_ptr<PartitionSpec> unpartitioned =
- std::make_shared<PartitionSpec>(
- /*schema=*/std::make_shared<Schema>(std::vector<SchemaField>{}),
kInitialSpecId,
- std::vector<PartitionField>{}, kLegacyPartitionDataIdStart - 1);
+ std::make_shared<PartitionSpec>(kInitialSpecId,
std::vector<PartitionField>{},
+ kLegacyPartitionDataIdStart - 1);
return unpartitioned;
}
-const std::shared_ptr<Schema>& PartitionSpec::schema() const { return schema_;
}
-
int32_t PartitionSpec::spec_id() const { return spec_id_; }
std::span<const PartitionField> PartitionSpec::fields() const { return
fields_; }
-Result<std::shared_ptr<StructType>> PartitionSpec::PartitionType() {
+Result<std::shared_ptr<StructType>> PartitionSpec::PartitionType(
+ std::shared_ptr<Schema> schema) {
Review Comment:
```suggestion
const std::shared_ptr<Schema>& schema) {
```
Both `const std::shared_ptr<Schema>&` or `const Schema&` is OK. Perhaps the
latter is better since it accepts wider range of inputs.
##########
src/iceberg/json_internal.cc:
##########
@@ -977,9 +973,9 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataFromJson(const nlohmann::jso
ParseSchemas(json, table_metadata->format_version,
table_metadata->current_schema_id,
table_metadata->schemas));
- ICEBERG_RETURN_UNEXPECTED(ParsePartitionSpecs(
- json, table_metadata->format_version, current_schema,
- table_metadata->default_spec_id, table_metadata->partition_specs));
+ ICEBERG_RETURN_UNEXPECTED(ParsePartitionSpecs(json,
table_metadata->format_version,
Review Comment:
ditto
##########
src/iceberg/json_internal.cc:
##########
@@ -853,11 +852,9 @@ Result<std::shared_ptr<Schema>> ParseSchemas(
///
/// \param[in] json The JSON object to parse.
/// \param[in] format_version The format version of the table.
-/// \param[in] current_schema The current schema.
/// \param[out] default_spec_id The default partition spec ID.
/// \param[out] partition_specs The list of partition specs.
Status ParsePartitionSpecs(const nlohmann::json& json, int8_t format_version,
- const std::shared_ptr<Schema>& current_schema,
Review Comment:
ditto, let's keep it.
##########
src/iceberg/manifest_writer.cc:
##########
@@ -68,9 +68,10 @@ Result<std::unique_ptr<Writer>> OpenFileWriter(
Result<std::unique_ptr<ManifestWriter>> ManifestWriter::MakeV1Writer(
std::optional<int64_t> snapshot_id, std::string_view manifest_location,
- std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec) {
- auto adapter =
- std::make_unique<ManifestEntryAdapterV1>(snapshot_id,
std::move(partition_spec));
+ std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec,
+ std::shared_ptr<Schema> table_schema) {
Review Comment:
ditto for the name of `table_schema`
##########
src/iceberg/json_internal.cc:
##########
@@ -902,8 +898,8 @@ Status ParsePartitionSpecs(const nlohmann::json& json,
int8_t format_version,
std::move(field->transform()));
}
- auto spec = std::make_unique<PartitionSpec>(
- current_schema, PartitionSpec::kInitialSpecId, std::move(fields));
+ auto spec =
Review Comment:
add a TODO comment here as well.
##########
src/iceberg/json_internal.cc:
##########
@@ -870,8 +867,7 @@ Status ParsePartitionSpecs(const nlohmann::json& json,
int8_t format_version,
ICEBERG_ASSIGN_OR_RAISE(default_spec_id, GetJsonValue<int32_t>(json,
kDefaultSpecId));
for (const auto& spec_json : spec_array) {
- ICEBERG_ASSIGN_OR_RAISE(auto spec,
- PartitionSpecFromJson(current_schema,
spec_json));
+ ICEBERG_ASSIGN_OR_RAISE(auto spec, PartitionSpecFromJson(spec_json));
Review Comment:
ditto
##########
src/iceberg/json_internal.h:
##########
@@ -177,7 +177,7 @@ ICEBERG_EXPORT Result<std::string> ToJsonString(const
PartitionSpec& partition_s
/// \return An `expected` value containing either a `PartitionSpec` object or
an error. If
/// the JSON is malformed or missing expected fields, an error will be
returned.
ICEBERG_EXPORT Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
- const std::shared_ptr<Schema>& schema, const nlohmann::json& json);
Review Comment:
ditto
##########
src/iceberg/manifest_adapter.h:
##########
@@ -61,8 +61,10 @@ class ICEBERG_EXPORT ManifestAdapter {
/// Implemented by different versions with version-specific schemas.
class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
public:
- explicit ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec)
- : partition_spec_(std::move(partition_spec)) {}
+ ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec,
+ std::shared_ptr<Schema> table_schema)
+ : partition_spec_(std::move(partition_spec)),
+ table_schema_(std::move(table_schema)) {}
Review Comment:
```suggestion
current_schema_(std::move(current_schema)) {}
```
Let's avoid calling it table schema because a table can have schemas of
different versions.
##########
src/iceberg/partition_spec.h:
##########
@@ -56,24 +58,20 @@ class ICEBERG_EXPORT PartitionSpec : public
util::Formattable {
/// \param fields The partition fields.
/// \param last_assigned_field_id The last assigned field ID. If not
provided, it will
/// be calculated from the fields.
- PartitionSpec(std::shared_ptr<Schema> schema, int32_t spec_id,
- std::vector<PartitionField> fields,
+ PartitionSpec(int32_t spec_id, std::vector<PartitionField> fields,
std::optional<int32_t> last_assigned_field_id = std::nullopt);
/// \brief Get an unsorted partition spec singleton.
static const std::shared_ptr<PartitionSpec>& Unpartitioned();
- /// \brief Get the table schema
- const std::shared_ptr<Schema>& schema() const;
-
/// \brief Get the spec ID.
int32_t spec_id() const;
/// \brief Get a list view of the partition fields.
std::span<const PartitionField> fields() const;
/// \brief Get the partition type.
- Result<std::shared_ptr<StructType>> PartitionType();
+ Result<std::shared_ptr<StructType>> PartitionType(std::shared_ptr<Schema>
schema);
Review Comment:
```suggestion
Result<std::shared_ptr<StructType>> PartitionType(const Schema& schema);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]