wgtmac commented on code in PR #31:
URL: https://github.com/apache/iceberg-cpp/pull/31#discussion_r1930297551


##########
src/iceberg/util/formatter.h:
##########
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/util/formatter.h

Review Comment:
   Should this file be renamed to `formatter_internal.h`? We don't have to 
export and install this file.
   
   FYI: I followed the same pattern as Apache Arrow: 
https://github.com/apache/iceberg-cpp/blob/main/cmake_modules/IcebergBuildUtils.cmake#L251



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+StructType::StructType(std::vector<SchemaField> fields) : 
fields_(std::move(fields)) {
+  size_t index = 0;
+  for (const auto& field : fields_) {
+    auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), 
index);
+    if (!inserted) {
+      throw std::runtime_error(
+          std::format("StructType: duplicate field ID {} (field indices {} and 
{})",
+                      field.field_id(), it->second, index));
+    }
+
+    ++index;
+  }
+}
+
+TypeId StructType::type_id() const { return TypeId::kStruct; }
+std::string StructType::ToString() const {
+  std::string repr = "struct<\n";
+  for (const auto& field : fields_) {
+    std::format_to(std::back_inserter(repr), "  {}\n", field);
+  }
+  repr += ">";
+  return repr;
+}
+std::span<const SchemaField> StructType::fields() const { return fields_; }
+std::optional<std::reference_wrapper<const SchemaField>> 
StructType::GetFieldById(
+    int32_t field_id) const {
+  auto it = field_id_to_index_.find(field_id);
+  if (it == field_id_to_index_.end()) return std::nullopt;
+  return fields_[it->second];
+}
+std::optional<std::reference_wrapper<const SchemaField>> 
StructType::GetFieldByIndex(
+    int32_t index) const {
+  if (index < 0 || index >= static_cast<int>(fields_.size())) {
+    return std::nullopt;
+  }
+  return fields_[index];
+}
+std::optional<std::reference_wrapper<const SchemaField>> 
StructType::GetFieldByName(
+    std::string_view name) const {
+  // TODO: what is the right behavior if there are duplicate names? (Are

Review Comment:
   It seems that the spec is unclear about this, so the behavior is undefined 
and implementation-specific.



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that does not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+  bool is_primitive() const override { return true; }
+  bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that has child fields.
+class ICEBERG_EXPORT NestedType : public Type {
+ public:
+  bool is_primitive() const override { return false; }
+  bool is_nested() const override { return true; }
+
+  /// \brief Get a view of the child fields.
+  [[nodiscard]] virtual std::span<const SchemaField> fields() const = 0;
+  /// \brief Get a field by field ID.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldById(int32_t field_id) const = 0;
+  /// \brief Get a field by index.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByIndex(int32_t index) const = 0;
+  /// \brief Get a field by name.

Review Comment:
   Is this case sensitive?



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that does not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+  bool is_primitive() const override { return true; }
+  bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that has child fields.
+class ICEBERG_EXPORT NestedType : public Type {

Review Comment:
   Do we need a function for getting the number of fields?



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+StructType::StructType(std::vector<SchemaField> fields) : 
fields_(std::move(fields)) {
+  size_t index = 0;
+  for (const auto& field : fields_) {
+    auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), 
index);
+    if (!inserted) {
+      throw std::runtime_error(

Review Comment:
   Should we define `IcebergException` or even a more specific exception to 
replace it?



##########
src/iceberg/schema.cc:
##########
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/schema.h"
+
+#include <format>

Review Comment:
   This is redundant.



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+StructType::StructType(std::vector<SchemaField> fields) : 
fields_(std::move(fields)) {
+  size_t index = 0;
+  for (const auto& field : fields_) {
+    auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), 
index);
+    if (!inserted) {
+      throw std::runtime_error(
+          std::format("StructType: duplicate field ID {} (field indices {} and 
{})",
+                      field.field_id(), it->second, index));
+    }
+
+    ++index;
+  }
+}
+
+TypeId StructType::type_id() const { return TypeId::kStruct; }
+std::string StructType::ToString() const {
+  std::string repr = "struct<\n";

Review Comment:
   Is there a specific reason to use a separate line for each sub-field? An 
alternative is the Hive style, where all fields are concatenated on a 
single line.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to