zhjwpku commented on code in PR #31:
URL: https://github.com/apache/iceberg-cpp/pull/31#discussion_r1929540361


##########
src/iceberg/type_fwd.h:
##########
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type_fwd.h
+/// Forward declarations and enum definitions.  When writing your own headers,
+/// you can include this instead of the "full" headers to help reduce compile
+/// times.
+
+namespace iceberg {
+
+/// \brief A data type.
+///
+/// This is not a complete data type by itself because some types are nested
+/// and/or parameterized.
+///
+/// Iceberg V3 types are not currently supported.
+enum class TypeId {
+  kBoolean,
+  kInt32,
+  kInt64,
+  kFloat32,
+  kFloat64,
+  kDecimal,
+  kDate,
+  kTime,
+  kTimestamp,
+  kTimestampTz,
+  kBinary,
+  kString,
+  kFixed,
+  kUuid,
+  kStruct,
+  kList,
+  kMap,
+};
+
+/// \brief The time unit.  In Iceberg V3 nanoseconds are also supported.
+enum class TimeUnit {
+  kMicrosecond,
+};
+
+class BinaryType;
+class BooleanType;
+class DateType;
+class DecimalType;
+class FixedType;
+class Float32Type;
+class Float64Type;
+class Int32Type;
+class Int64Type;
+class ListType;
+class MapType;
+class NestedType;
+class PrimitiveType;
+class Schema;
+class SchemaField;
+class StringType;
+class StructType;

Review Comment:
   typo: redundant



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that may not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+  bool is_primitive() const override { return true; }
+  bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that may have child fields.
+class ICEBERG_EXPORT NestedType : public Type {
+ public:
+  bool is_primitive() const override { return false; }
+  bool is_nested() const override { return true; }
+
+  /// \brief Get a view of the child fields.
+  [[nodiscard]] virtual std::span<const SchemaField> fields() const = 0;
+  /// \brief Get a field by field ID.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldById(int32_t field_id) const = 0;
+  /// \brief Get a field by index.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByIndex(int32_t index) const = 0;
+  /// \brief Get a field by name.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByName(std::string_view name) const = 0;
+};
+
+/// \defgroup type-primitive Primitive Types
+/// Primitive types do not have nested fields.
+/// @{
+
+/// \brief A data type representing a boolean.
+class ICEBERG_EXPORT BooleanType : public PrimitiveType {
+ public:
+  BooleanType() = default;
+  ~BooleanType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit signed integer.
+class ICEBERG_EXPORT Int32Type : public PrimitiveType {
+ public:
+  Int32Type() = default;
+  ~Int32Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit signed integer.
+class ICEBERG_EXPORT Int64Type : public PrimitiveType {
+ public:
+  Int64Type() = default;
+  ~Int64Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit (single precision) float.
+class ICEBERG_EXPORT Float32Type : public PrimitiveType {
+ public:
+  Float32Type() = default;
+  ~Float32Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit (double precision) float.
+class ICEBERG_EXPORT Float64Type : public PrimitiveType {
+ public:
+  Float64Type() = default;
+  ~Float64Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a fixed-precision decimal.
+class ICEBERG_EXPORT DecimalType : public PrimitiveType {
+ public:
+  constexpr static const int32_t kMaxPrecision = 38;
+
+  /// \brief Construct a decimal type with the given precision and scale.
+  DecimalType(int32_t precision, int32_t scale);
+  ~DecimalType() = default;
+
+  /// \brief Get the precision (the number of decimal digits).
+  [[nodiscard]] int32_t precision() const;
+  /// \brief Get the scale (essentially, the number of decimal digits after
+  ///   the decimal point; precisely, the value is scaled by $$10^{-s}$$.).
+  [[nodiscard]] int32_t scale() const;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+
+ private:
+  int32_t precision_;
+  int32_t scale_;
+};
+
+/// \brief A data type representing a calendar date without reference to a
+///   timezone or time.
+class ICEBERG_EXPORT DateType : public PrimitiveType {
+ public:
+  DateType() = default;
+  ~DateType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a wall clock time in microseconds without
+///   reference to a timezone or date.
+class ICEBERG_EXPORT TimeType : public PrimitiveType {
+ public:
+  TimeType() = default;
+  ~TimeType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A base class for any timestamp time (irrespective of unit or
+///   timezone).
+class ICEBERG_EXPORT TimestampBase : public PrimitiveType {
+ public:
+  /// \brief Is this type zoned or naive?
+  [[nodiscard]] virtual bool is_zoned() const = 0;
+  /// \brief The time resolution.
+  [[nodiscard]] virtual TimeUnit time_unit() const = 0;
+};
+
+/// \brief A data type representing a timestamp in microseconds without
+///   reference to a timezone.
+class ICEBERG_EXPORT TimestampType : public TimestampBase {
+ public:
+  TimestampType() = default;
+  ~TimestampType() = default;
+
+  bool is_zoned() const override;
+  TimeUnit time_unit() const override;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a timestamp as microseconds since the
+///   epoch in UTC.
+class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
+ public:
+  TimestampTzType() = default;
+  ~TimestampTzType() = default;
+
+  bool is_zoned() const override;
+  TimeUnit time_unit() const override;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a bytestring.
+class ICEBERG_EXPORT BinaryType : public PrimitiveType {
+ public:
+  BinaryType() = default;
+  ~BinaryType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a string.
+class ICEBERG_EXPORT StringType : public PrimitiveType {
+ public:
+  StringType() = default;
+  ~StringType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a fixed-length bytestring.
+class ICEBERG_EXPORT FixedType : public PrimitiveType {
+ public:
+  /// \brief Construct a fixed type with the given length.
+  FixedType(int32_t length);
+  ~FixedType() = default;
+
+  /// \brief The length (the number of bytes to store).
+  [[nodiscard]] int32_t length() const;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+
+ private:
+  int32_t length_;

Review Comment:
   iceberg-rust has the length of `u64`[1], `int32_t` can represent about 2 
gigabytes, maybe we should change to `int64_t` to be compatible with other 
language implementation?
   
   [1] 
https://github.com/apache/iceberg-rust/blob/main/crates/iceberg/src/spec/datatypes.rs#L241



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that may not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+  bool is_primitive() const override { return true; }
+  bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that may have child fields.
+class ICEBERG_EXPORT NestedType : public Type {
+ public:
+  bool is_primitive() const override { return false; }
+  bool is_nested() const override { return true; }
+
+  /// \brief Get a view of the child fields.
+  [[nodiscard]] virtual std::span<const SchemaField> fields() const = 0;
+  /// \brief Get a field by field ID.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldById(int32_t field_id) const = 0;
+  /// \brief Get a field by index.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByIndex(int32_t index) const = 0;
+  /// \brief Get a field by name.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByName(std::string_view name) const = 0;
+};
+
+/// \defgroup type-primitive Primitive Types
+/// Primitive types do not have nested fields.
+/// @{
+
+/// \brief A data type representing a boolean.
+class ICEBERG_EXPORT BooleanType : public PrimitiveType {
+ public:
+  BooleanType() = default;
+  ~BooleanType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit signed integer.
+class ICEBERG_EXPORT Int32Type : public PrimitiveType {
+ public:
+  Int32Type() = default;
+  ~Int32Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit signed integer.
+class ICEBERG_EXPORT Int64Type : public PrimitiveType {
+ public:
+  Int64Type() = default;
+  ~Int64Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit (single precision) float.

Review Comment:
   32-bit IEEE 754 floating point



##########
src/iceberg/type_fwd.h:
##########
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type_fwd.h
+/// Forward declarations and enum definitions.  When writing your own headers,
+/// you can include this instead of the "full" headers to help reduce compile
+/// times.
+
+namespace iceberg {
+
+/// \brief A data type.
+///
+/// This is not a complete data type by itself because some types are nested
+/// and/or parameterized.
+///
+/// Iceberg V3 types are not currently supported.
+enum class TypeId {
+  kBoolean,
+  kInt32,
+  kInt64,
+  kFloat32,
+  kFloat64,
+  kDecimal,
+  kDate,
+  kTime,
+  kTimestamp,
+  kTimestampTz,
+  kBinary,

Review Comment:
   nit: list these the same order as spec.



##########
src/iceberg/schema.h:
##########
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/schema.h
+/// Schemas for Iceberg tables.  This header contains the definition of Schema
+/// and any utility functions.  See iceberg/type.h and iceberg/field.h as well.
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/type.h"
+
+namespace iceberg {
+
+/// \brief A schema for a Table.
+///
+/// A schema is a list of typed columns, along with a unique integer ID.  A
+/// Table may have different schemas over its lifetime due to schema
+/// evolution.
+class ICEBERG_EXPORT Schema : public StructType {
+ public:
+  Schema(int32_t schema_id, std::vector<SchemaField> fields);
+
+  /// \brief Get the schema ID.
+  ///
+  /// Schemas are identified by a unique ID for the purposes of schema

Review Comment:
   nit: should this be `A Schema is identified by a unique ID` or `Schemas are 
identified by unique IDs`?



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
+std::string BooleanType::ToString() const { return "boolean"; }
+bool BooleanType::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kBoolean;
+}
+
+TypeId Int32Type::type_id() const { return TypeId::kInt32; }

Review Comment:
   Should we use Int/Long/Float/Double instead? I guess this arrow style, but I 
think we should be consistent with spec? I checked iceberg and pyiceberg, they 
do the spec way.



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
+std::string BooleanType::ToString() const { return "boolean"; }
+bool BooleanType::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kBoolean;
+}
+
+TypeId Int32Type::type_id() const { return TypeId::kInt32; }
+std::string Int32Type::ToString() const { return "int32"; }
+bool Int32Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kInt32;
+}
+
+TypeId Int64Type::type_id() const { return TypeId::kInt64; }
+std::string Int64Type::ToString() const { return "int64"; }
+bool Int64Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kInt64;
+}
+
+TypeId Float32Type::type_id() const { return TypeId::kFloat32; }
+std::string Float32Type::ToString() const { return "float32"; }
+bool Float32Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kFloat32;
+}
+
+TypeId Float64Type::type_id() const { return TypeId::kFloat64; }
+std::string Float64Type::ToString() const { return "float64"; }
+bool Float64Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kFloat64;
+}
+
+DecimalType::DecimalType(int32_t precision, int32_t scale)
+    : precision_(precision), scale_(scale) {
+  if (precision < 0 || precision > kMaxPrecision) {
+    throw std::runtime_error(
+        std::format("DecimalType: precision must be in [0, 38], was {}", 
precision));
+  }
+}
+
+int32_t DecimalType::precision() const { return precision_; }
+int32_t DecimalType::scale() const { return scale_; }
+TypeId DecimalType::type_id() const { return TypeId::kDecimal; }
+std::string DecimalType::ToString() const {
+  return std::format("decimal({}, {})", precision_, scale_);
+}
+bool DecimalType::Equals(const Type& other) const {
+  if (other.type_id() != TypeId::kDecimal) {
+    return false;
+  }
+  const auto& decimal = static_cast<const DecimalType&>(other);
+  return precision_ == decimal.precision_ && scale_ == decimal.scale_;
+}
+
+TypeId TimeType::type_id() const { return TypeId::kTime; }

Review Comment:
   nit: place TimeType after DateType



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;

Review Comment:
   Isn't this redundant with is_primitive()?



##########
src/iceberg/type.cc:
##########
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include <format>
+#include <iterator>
+#include <stdexcept>
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
+std::string BooleanType::ToString() const { return "boolean"; }
+bool BooleanType::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kBoolean;
+}
+
+TypeId Int32Type::type_id() const { return TypeId::kInt32; }
+std::string Int32Type::ToString() const { return "int32"; }
+bool Int32Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kInt32;
+}
+
+TypeId Int64Type::type_id() const { return TypeId::kInt64; }
+std::string Int64Type::ToString() const { return "int64"; }
+bool Int64Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kInt64;
+}
+
+TypeId Float32Type::type_id() const { return TypeId::kFloat32; }
+std::string Float32Type::ToString() const { return "float32"; }
+bool Float32Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kFloat32;
+}
+
+TypeId Float64Type::type_id() const { return TypeId::kFloat64; }
+std::string Float64Type::ToString() const { return "float64"; }
+bool Float64Type::Equals(const Type& other) const {
+  return other.type_id() == TypeId::kFloat64;
+}
+
+DecimalType::DecimalType(int32_t precision, int32_t scale)
+    : precision_(precision), scale_(scale) {
+  if (precision < 0 || precision > kMaxPrecision) {
+    throw std::runtime_error(
+        std::format("DecimalType: precision must be in [0, 38], was {}", 
precision));
+  }

Review Comment:
   `scale` should be greater or equal than 0 and less or equal than 
`precision`, we can be more strict if we do the right thing. Apache arrow-rs 
has this this restriction[0], but I don't see this in Apache arrow cpp.
   
   [0] 
https://github.com/apache/arrow-rs/blob/main/parquet/src/schema/types.rs#L505-L538



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that may not have child fields.

Review Comment:
   how about: A data type that does not have child fields.
   
   I think the word `may` is not precise, the same applied to some other places.



##########
src/iceberg/type.h:
##########
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg.  This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+  virtual ~Type() = default;
+
+  /// \brief Get the type ID.
+  [[nodiscard]] virtual TypeId type_id() const = 0;
+
+  /// \brief Is this a primitive type (may not have child fields)?
+  [[nodiscard]] virtual bool is_primitive() const = 0;
+
+  /// \brief Is this a nested type (may have child fields)?
+  [[nodiscard]] virtual bool is_nested() const = 0;
+
+  /// \brief Compare two types for equality.
+  friend bool operator==(const Type& lhs, const Type& rhs) { return 
lhs.Equals(rhs); }
+
+  /// \brief Compare two types for inequality.
+  friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == 
rhs); }
+
+ protected:
+  /// \brief Compare two types for equality.
+  [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that may not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+  bool is_primitive() const override { return true; }
+  bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that may have child fields.
+class ICEBERG_EXPORT NestedType : public Type {
+ public:
+  bool is_primitive() const override { return false; }
+  bool is_nested() const override { return true; }
+
+  /// \brief Get a view of the child fields.
+  [[nodiscard]] virtual std::span<const SchemaField> fields() const = 0;
+  /// \brief Get a field by field ID.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldById(int32_t field_id) const = 0;
+  /// \brief Get a field by index.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByIndex(int32_t index) const = 0;
+  /// \brief Get a field by name.
+  [[nodiscard]] virtual std::optional<std::reference_wrapper<const 
SchemaField>>
+  GetFieldByName(std::string_view name) const = 0;
+};
+
+/// \defgroup type-primitive Primitive Types
+/// Primitive types do not have nested fields.
+/// @{
+
+/// \brief A data type representing a boolean.
+class ICEBERG_EXPORT BooleanType : public PrimitiveType {
+ public:
+  BooleanType() = default;
+  ~BooleanType() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit signed integer.
+class ICEBERG_EXPORT Int32Type : public PrimitiveType {
+ public:
+  Int32Type() = default;
+  ~Int32Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit signed integer.
+class ICEBERG_EXPORT Int64Type : public PrimitiveType {
+ public:
+  Int64Type() = default;
+  ~Int64Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit (single precision) float.
+class ICEBERG_EXPORT Float32Type : public PrimitiveType {
+ public:
+  Float32Type() = default;
+  ~Float32Type() = default;
+
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit (double precision) float.

Review Comment:
   64-bit IEEE 754 floating point
   It will great we just copy past the type description from the spec. The same 
applied to other types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org


Reply via email to