wgtmac commented on code in PR #61: URL: https://github.com/apache/iceberg-cpp/pull/61#discussion_r2039748800
########## src/iceberg/transform.cc: ########## @@ -21,65 +21,113 @@ #include <format> +#include "iceberg/transform_function.h" +#include "iceberg/type.h" + namespace iceberg { -namespace { -/// \brief Get the relative transform name -constexpr std::string_view ToString(TransformType type) { - switch (type) { - case TransformType::kUnknown: - return "unknown"; +std::shared_ptr<Transform> Transform::Identity() { + static auto instance = std::make_shared<Transform>(TransformType::kIdentity); + return instance; +} + +Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} Review Comment: Throw for parameterized transform_type? ########## src/iceberg/transform.h: ########## @@ -56,16 +57,98 @@ enum class TransformType { kVoid, }; +/// \brief Get the relative transform name +constexpr std::string_view TransformTypeToString(TransformType type) { + switch (type) { + case TransformType::kUnknown: + return "unknown"; + case TransformType::kIdentity: + return "identity"; + case TransformType::kBucket: + return "bucket"; + case TransformType::kTruncate: + return "truncate"; + case TransformType::kYear: + return "year"; + case TransformType::kMonth: + return "month"; + case TransformType::kDay: + return "day"; + case TransformType::kHour: + return "hour"; + case TransformType::kVoid: + return "void"; + } +} + +/// \brief Represents a transform used in partitioning or sorting in Iceberg. +/// +/// This class supports binding to a source type and instantiating the corresponding +/// TransformFunction, as well as serialization-friendly introspection. +class ICEBERG_EXPORT Transform : public util::Formattable { + public: + /// \brief Returns a shared singleton instance of the Identity transform. + /// + /// This transform leaves values unchanged and is commonly used for direct partitioning. + /// \return A shared pointer to the Identity transform. + static std::shared_ptr<Transform> Identity(); + + /// \brief Constructs a Transform of the specified type (for non-parametric types). + /// \param transform_type The transform type (e.g., identity, year, day). + explicit Transform(TransformType transform_type); + + /// \brief Constructs a parameterized Transform (e.g., bucket(16), truncate(4)). + /// \param transform_type The transform type. + /// \param param The integer parameter associated with the transform. + Transform(TransformType transform_type, int32_t param); + + /// \brief Returns the transform type. + TransformType transform_type() const; + + /// \brief Binds this transform to a source type, returning a typed TransformFunction. + /// + /// This creates a concrete transform implementation based on the transform type and + /// parameter. + /// \param source_type The source column type to bind to. + /// \return A TransformFunction instance wrapped in `expected`, or an error on failure. + expected<std::unique_ptr<TransformFunction>, Error> Bind( Review Comment: Can we use forward declaration here and define TransformFunction in the `transform_function.h`? ########## src/iceberg/transform.cc: ########## @@ -21,65 +21,113 @@ #include <format> +#include "iceberg/transform_function.h" +#include "iceberg/type.h" + namespace iceberg { -namespace { -/// \brief Get the relative transform name -constexpr std::string_view ToString(TransformType type) { - switch (type) { - case TransformType::kUnknown: - return "unknown"; +std::shared_ptr<Transform> Transform::Identity() { + static auto instance = std::make_shared<Transform>(TransformType::kIdentity); + return instance; +} + +Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} + +Transform::Transform(TransformType transform_type, int32_t param) Review Comment: Throw for non-parameterized transform_type? ########## src/iceberg/transform.cc: ########## @@ -21,65 +21,113 @@ #include <format> +#include "iceberg/transform_function.h" +#include "iceberg/type.h" + namespace iceberg { -namespace { -/// \brief Get the relative transform name -constexpr std::string_view ToString(TransformType type) { - switch (type) { - case TransformType::kUnknown: - return "unknown"; +std::shared_ptr<Transform> Transform::Identity() { + static auto instance = std::make_shared<Transform>(TransformType::kIdentity); + return instance; +} + +Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} + +Transform::Transform(TransformType transform_type, int32_t param) + : transform_type_(transform_type), param_(param) {} + +TransformType Transform::transform_type() const { return transform_type_; } + +expected<std::unique_ptr<TransformFunction>, Error> Transform::Bind( Review Comment: It seems that we should merge `transform_function.h/cc` to `transform.h/cc`. They have strong connections. ########## src/iceberg/transform.cc: ########## @@ -21,65 +21,113 @@ #include <format> +#include "iceberg/transform_function.h" +#include "iceberg/type.h" + namespace iceberg { -namespace { -/// \brief Get the relative transform name -constexpr std::string_view ToString(TransformType type) { - switch (type) { - case TransformType::kUnknown: - return "unknown"; +std::shared_ptr<Transform> Transform::Identity() { + static auto instance = std::make_shared<Transform>(TransformType::kIdentity); + return instance; +} + +Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} + +Transform::Transform(TransformType transform_type, int32_t param) Review Comment: If we want to avoid invalid inputs. Perhaps we should define a separate static functions to create each transform type? ########## src/iceberg/transform.h: ########## @@ -56,16 +57,98 @@ enum class TransformType { kVoid, }; +/// \brief Get the relative transform name +constexpr std::string_view TransformTypeToString(TransformType type) { + switch (type) { + case TransformType::kUnknown: + return "unknown"; + case TransformType::kIdentity: + return "identity"; + case TransformType::kBucket: + return "bucket"; + case TransformType::kTruncate: + return "truncate"; + case TransformType::kYear: + return "year"; + case TransformType::kMonth: + return "month"; + case TransformType::kDay: + return "day"; + case TransformType::kHour: + return "hour"; + case TransformType::kVoid: + return "void"; + } +} + +/// \brief Represents a transform used in partitioning or sorting in Iceberg. +/// +/// This class supports binding to a source type and instantiating the corresponding +/// TransformFunction, as well as serialization-friendly introspection. +class ICEBERG_EXPORT Transform : public util::Formattable { + public: + /// \brief Returns a shared singleton instance of the Identity transform. + /// + /// This transform leaves values unchanged and is commonly used for direct partitioning. + /// \return A shared pointer to the Identity transform. + static std::shared_ptr<Transform> Identity(); + + /// \brief Constructs a Transform of the specified type (for non-parametric types). + /// \param transform_type The transform type (e.g., identity, year, day). + explicit Transform(TransformType transform_type); + + /// \brief Constructs a parameterized Transform (e.g., bucket(16), truncate(4)). + /// \param transform_type The transform type. + /// \param param The integer parameter associated with the transform. + Transform(TransformType transform_type, int32_t param); + + /// \brief Returns the transform type. + TransformType transform_type() const; + + /// \brief Binds this transform to a source type, returning a typed TransformFunction. + /// + /// This creates a concrete transform implementation based on the transform type and + /// parameter. + /// \param source_type The source column type to bind to. + /// \return A TransformFunction instance wrapped in `expected`, or an error on failure. + expected<std::unique_ptr<TransformFunction>, Error> Bind( + const std::shared_ptr<Type>& source_type) const; + + /// \brief Returns a string representation of this transform (e.g., "bucket[16]"). + std::string ToString() const override; + + /// \brief Equality comparison. + friend bool operator==(const Transform& lhs, const Transform& rhs) { + return lhs.Equals(rhs); + } + + /// \brief Inequality comparison. + friend bool operator!=(const Transform& lhs, const Transform& rhs) { + return !(lhs == rhs); + } + + private: + /// \brief Checks equality with another Transform instance. + [[nodiscard]] virtual bool Equals(const Transform& other) const; + + TransformType transform_type_; + ///< Optional parameter (e.g., num_buckets, width) + std::variant<std::monostate, int32_t> param_; +}; + /// \brief A transform function used for partitioning. -class ICEBERG_EXPORT TransformFunction : public util::Formattable { +class ICEBERG_EXPORT TransformFunction { public: - explicit TransformFunction(TransformType type); + virtual ~TransformFunction() = default; + TransformFunction(TransformType transform_type, std::shared_ptr<Type> source_type); /// \brief Transform an input array to a new array virtual expected<ArrowArray, Error> Transform(const ArrowArray& data) = 0; /// \brief Get the transform type - virtual TransformType transform_type() const; - - std::string ToString() const override; + TransformType transform_type() const; + /// \brief Get the source type of transform function + const std::shared_ptr<Type>& source_type() const; + /// \brief Get the result type of transform function + virtual expected<std::shared_ptr<Type>, Error> ResultType() const = 0; Review Comment: Use `Result`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org