wgtmac commented on code in PR #54: URL: https://github.com/apache/iceberg-cpp/pull/54#discussion_r2020448679
########## src/iceberg/transform.h: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/transform.h + +#include <cstdint> +#include <memory> + +#include "iceberg/arrow_c_data_internal.h" +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Transform types used for partitioning +enum class TransformType : uint8_t { + /// Equal to source value, unmodified + kIdentity, + /// Hash of value, mod `N` + kBucket, + /// Value truncated to width `W` + kTruncate, + /// Extract a date or timestamp year, as years from 1970 + kYear, + /// Extract a date or timestamp month, as months from 1970-01-01 + kMonth, + /// Extract a date or timestamp day, as days from 1970-01-01 + kDay, + /// Extract a timestamp hour, as hours from 1970-01-01 00:00:00 + kHour, + /// Always produces `null` + kVoid, + /// Used to represent some customized transform that can't be recognized or supported now. + kUnknown, +}; + +/// \brief Get the relative transform name +constexpr std::string_view ToString(TransformType type) { + /// Transform names used for formatting + constexpr std::string_view TransformTypeNames[] = { + "Identity", "Bucket", "Truncate", "Year", "Month", "Day", "Hour", "Void", "Unknown" + }; + return TransformTypeNames[static_cast<size_t>(type)]; Review Comment: Is it undefined behavior to do this? ########## src/iceberg/transform.h: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/transform.h + +#include <cstdint> +#include <memory> + +#include "iceberg/arrow_c_data_internal.h" +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Transform types used for partitioning +enum class TransformType : uint8_t { + /// Equal to source value, unmodified + kIdentity, + /// Hash of value, mod `N` + kBucket, + /// Value truncated to width `W` + kTruncate, + /// Extract a date or timestamp year, as years from 1970 + kYear, + /// Extract a date or timestamp month, as months from 1970-01-01 + kMonth, + /// Extract a date or timestamp day, as days from 1970-01-01 + kDay, + /// Extract a timestamp hour, as hours from 1970-01-01 00:00:00 + kHour, + /// Always produces `null` + kVoid, + /// Used to represent some customized transform that can't be recognized or supported now. + kUnknown, +}; + +/// \brief Get the relative transform name +constexpr std::string_view ToString(TransformType type) { + /// Transform names used for formatting + constexpr std::string_view TransformTypeNames[] = { + "Identity", "Bucket", "Truncate", "Year", "Month", "Day", "Hour", "Void", "Unknown" + }; + return TransformTypeNames[static_cast<size_t>(type)]; +} + +/// \brief A transform function used for partitioning. +class ICEBERG_EXPORT TransformFunction : public util::Formattable { + public: + explicit TransformFunction(TransformType type); + /// \brief Transform an input array to a new array + [[nodiscard]] virtual expected<std::unique_ptr<ArrowArray>, Error> Transform(const ArrowArray& inputArrowArray) = 0; Review Comment: ```suggestion virtual expected<ArrowArray, Error> Transform(const ArrowArray& data) = 0; ``` `expected` already has `nodiscard` attribute in its definition, we don't need it here. For `ArrowArray`, usually we don't use `std::unique_ptr` since we have to explicitly call its release function. ########## src/iceberg/transform.h: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/transform.h + +#include <cstdint> +#include <memory> + +#include "iceberg/arrow_c_data_internal.h" Review Comment: Do not include any internal header in the public header file. To use Arrow C data, we can include `arrow_c_data.h`. ########## src/iceberg/transform.h: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/transform.h + +#include <cstdint> +#include <memory> + +#include "iceberg/arrow_c_data_internal.h" +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Transform types used for partitioning +enum class TransformType : uint8_t { + /// Equal to source value, unmodified + kIdentity, + /// Hash of value, mod `N` + kBucket, + /// Value truncated to width `W` + kTruncate, + /// Extract a date or timestamp year, as years from 1970 + kYear, + /// Extract a date or timestamp month, as months from 1970-01-01 + kMonth, + /// Extract a date or timestamp day, as days from 1970-01-01 + kDay, + /// Extract a timestamp hour, as hours from 1970-01-01 00:00:00 + kHour, + /// Always produces `null` + kVoid, + /// Used to represent some customized transform that can't be recognized or supported now. + kUnknown, Review Comment: What about putting `kUnknown` to the beginning? ########## test/transform_test.cc: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/transform.h" + +#include <format> +#include <memory> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "iceberg/util/formatter.h" + +TEST(TransformTest, TransformType) { + { + EXPECT_EQ("Identity", iceberg::ToString(iceberg::TransformType::kIdentity)); + EXPECT_EQ("Bucket", iceberg::ToString(iceberg::TransformType::kBucket)); + EXPECT_EQ("Truncate", iceberg::ToString(iceberg::TransformType::kTruncate)); + EXPECT_EQ("Year", iceberg::ToString(iceberg::TransformType::kYear)); + EXPECT_EQ("Month", iceberg::ToString(iceberg::TransformType::kMonth)); + EXPECT_EQ("Day", iceberg::ToString(iceberg::TransformType::kDay)); + EXPECT_EQ("Hour", iceberg::ToString(iceberg::TransformType::kHour)); + EXPECT_EQ("Void", iceberg::ToString(iceberg::TransformType::kVoid)); + EXPECT_EQ("Unknown", iceberg::ToString(iceberg::TransformType::kUnknown)); + } +} + +TEST(TransformTest, TransformFunction) { + class TestTransformFunction : public iceberg::TransformFunction { + public: + TestTransformFunction() : TransformFunction(iceberg::TransformType::kUnknown) {} + iceberg::expected<std::unique_ptr<ArrowArray>, iceberg::Error> Transform( + const ArrowArray& input) override { + return iceberg::unexpected(iceberg::Error{.kind = iceberg::ErrorKind::kNotSupported, + .message = "test transform function"}); + } + }; + + TestTransformFunction transform; + EXPECT_EQ(iceberg::TransformType::kUnknown, transform.transform_type()); + EXPECT_EQ("Unknown", transform.ToString()); + EXPECT_EQ("Unknown", std::format("{}", transform)); + + auto [_, arrow_array] = + iceberg::internal::CreateExampleArrowSchemaAndArrayByNanoarrow(); Review Comment: I will remove CreateExampleArrowSchemaAndArrayByNanoarrow some day. Perhaps add a TODO here? ########## test/transform_test.cc: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/transform.h" + +#include <format> +#include <memory> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "iceberg/util/formatter.h" + +TEST(TransformTest, TransformType) { + { + EXPECT_EQ("Identity", iceberg::ToString(iceberg::TransformType::kIdentity)); + EXPECT_EQ("Bucket", iceberg::ToString(iceberg::TransformType::kBucket)); + EXPECT_EQ("Truncate", iceberg::ToString(iceberg::TransformType::kTruncate)); + EXPECT_EQ("Year", iceberg::ToString(iceberg::TransformType::kYear)); + EXPECT_EQ("Month", iceberg::ToString(iceberg::TransformType::kMonth)); + EXPECT_EQ("Day", iceberg::ToString(iceberg::TransformType::kDay)); + EXPECT_EQ("Hour", iceberg::ToString(iceberg::TransformType::kHour)); + EXPECT_EQ("Void", iceberg::ToString(iceberg::TransformType::kVoid)); + EXPECT_EQ("Unknown", iceberg::ToString(iceberg::TransformType::kUnknown)); + } Review Comment: Add the `iceberg` namespace so we don't have to add `iceberg::` everywhere. ########## src/iceberg/exception.h: ########## @@ -38,4 +38,11 @@ class ICEBERG_EXPORT IcebergError : public std::runtime_error { explicit IcebergError(const std::string& what) : std::runtime_error(what) {} }; +#define ICEBERG_CHECK_FMT(condition, ...) \ Review Comment: ```suggestion #define ICEBERG_CHECK(condition, ...) \ ``` ########## src/iceberg/partition_field.h: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/partition_field.h +/// A partition field in a partition spec + +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief a field with its transform. +class ICEBERG_EXPORT PartitionField : public util::Formattable { + public: + /// \brief Construct a field. + /// \param[in] source_id The source field ID. + /// \param[in] field_id The partition field ID. + /// \param[in] name The partition field name. + /// \param[in] transformType The transform type. + PartitionField(int32_t source_id, int32_t field_id, std::string name, TransformType transformType); + + /// \brief Get the source field ID. + [[nodiscard]] int32_t source_field_id() const; + + /// \brief Get the partition field ID. + [[nodiscard]] int32_t field_id() const; + + /// \brief Get the partition field name. + [[nodiscard]] std::string_view name() const; + + /// \brief Get the transform type. + [[nodiscard]] TransformType transform_type() const; Review Comment: I think so. Some transform types required additional parameters like bucket number or truncate length. ########## src/iceberg/partition_spec.h: ########## @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/partition_spec.h +/// Partition specs for Iceberg tables. + +#include <cstdint> +#include <string> +#include <vector> +#include <span> + +#include "iceberg/iceberg_export.h" +#include "iceberg/partition_field.h" + +namespace iceberg { + +/// \brief A partition spec for a Table. +/// +/// A partition spec is a list of partition fields, along with a unique integer ID. A +/// Table may have different partition specs over its lifetime due to partition spec +/// evolution. +class ICEBERG_EXPORT PartitionSpec { + public: + virtual ~PartitionSpec() = default; + PartitionSpec(int32_t spec_id, std::vector<PartitionField> fields); Review Comment: IMO, we need an `ArrowSchema` in the constructor to denote to the partition schema. ########## src/iceberg/partition_spec.h: ########## @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/partition_spec.h +/// Partition specs for Iceberg tables. + +#include <cstdint> +#include <string> +#include <vector> +#include <span> + +#include "iceberg/iceberg_export.h" +#include "iceberg/partition_field.h" + +namespace iceberg { + +/// \brief A partition spec for a Table. +/// +/// A partition spec is a list of partition fields, along with a unique integer ID. A +/// Table may have different partition specs over its lifetime due to partition spec +/// evolution. +class ICEBERG_EXPORT PartitionSpec { + public: + virtual ~PartitionSpec() = default; + PartitionSpec(int32_t spec_id, std::vector<PartitionField> fields); + /// \brief Get the spec ID. + [[nodiscard]] int32_t spec_id() const; + /// \brief Get a view of the partition fields. + [[nodiscard]] virtual std::span<const PartitionField> fields() const; + /// \brief Get a field by field ID. Review Comment: Perhaps we don't need these `GetFieldByXXX` at this moment? ########## test/transform_test.cc: ########## @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/transform.h" + +#include <format> +#include <memory> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "iceberg/util/formatter.h" + +TEST(TransformTest, TransformType) { + { + EXPECT_EQ("Identity", iceberg::ToString(iceberg::TransformType::kIdentity)); + EXPECT_EQ("Bucket", iceberg::ToString(iceberg::TransformType::kBucket)); + EXPECT_EQ("Truncate", iceberg::ToString(iceberg::TransformType::kTruncate)); + EXPECT_EQ("Year", iceberg::ToString(iceberg::TransformType::kYear)); + EXPECT_EQ("Month", iceberg::ToString(iceberg::TransformType::kMonth)); + EXPECT_EQ("Day", iceberg::ToString(iceberg::TransformType::kDay)); + EXPECT_EQ("Hour", iceberg::ToString(iceberg::TransformType::kHour)); + EXPECT_EQ("Void", iceberg::ToString(iceberg::TransformType::kVoid)); + EXPECT_EQ("Unknown", iceberg::ToString(iceberg::TransformType::kUnknown)); + } Review Comment: ```suggestion EXPECT_EQ("Identity", iceberg::ToString(iceberg::TransformType::kIdentity)); EXPECT_EQ("Bucket", iceberg::ToString(iceberg::TransformType::kBucket)); EXPECT_EQ("Truncate", iceberg::ToString(iceberg::TransformType::kTruncate)); EXPECT_EQ("Year", iceberg::ToString(iceberg::TransformType::kYear)); EXPECT_EQ("Month", iceberg::ToString(iceberg::TransformType::kMonth)); EXPECT_EQ("Day", iceberg::ToString(iceberg::TransformType::kDay)); EXPECT_EQ("Hour", iceberg::ToString(iceberg::TransformType::kHour)); EXPECT_EQ("Void", iceberg::ToString(iceberg::TransformType::kVoid)); EXPECT_EQ("Unknown", iceberg::ToString(iceberg::TransformType::kUnknown)); ``` ########## src/iceberg/partition_field.h: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/partition_field.h +/// A partition field in a partition spec + +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief a field with its transform. +class ICEBERG_EXPORT PartitionField : public util::Formattable { + public: + /// \brief Construct a field. + /// \param[in] source_id The source field ID. + /// \param[in] field_id The partition field ID. + /// \param[in] name The partition field name. + /// \param[in] transformType The transform type. + PartitionField(int32_t source_id, int32_t field_id, std::string name, TransformType transformType); + + /// \brief Get the source field ID. + [[nodiscard]] int32_t source_field_id() const; + + /// \brief Get the partition field ID. + [[nodiscard]] int32_t field_id() const; + + /// \brief Get the partition field name. + [[nodiscard]] std::string_view name() const; + + /// \brief Get the transform type. + [[nodiscard]] TransformType transform_type() const; + + [[nodiscard]] std::string ToString() const override; + + friend bool operator==(const PartitionField& lhs, const PartitionField& rhs) { + return lhs.Equals(rhs); + } + + friend bool operator!=(const PartitionField& lhs, const PartitionField& rhs) { + return !(lhs == rhs); + } + + private: + /// \brief Compare two fields for equality. + [[nodiscard]] bool Equals(const PartitionField& other) const; + + int32_t source_field_id_; Review Comment: Should we consider multi-argument transform here? In that case, a partition field might have more than one source field ids. See https://docs.google.com/document/d/1aDoZqRgvDOOUVAGhvKZbp5vFstjsAMY4EFCyjlxpaaw ########## src/iceberg/transform.h: ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/transform.h + +#include <cstdint> +#include <memory> + +#include "iceberg/arrow_c_data_internal.h" +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Transform types used for partitioning +enum class TransformType : uint8_t { + /// Equal to source value, unmodified + kIdentity, + /// Hash of value, mod `N` + kBucket, + /// Value truncated to width `W` + kTruncate, + /// Extract a date or timestamp year, as years from 1970 + kYear, + /// Extract a date or timestamp month, as months from 1970-01-01 + kMonth, + /// Extract a date or timestamp day, as days from 1970-01-01 + kDay, + /// Extract a timestamp hour, as hours from 1970-01-01 00:00:00 + kHour, + /// Always produces `null` + kVoid, + /// Used to represent some customized transform that can't be recognized or supported now. + kUnknown, +}; + +/// \brief Get the relative transform name +constexpr std::string_view ToString(TransformType type) { + /// Transform names used for formatting + constexpr std::string_view TransformTypeNames[] = { + "Identity", "Bucket", "Truncate", "Year", "Month", "Day", "Hour", "Void", "Unknown" + }; + return TransformTypeNames[static_cast<size_t>(type)]; +} + +/// \brief A transform function used for partitioning. +class ICEBERG_EXPORT TransformFunction : public util::Formattable { + public: + explicit TransformFunction(TransformType type); + /// \brief Transform an input array to a new array + [[nodiscard]] virtual expected<std::unique_ptr<ArrowArray>, Error> Transform(const ArrowArray& inputArrowArray) = 0; + /// \brief Get the transform type + [[nodiscard]] virtual TransformType transform_type() const; + [[nodiscard]] std::string ToString() const override; Review Comment: ```suggestion virtual TransformType transform_type() const; std::string ToString() const override; ``` I don't think we need to add nodiscard to these functions. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org