wgtmac commented on code in PR #238:
URL: https://github.com/apache/iceberg-cpp/pull/238#discussion_r2438008542


##########
src/iceberg/expression/literal.cc:
##########
@@ -331,12 +339,50 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
   return lhs_is_negative <=> rhs_is_negative;
 }
 
+std::partial_ordering CompareDecimal(Literal const& lhs, Literal const& rhs) {
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()),
+                 "LHS of decimal comparison must hold Decimal");
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(rhs.value()),
+                 "RHS of decimal comparison must hold decimal");

Review Comment:
   ```suggestion
     ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()) && 
std::holds_alternative<Decimal>(rhs.value()),
                    "Cannot compare non-decimal literals!");
   ```



##########
src/iceberg/expression/literal.cc:
##########
@@ -440,6 +490,13 @@ std::string Literal::ToString() const {
     case TypeId::kDouble: {
       return std::to_string(std::get<double>(value_));
     }
+    case TypeId::kDecimal: {
+      auto decimal_type = internal::checked_pointer_cast<DecimalType>(type_);

Review Comment:
   Same thing here. Let's avoid shared_pointer operation as much as possible.



##########
src/iceberg/expression/literal.h:
##########
@@ -56,10 +58,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
                              int64_t,         // for long, timestamp, 
timestamp_tz, time
                              float,           // for float
                              double,          // for double
-                             std::string,     // for string
-                             Uuid,            // for uuid
-                             std::vector<uint8_t>,     // for binary, fixed
-                             std::array<uint8_t, 16>,  // for decimal
+                             ::iceberg::Decimal,    // for decimal

Review Comment:
   What about putting specialized types like Decimal and Uuid to the end before 
BelowXXX?



##########
src/iceberg/expression/literal.cc:
##########
@@ -24,9 +24,10 @@
 #include <cstdint>
 #include <string>
 
-#include "iceberg/type_fwd.h"
+#include "iceberg/exception.h"

Review Comment:
   ```suggestion
   ```
   
   Let's fix decimal ToString not to throw and remove this.



##########
src/iceberg/result.h:
##########
@@ -47,6 +47,7 @@ enum class ErrorKind {
   kNotFound,
   kNotImplemented,
   kNotSupported,
+  kRescaleDataLoss,

Review Comment:
   See my other comment to remove this.



##########
src/iceberg/util/decimal.cc:
##########
@@ -522,8 +552,8 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, 
int32_t new_scale) const {
       RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out);
 
   if (rescale_would_cause_data_loss) {
-    return Invalid("Rescale {} from {} to {} would cause data loss", 
ToIntegerString(),
-                   orig_scale, new_scale);
+    return RescaleDataLoss("Rescale {} from {} to {} would cause data loss",

Review Comment:
   Perhaps we need still use Invalid.



##########
src/iceberg/expression/literal.cc:
##########
@@ -193,12 +205,44 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
   return lhs_is_negative <=> rhs_is_negative;
 }
 
+std::strong_ordering CompareDecimal(Literal const& lhs, Literal const& rhs) {
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()),
+                 "LHS of decimal comparison must hold Decimal");
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(rhs.value()),
+                 "RHS of decimal comparison must hold decimal");
+  const auto& lhs_type = std::dynamic_pointer_cast<DecimalType>(lhs.type());
+  const auto& rhs_type = std::dynamic_pointer_cast<DecimalType>(rhs.type());
+  ICEBERG_DCHECK(lhs_type != nullptr, "LHS type must be DecimalType");
+  ICEBERG_DCHECK(rhs_type != nullptr, "RHS type must be DecimalType");
+  auto lhs_decimal = std::get<Decimal>(lhs.value());
+  auto rhs_decimal = std::get<Decimal>(rhs.value());
+  if (lhs_type->scale() == rhs_type->scale()) {
+    return lhs_decimal <=> rhs_decimal;
+  } else if (lhs_type->scale() > rhs_type->scale()) {
+    // Rescale to larger scale
+    auto rhs_res = rhs_decimal.Rescale(rhs_type->scale(), lhs_type->scale());
+    if (!rhs_res) {
+      // Rescale would cause data loss, so lhs is definitely less than rhs
+      return std::strong_ordering::less;
+    }
+    return lhs_decimal <=> rhs_res.value();
+  } else {
+    // Rescale to larger scale
+    auto lhs_res = lhs_decimal.Rescale(lhs_type->scale(), rhs_type->scale());
+    if (!lhs_res) {
+      // Rescale would cause data loss, so lhs is definitely greater than rhs
+      return std::strong_ordering::greater;
+    }
+    return lhs_res.value() <=> rhs_decimal;
+  }
+}
+
 bool Literal::operator==(const Literal& other) const { return (*this <=> 
other) == 0; }
 
 // Three-way comparison operator
 std::partial_ordering Literal::operator<=>(const Literal& other) const {
   // If types are different, comparison is unordered
-  if (*type_ != *other.type_) {
+  if (type_->type_id() != other.type_->type_id()) {

Review Comment:
   Then perhaps revert this line so all types still require strong type 
equality?



##########
src/iceberg/expression/literal.cc:
##########
@@ -331,12 +339,50 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
   return lhs_is_negative <=> rhs_is_negative;
 }
 
+std::partial_ordering CompareDecimal(Literal const& lhs, Literal const& rhs) {
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()),
+                 "LHS of decimal comparison must hold Decimal");
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(rhs.value()),
+                 "RHS of decimal comparison must hold decimal");
+  auto lhs_type = internal::checked_pointer_cast<DecimalType>(lhs.type());
+  auto rhs_type = internal::checked_pointer_cast<DecimalType>(rhs.type());

Review Comment:
   ```suggestion
     const auto& lhs_type = internal::checked_cast<DecimalType>(*lhs.type());
     const auto& rhs_type = internal::checked_cast<DecimalType>(*rhs.type());
   ```
   
   Can we avoid unnecessary shared_ptr operation?



##########
src/iceberg/expression/literal.cc:
##########
@@ -331,12 +339,50 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
   return lhs_is_negative <=> rhs_is_negative;
 }
 
+std::partial_ordering CompareDecimal(Literal const& lhs, Literal const& rhs) {
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()),
+                 "LHS of decimal comparison must hold Decimal");
+  ICEBERG_DCHECK(std::holds_alternative<Decimal>(rhs.value()),
+                 "RHS of decimal comparison must hold decimal");
+  auto lhs_type = internal::checked_pointer_cast<DecimalType>(lhs.type());
+  auto rhs_type = internal::checked_pointer_cast<DecimalType>(rhs.type());
+  auto lhs_decimal = std::get<Decimal>(lhs.value());
+  auto rhs_decimal = std::get<Decimal>(rhs.value());
+  if (lhs_type->scale() == rhs_type->scale()) {
+    return lhs_decimal <=> rhs_decimal;
+  } else if (lhs_type->scale() > rhs_type->scale()) {
+    // Rescale to larger scale
+    auto rhs_res = rhs_decimal.Rescale(rhs_type->scale(), lhs_type->scale());
+    if (!rhs_res.has_value()) {
+      if (rhs_res.error().kind == ErrorKind::kRescaleDataLoss) {

Review Comment:
   IMHO, I don't think `ErrorKind::kRescaleDataLoss` is general enough to be 
added as a new error kind. Perhaps we can add a `static std::partial_ordering 
Decimal::Compare(...)` for this?



##########
src/iceberg/type_fwd.h:
##########
@@ -115,6 +115,7 @@ class NameMapping;
 enum class SnapshotRefType;
 enum class TransformType;
 
+class Decimal;

Review Comment:
   What aboud `Uuid`?



##########
src/iceberg/test/bucket_util_test.cc:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/bucket_util.h"
+
+#include <chrono>
+
+#include <gtest/gtest.h>
+
+namespace iceberg {
+
+// The following tests are from
+// https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements
+TEST(BucketUtilsTest, HashHelper) {
+  // int and long
+  EXPECT_EQ(BucketUtils::HashInt(34), 2017239379);
+  EXPECT_EQ(BucketUtils::HashLong(34L), 2017239379);
+
+  // decimal hash
+  auto decimal = Decimal::FromString("14.20");
+  ASSERT_TRUE(decimal.has_value());
+  EXPECT_EQ(BucketUtils::HashBytes(Decimal::ToBigEndian(decimal->value())), 
-500754589);
+
+  // date hash
+  // 2017-11-16
+  std::chrono::sys_days sd = std::chrono::year{2017} / 11 / 16;
+  std::chrono::sys_days epoch{std::chrono::year{1970} / 1 / 1};
+  int32_t days = (sd - epoch).count();
+  std::cout << "days: " << days << std::endl;
+  EXPECT_EQ(BucketUtils::HashInt(days), -653330422);
+
+  // time
+  // 22:31:08 in microseconds
+  int64_t time_micros = (22 * 3600 + 31 * 60 + 8) * 1000000LL;
+  std::cout << "time micros: " << time_micros << std::endl;
+  EXPECT_EQ(BucketUtils::HashLong(time_micros), -662762989);
+
+  // timestamp
+  // 2017-11-16T22:31:08 in microseconds
+  std::chrono::system_clock::time_point tp =

Review Comment:
   No problem!



##########
src/iceberg/expression/literal.cc:
##########
@@ -440,6 +490,13 @@ std::string Literal::ToString() const {
     case TypeId::kDouble: {
       return std::to_string(std::get<double>(value_));
     }
+    case TypeId::kDecimal: {
+      auto decimal_type = internal::checked_pointer_cast<DecimalType>(type_);
+      auto decimal = std::get<::iceberg::Decimal>(value_);
+      auto result = decimal.ToString(decimal_type->scale());
+      ICEBERG_CHECK(result, "Decimal ToString failed");
+      return *result;

Review Comment:
   ```suggestion
         return decimal.ToString(decimal_type->scale()).value_or("invalid 
literal of type decimal");
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to