Copilot commented on code in PR #60996:
URL: https://github.com/apache/doris/pull/60996#discussion_r2877915252


##########
be/src/vec/functions/function_hll.cpp:
##########
@@ -259,6 +259,47 @@ struct HLLHash {
     }
 };
 
+struct ToHll {
+    static constexpr auto name = "to_hll";
+
+    using ReturnType = DataTypeHLL;
+    template <typename ColumnType>
+    static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
+        return execute<ColumnType, false>(col, nullptr, col_res);
+    }
+    template <typename ColumnType>
+    static Status vector_nullable(const ColumnType* col, const NullMap& 
nullmap,
+                                  MutableColumnPtr& col_res) {
+        return execute<ColumnType, true>(col, &nullmap, col_res);
+    }
+    template <typename ColumnType, bool arg_is_nullable>
+    static Status execute(const ColumnType* col, const NullMap* nullmap,
+                          MutableColumnPtr& col_res) {
+        if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
+            auto* res_column = reinterpret_cast<ColumnHLL*>(col_res.get());
+            auto& res_data = res_column->get_data();
+            size_t size = col->size();
+            const auto& data = col->get_data();
+
+            for (size_t i = 0; i < size; ++i) {
+                if (arg_is_nullable && (*nullmap)[i]) {
+                    continue;
+                } else {
+                    int64_t value = data[i];
+                    if (value < 0) {
+                        return Status::InvalidArgument(
+                                "to_hll does not support negative bigint 
value: {}", value);
+                    }
+                    res_data[i].update(static_cast<uint64_t>(value));
+                }

Review Comment:
   In the non-nullable instantiation, `nullmap` is passed as `nullptr`, but the 
loop still references `(*nullmap)[i]` inside a normal `if` condition. While 
short-circuiting avoids evaluation when `arg_is_nullable` is `false`, rewriting 
this as an `if constexpr (arg_is_nullable)` block (or splitting 
nullable/non-nullable loops) avoids relying on that subtlety and keeps static 
analyzers/sanitizers from flagging a potential null dereference.
   ```suggestion
                   if constexpr (arg_is_nullable) {
                       if ((*nullmap)[i]) {
                           continue;
                       }
                   }
                   int64_t value = data[i];
                   if (value < 0) {
                       return Status::InvalidArgument(
                               "to_hll does not support negative bigint value: 
{}", value);
                   }
                   res_data[i].update(static_cast<uint64_t>(value));
   ```



##########
be/test/vec/function/function_hll_test.cpp:
##########
@@ -133,4 +133,25 @@ TEST(function_hll_test, function_hll_from_base64_test) {
 
     static_cast<void>(check_function<DataTypeHLL, true>(func_name, 
input_types, data_set));
 }
+
+TEST(function_hll_test, function_to_hll_test) {
+    std::string func_name = "to_hll";
+    InputTypeSet input_types = {PrimitiveType::TYPE_BIGINT};
+
+    HyperLogLog hll1;
+    hll1.update(1);
+    HyperLogLog hll2;
+    hll2.update(2);
+    HyperLogLog hll3;
+    hll3.update(100);
+    HyperLogLog hll4;
+    hll4.update(0);
+
+    DataSet data_set = {{{(int64_t)1}, &hll1},
+                        {{(int64_t)2}, &hll2},
+                        {{(int64_t)100}, &hll3},
+                        {{(int64_t)0}, &hll4}};
+
+    static_cast<void>(check_function<DataTypeHLL>(func_name, input_types, 
data_set));
+}

Review Comment:
   The new `to_hll` behavior has an explicit error path for negative BIGINT 
values (returns `Status::InvalidArgument`) and a special null-handling path 
(skips update and returns a default/empty HLL). The unit test currently only 
covers a few positive values; please add coverage for at least one negative 
input (expect execute failure) and a NULL input row (verify it returns an empty 
HLL rather than NULL).
   ```suggestion
       HyperLogLog empty_hll;
   
       DataSet data_set = {{{(int64_t)1}, &hll1},
                           {{(int64_t)2}, &hll2},
                           {{(int64_t)100}, &hll3},
                           {{(int64_t)0}, &hll4},
                           {{Null()}, &empty_hll}};
   
       static_cast<void>(check_function<DataTypeHLL, true>(func_name, 
input_types, data_set));
   }
   
   TEST(function_hll_test, function_to_hll_negative_input_test) {
       std::string func_name = "to_hll";
       InputTypeSet input_types = {PrimitiveType::TYPE_BIGINT};
   
       HyperLogLog dummy_hll;
   
       // Negative BIGINT input should cause function execution to fail.
       DataSet data_set = {{{(int64_t)-1}, &dummy_hll}};
   
       EXPECT_FALSE(check_function<DataTypeHLL>(func_name, input_types, 
data_set));
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to