Copilot commented on code in PR #60996:
URL: https://github.com/apache/doris/pull/60996#discussion_r2877915252
##########
be/src/vec/functions/function_hll.cpp:
##########
@@ -259,6 +259,47 @@ struct HLLHash {
}
};
+struct ToHll {
+ static constexpr auto name = "to_hll";
+
+ using ReturnType = DataTypeHLL;
+ template <typename ColumnType>
+ static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
+ return execute<ColumnType, false>(col, nullptr, col_res);
+ }
+ template <typename ColumnType>
+ static Status vector_nullable(const ColumnType* col, const NullMap&
nullmap,
+ MutableColumnPtr& col_res) {
+ return execute<ColumnType, true>(col, &nullmap, col_res);
+ }
+ template <typename ColumnType, bool arg_is_nullable>
+ static Status execute(const ColumnType* col, const NullMap* nullmap,
+ MutableColumnPtr& col_res) {
+ if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
+ auto* res_column = reinterpret_cast<ColumnHLL*>(col_res.get());
+ auto& res_data = res_column->get_data();
+ size_t size = col->size();
+ const auto& data = col->get_data();
+
+ for (size_t i = 0; i < size; ++i) {
+ if (arg_is_nullable && (*nullmap)[i]) {
+ continue;
+ } else {
+ int64_t value = data[i];
+ if (value < 0) {
+ return Status::InvalidArgument(
+ "to_hll does not support negative bigint
value: {}", value);
+ }
+ res_data[i].update(static_cast<uint64_t>(value));
+ }
Review Comment:
In the non-nullable instantiation, `nullmap` is passed as `nullptr`, but the
loop still references `(*nullmap)[i]` inside a normal `if` condition. While
short-circuiting avoids evaluation when `arg_is_nullable` is `false`, rewriting
this as an `if constexpr (arg_is_nullable)` block (or splitting
nullable/non-nullable loops) avoids relying on that subtlety and keeps static
analyzers/sanitizers from flagging a potential null dereference.
```suggestion
if constexpr (arg_is_nullable) {
if ((*nullmap)[i]) {
continue;
}
}
int64_t value = data[i];
if (value < 0) {
return Status::InvalidArgument(
"to_hll does not support negative bigint value:
{}", value);
}
res_data[i].update(static_cast<uint64_t>(value));
```
##########
be/test/vec/function/function_hll_test.cpp:
##########
@@ -133,4 +133,25 @@ TEST(function_hll_test, function_hll_from_base64_test) {
static_cast<void>(check_function<DataTypeHLL, true>(func_name,
input_types, data_set));
}
+
+TEST(function_hll_test, function_to_hll_test) {
+ std::string func_name = "to_hll";
+ InputTypeSet input_types = {PrimitiveType::TYPE_BIGINT};
+
+ HyperLogLog hll1;
+ hll1.update(1);
+ HyperLogLog hll2;
+ hll2.update(2);
+ HyperLogLog hll3;
+ hll3.update(100);
+ HyperLogLog hll4;
+ hll4.update(0);
+
+ DataSet data_set = {{{(int64_t)1}, &hll1},
+ {{(int64_t)2}, &hll2},
+ {{(int64_t)100}, &hll3},
+ {{(int64_t)0}, &hll4}};
+
+ static_cast<void>(check_function<DataTypeHLL>(func_name, input_types,
data_set));
+}
Review Comment:
The new `to_hll` behavior has an explicit error path for negative BIGINT
values (returns `Status::InvalidArgument`) and a special null-handling path
(skips update and returns a default/empty HLL). The unit test currently only
covers a few positive values; please add coverage for at least one negative
input (expect execute failure) and a NULL input row (verify it returns an empty
HLL rather than NULL).
```suggestion
HyperLogLog empty_hll;
DataSet data_set = {{{(int64_t)1}, &hll1},
{{(int64_t)2}, &hll2},
{{(int64_t)100}, &hll3},
{{(int64_t)0}, &hll4},
{{Null()}, &empty_hll}};
static_cast<void>(check_function<DataTypeHLL, true>(func_name,
input_types, data_set));
}
TEST(function_hll_test, function_to_hll_negative_input_test) {
std::string func_name = "to_hll";
InputTypeSet input_types = {PrimitiveType::TYPE_BIGINT};
HyperLogLog dummy_hll;
// Negative BIGINT input should cause function execution to fail.
DataSet data_set = {{{(int64_t)-1}, &dummy_hll}};
EXPECT_FALSE(check_function<DataTypeHLL>(func_name, input_types,
data_set));
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]