This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5c849ed5351 [Fix](function) fix potential buffer write overflow in
function date_format (#55389)
5c849ed5351 is described below
commit 5c849ed53510e9adad0111222829ac85cfd0c638
Author: zclllyybb <[email protected]>
AuthorDate: Fri Aug 29 16:00:47 2025 +0800
[Fix](function) fix potential buffer write overflow in function date_format
(#55389)
Related PR: https://github.com/apache/doris/issues/53742
Problem Summary:
### Release note
For some special formats like `%Y-%m-%d`, we wrongly assumed that the
formatted result would be shorter than the format string itself, which can
sometimes lead to a buffer overflow. However, because of our PODArray's
memory alignment when reserving, only a very small number of extreme cases
can reproduce this problem.
---
be/src/util/asan_util.h | 10 +++++-----
.../vec/functions/cast/cast_to_date_or_datetime_impl.hpp | 4 ++--
be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp | 4 ++--
be/src/vec/functions/cast/cast_to_datev2_impl.hpp | 4 ++--
be/src/vec/functions/date_format_type.h | 12 +++++++++++-
be/src/vec/functions/function_datetime_string_to_string.h | 14 +++++++++-----
.../datetime_functions/test_date_function.groovy | 10 ++++++++++
7 files changed, 41 insertions(+), 17 deletions(-)
diff --git a/be/src/util/asan_util.h b/be/src/util/asan_util.h
index 4a2789553e7..d9aec88c0b9 100644
--- a/be/src/util/asan_util.h
+++ b/be/src/util/asan_util.h
@@ -19,16 +19,16 @@
#include <sanitizer/asan_interface.h>
-class AsanPoisonDefer {
+class AsanPoisonGuard {
#ifdef ADDRESS_SANITIZER
public:
// Poison the memory region to prevent accidental access
// during the lifetime of this object.
- AsanPoisonDefer(const void* start, size_t len) : start(start), len(len) {
+ AsanPoisonGuard(const void* start, size_t len) : start(start), len(len) {
ASAN_POISON_MEMORY_REGION(start, len);
}
// Unpoison the memory region when this object goes out of scope.
- ~AsanPoisonDefer() { ASAN_UNPOISON_MEMORY_REGION(start, len); }
+ ~AsanPoisonGuard() { ASAN_UNPOISON_MEMORY_REGION(start, len); }
private:
const void* start;
@@ -36,7 +36,7 @@ private:
#else
public:
// No-op for platforms without ASAN_DEFINE_REGION_MACROS
- AsanPoisonDefer(const void*, size_t) {}
- ~AsanPoisonDefer() = default;
+ AsanPoisonGuard(const void*, size_t) {}
+ ~AsanPoisonGuard() = default;
#endif
};
diff --git a/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
b/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
index 34ba87cdf1f..b03140dfdad 100644
--- a/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
@@ -333,7 +333,7 @@ inline bool
CastToDateOrDatetime::from_string_strict_mode(const StringRef& str,
CastParameters&
params) {
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
uint32_t part[4];
bool has_second = false;
@@ -699,7 +699,7 @@ inline bool
CastToDateOrDatetime::from_string_non_strict_mode_impl(
constexpr bool IsStrict = false;
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
// skip leading whitespace
static_cast<void>(skip_any_whitespace(ptr, end));
diff --git a/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
b/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
index e85700c6572..b21c6defbf0 100644
--- a/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
@@ -324,7 +324,7 @@ inline bool CastToDatetimeV2::from_string_strict_mode(const
StringRef& str,
uint32_t to_scale,
CastParameters& params) {
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
uint32_t part[4];
bool has_second = false;
@@ -695,7 +695,7 @@ inline bool
CastToDatetimeV2::from_string_non_strict_mode_impl(
constexpr bool IsStrict = false;
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
// skip leading whitespace
static_cast<void>(skip_any_whitespace(ptr, end));
diff --git a/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
b/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
index 68ad64ee3aa..3ebe4fc2b75 100644
--- a/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
@@ -264,7 +264,7 @@ inline bool CastToDateV2::from_string_strict_mode(const
StringRef& str,
CastParameters& params) {
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
uint32_t part[4];
bool has_second = false;
@@ -580,7 +580,7 @@ inline bool
CastToDateV2::from_string_non_strict_mode_impl(const StringRef& str,
constexpr bool IsStrict = false;
const char* ptr = str.data;
const char* end = ptr + str.size;
- AsanPoisonDefer defer(end, 1);
+ AsanPoisonGuard defer(end, 1);
// skip leading whitespace
static_cast<void>(skip_any_whitespace(ptr, end));
diff --git a/be/src/vec/functions/date_format_type.h
b/be/src/vec/functions/date_format_type.h
index db75478704e..9b66d5d8fb5 100644
--- a/be/src/vec/functions/date_format_type.h
+++ b/be/src/vec/functions/date_format_type.h
@@ -60,9 +60,13 @@ void put_two_digits(T m, char* buf, int& i) {
}
// UserDefinedImpl indicates that no specific optimization has been applied,
and the general logic is used for processing.
-struct UserDefinedImpl {};
+struct UserDefinedImpl {
+ // Pre-allocated size for performance.
+ constexpr static size_t row_size = 26;
+};
struct yyyyMMddImpl {
+ constexpr static size_t row_size = 8;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
@@ -74,6 +78,7 @@ struct yyyyMMddImpl {
};
struct yyyy_MM_ddImpl {
+ constexpr static size_t row_size = 10;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
@@ -87,6 +92,7 @@ struct yyyy_MM_ddImpl {
};
struct yyyy_MM_dd_HH_mm_ssImpl {
+ constexpr static size_t row_size = 19;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
@@ -106,6 +112,7 @@ struct yyyy_MM_dd_HH_mm_ssImpl {
};
struct yyyy_MM_dd_HH_mm_ss_SSSSSSImpl {
+ constexpr static size_t row_size = 26;
size_t static date_to_str(const DateV2Value<DateTimeV2ValueType>&
date_value, char* buf) {
int i = 0;
put_year(date_value.year(), buf, i);
@@ -128,6 +135,7 @@ struct yyyy_MM_dd_HH_mm_ss_SSSSSSImpl {
};
struct yyyy_MMImpl {
+ constexpr static size_t row_size = 7;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
@@ -138,6 +146,7 @@ struct yyyy_MMImpl {
}
};
struct yyyyMMImpl {
+ constexpr static size_t row_size = 6;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
@@ -148,6 +157,7 @@ struct yyyyMMImpl {
};
struct yyyyImpl {
+ constexpr static size_t row_size = 4;
template <typename DateType>
size_t static date_to_str(const DateType& date_value, char* buf) {
int i = 0;
diff --git a/be/src/vec/functions/function_datetime_string_to_string.h
b/be/src/vec/functions/function_datetime_string_to_string.h
index 00f8993c217..4f426a2bf4a 100644
--- a/be/src/vec/functions/function_datetime_string_to_string.h
+++ b/be/src/vec/functions/function_datetime_string_to_string.h
@@ -28,6 +28,7 @@
#include "common/cast_set.h"
#include "common/status.h"
#include "runtime/runtime_state.h"
+#include "util/asan_util.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
@@ -116,10 +117,11 @@ public:
}
string_vale = string_vale.trim();
- auto format_str =
- time_format_type::rewrite_specific_format(string_vale.data,
string_vale.size);
+ // no need to rewrite, we choose implement by format_type.
format_type's check has compatible logic about
+ // special format string.
+ auto format_str = StringRef(string_vale.data, string_vale.size);
if (format_str.size > 128) {
- // exceeds the length limit.
+ // exceeds the length limit.
state->is_valid = false;
return IFunction::open(context, scope);
}
@@ -181,6 +183,8 @@ public:
}
StringRef format(format_state->format_str);
+ auto result_row_length = std::visit([](auto type) { return
decltype(type)::row_size; },
+ format_state->format_type);
const auto& pod_array = col->get_data();
const auto len = pod_array.size();
@@ -191,11 +195,11 @@ public:
return Status::OK();
}
res_offsets.resize(len);
- res_data.reserve(len * format.size + len);
+ res_data.reserve(len * result_row_length);
null_map.resize_fill(len, false);
if constexpr (IsDecimal) {
- // FromUnixTimeDecimalImpl
+ // FromUnixTimeDecimalImpl. may use UserDefinedImpl or
yyyy_MM_dd_HH_mm_ss_SSSSSSImpl.
size_t offset = 0;
if (format_state->format_str ==
time_format_type::DEFAULT_FORMAT_DECIMAL) {
for (int i = 0; i < len; ++i) {
diff --git
a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy
b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy
index 385ed1e11d7..7382007beb9 100644
---
a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy
+++
b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy
@@ -577,6 +577,16 @@ suite("test_date_function") {
qt_sql """ select date_format('2022-08-04', '%X %V %w'); """
qt_sql_date_format_long """ select date_format(cast('2011-06-24' as
DATETIMEV2(0)), '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V
%f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') """
qt_sql_date_format_long """ select date_format(null, '%f %V %f %l %V %I %S
%p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j
%a %h %s %m %a %v %u %b') """
+ sql " drop table if exists dtfmt "
+ sql """
+ create table dtfmt(
+ k0 datetime(3) null
+ )
+ DISTRIBUTED BY HASH(`k0`) BUCKETS auto
+ properties("replication_num" = "1");
+ """
+ sql """insert into dtfmt select "2024-06-10 12:34:56.789" from
numbers("number"="5000");"""
+ sql "select date_format(k0, '%Y-%m-%d') from dtfmt;"
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e
%H:%i:%s %Y'); """
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %T
CST %Y'); """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]