This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e21ffac419 [Improvement](dateformat) Improve efficiency for function `date_format` (#12811) e21ffac419 is described below commit e21ffac4196c6a575bfa8b86d5e1c2558021accb Author: Gabriel <gabrielleeb...@gmail.com> AuthorDate: Wed Sep 21 22:38:16 2022 +0800 [Improvement](dateformat) Improve efficiency for function `date_format` (#12811) --- be/src/vec/functions/date_time_transforms.h | 1 + be/src/vec/runtime/vdatetime_value.cpp | 341 ++++++++++++++++------------ 2 files changed, 201 insertions(+), 141 deletions(-) diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 6f733cc218..8255713221 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -338,6 +338,7 @@ struct TransformerToStringTwoArgument { PaddedPODArray<UInt8>& null_map) { auto len = ts.size(); res_offsets.resize(len); + res_data.reserve(len * format.size() + len); null_map.resize_fill(len, false); size_t offset = 0; diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 5d5c71ac32..1696e57d89 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -587,8 +587,35 @@ int VecDateTimeValue::compute_format_len(const char* format, int len) { return size; } +static const char digits100[201] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + +char* write_two_digits_to_string(int number, char* dst) { + memcpy(dst, &digits100[number * 2], 2); + return dst + 2; +} + +char* write_four_digits_to_string(int number, char* dst) { + memcpy(dst, &digits100[(number / 100) * 2], 2); + memcpy(dst + 2, &digits100[(number % 100) * 2], 2); + return dst + 4; +} + 
bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const { + if (check_range(_year, _month, _day, _hour, _minute, _second, _type)) { + return false; + } char buf[64]; + char* cursor = buf; char* pos = NULL; const char* ptr = format; const char* end = format + len; @@ -602,6 +629,54 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c // Skip '%' ptr++; switch (ch = *ptr++) { + case 'y': + // Year, numeric (two digits) + to = write_two_digits_to_string(_year % 100, to); + cursor += 2; + pos = cursor; + break; + case 'Y': + // Year, numeric, four digits + to = write_four_digits_to_string(_year, to); + cursor += 4; + pos = cursor; + break; + case 'd': + // Day of month (00...31) + to = write_two_digits_to_string(_day, to); + cursor += 2; + pos = cursor; + break; + case 'H': + to = write_two_digits_to_string(_hour, to); + cursor += 2; + pos = cursor; + break; + case 'i': + // Minutes, numeric (00..59) + to = write_two_digits_to_string(_minute, to); + cursor += 2; + pos = cursor; + break; + case 'm': + to = write_two_digits_to_string(_month, to); + cursor += 2; + pos = cursor; + break; + case 'h': + case 'I': + // Hour (01..12) + to = write_two_digits_to_string((_hour % 24 + 11) % 12 + 1, to); + cursor += 2; + pos = cursor; + break; + case 's': + case 'S': + // Seconds (00..59) + to = write_two_digits_to_string(_second, to); + cursor += 2; + pos = cursor; + break; case 'a': // Abbreviated weekday name if (_type == TIME_TIME || (_year == 0 && _month == 0)) { @@ -618,18 +693,13 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c break; case 'c': // Month, numeric (0...12) - pos = int_to_str(_month, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'd': - // Day of month (00...31) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(_month, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, 
to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_day, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); if (_day >= 10 && _day <= 19) { to = append_string("th", to); } else { @@ -651,49 +721,28 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c break; case 'e': // Day of the month, numeric (0..31) - pos = int_to_str(_day, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_day, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'f': // Microseconds (000000..999999) - pos = int_to_str(0, buf); - to = append_with_prefix(buf, pos - buf, '0', 6, to); - break; - case 'h': - case 'I': - // Hour (01..12) - pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'H': - // Hour (00..23) - pos = int_to_str(_hour, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'i': - // Minutes, numeric (00..59) - pos = int_to_str(_minute, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(0, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 6, to); break; case 'j': // Day of year (001..366) - pos = int_to_str(daynr() - doris::calc_daynr(_year, 1, 1) + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 3, to); + pos = int_to_str(daynr() - doris::calc_daynr(_year, 1, 1) + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 3, to); break; case 'k': // Hour (0..23) - pos = int_to_str(_hour, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(_hour, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'l': // Hour (1..12) - pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 
'm': - // Month, numeric (00..12) - pos = int_to_str(_month, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str((_hour % 24 + 11) % 12 + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'M': // Month name (January..December) @@ -728,12 +777,6 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c to = append_string(" AM", to); } break; - case 's': - case 'S': - // Seconds (00..59) - pos = int_to_str(_second, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; case 'T': // Time, 24-hour (hh:mm:ss) *to++ = (char)('0' + ((_hour % 24) / 10)); @@ -753,8 +796,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(1)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(1)), to); + cursor += 2; + pos = cursor; break; case 'U': // Week (00..53), where Sunday is the first day of the week; @@ -762,8 +806,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(0)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(0)), to); + cursor += 2; + pos = cursor; break; case 'v': // Week (01..53), where Monday is the first day of the week; @@ -771,8 +816,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(3)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(3)), to); + cursor += 2; + pos = cursor; break; case 'V': // Week (01..53), where Sunday is the first day of the week; @@ -780,16 +826,17 @@ bool VecDateTimeValue::to_format_string(const char* 
format, int len, char* to) c if (_type == TIME_TIME) { return false; } - pos = int_to_str(week(mysql_week_mode(2)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(2)), to); + cursor += 2; + pos = cursor; break; case 'w': // Day of the week (0=Sunday..6=Saturday) if (_type == TIME_TIME || (_month == 0 && _year == 0)) { return false; } - pos = int_to_str(doris::calc_weekday(daynr(), true), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(doris::calc_weekday(daynr(), true), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) @@ -803,8 +850,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(3), &year, true); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } case 'X': { @@ -815,20 +863,11 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } uint32_t year = 0; calc_week(*this, mysql_week_mode(2), &year); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } - case 'y': - // Year, numeric (two digits) - pos = int_to_str(_year % 100, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'Y': - // Year, numeric, four digits - pos = int_to_str(_year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); - break; default: *to++ = ch; break; @@ -2686,8 +2725,12 @@ void DateV2Value<T>::set_microsecond(uint32_t microsecond) { template <typename T> bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) const { + if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { + return 
false; + } char buf[64]; char* pos = nullptr; + char* cursor = buf; const char* ptr = format; const char* end = format + len; char ch = '\0'; @@ -2700,6 +2743,54 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con // Skip '%' ptr++; switch (ch = *ptr++) { + case 'y': + // Year, numeric (two digits) + to = write_two_digits_to_string(this->year() % 100, to); + cursor += 2; + pos = cursor; + break; + case 'Y': + // Year, numeric, four digits + to = write_four_digits_to_string(this->year(), to); + cursor += 4; + pos = cursor; + break; + case 'd': + // Day of month (00...31) + to = write_two_digits_to_string(this->day(), to); + cursor += 2; + pos = cursor; + break; + case 'H': + to = write_two_digits_to_string(this->hour(), to); + cursor += 2; + pos = cursor; + break; + case 'i': + // Minutes, numeric (00..59) + to = write_two_digits_to_string(this->minute(), to); + cursor += 2; + pos = cursor; + break; + case 'm': + to = write_two_digits_to_string(this->month(), to); + cursor += 2; + pos = cursor; + break; + case 'h': + case 'I': + // Hour (01..12) + to = write_two_digits_to_string((this->hour() % 24 + 11) % 12 + 1, to); + cursor += 2; + pos = cursor; + break; + case 's': + case 'S': + // Seconds (00..59) + to = write_two_digits_to_string(this->second(), to); + cursor += 2; + pos = cursor; + break; case 'a': // Abbreviated weekday name if (this->year() == 0 && this->month() == 0) { @@ -2716,18 +2807,13 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con break; case 'c': // Month, numeric (0...12) - pos = int_to_str(this->month(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); - break; - case 'd': - // Day of month (00...31) - pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(this->month(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) 
- pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->day(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); if (this->day() >= 10 && this->day() <= 19) { to = append_string("th", to); } else { @@ -2749,49 +2835,28 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con break; case 'e': // Day of the month, numeric (0..31) - pos = int_to_str(this->day(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->day(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'f': // Microseconds (000000..999999) - pos = int_to_str(this->microsecond(), buf); - to = append_with_prefix(buf, pos - buf, '0', 6, to); - break; - case 'h': - case 'I': - // Hour (01..12) - int_to_str((this->hour() % 24 + 11) % 12 + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'H': - // Hour (00..23) - pos = int_to_str(this->hour(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'i': - // Minutes, numeric (00..59) - pos = int_to_str(this->minute(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str(this->microsecond(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 6, to); break; case 'j': // Day of year (001..366) - pos = int_to_str(daynr() - doris::calc_daynr(this->year(), 1, 1) + 1, buf); - to = append_with_prefix(buf, pos - buf, '0', 3, to); + pos = int_to_str(daynr() - doris::calc_daynr(this->year(), 1, 1) + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 3, to); break; case 'k': // Hour (0..23) - pos = int_to_str(this->hour(), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(this->hour(), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'l': // Hour (1..12) - pos = int_to_str((this->hour() % 12) + 1, buf); - to = append_with_prefix(buf, 
pos - buf, '0', 1, to); - break; - case 'm': - // Month, numeric (00..12) - pos = int_to_str(this->month(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + pos = int_to_str((this->hour() % 24 + 11) % 12 + 1, cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'M': // Month name (January..December) @@ -2802,7 +2867,11 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con break; case 'p': // AM or PM - to = append_string("AM", to); + if ((this->hour() % 24) >= 12) { + to = append_string("PM", to); + } else { + to = append_string("AM", to); + } break; case 'r': { // Time, 12-hour (hh:mm:ss followed by AM or PM) @@ -2823,12 +2892,6 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con } break; } - case 's': - case 'S': - // Seconds (00..59) - pos = int_to_str(this->second(), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; case 'T': { // Time, 24-hour (hh:mm:ss) *to++ = (char)('0' + ((this->hour() % 24) / 10)); @@ -2846,34 +2909,38 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con case 'u': // Week (00..53), where Monday is the first day of the week; // WEEK() mode 1 - pos = int_to_str(week(mysql_week_mode(1)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(1)), to); + cursor += 2; + pos = cursor; break; case 'U': // Week (00..53), where Sunday is the first day of the week; // WEEK() mode 0 - pos = int_to_str(week(mysql_week_mode(0)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(0)), to); + cursor += 2; + pos = cursor; break; case 'v': // Week (01..53), where Monday is the first day of the week; // WEEK() mode 3; used with %x - pos = int_to_str(week(mysql_week_mode(3)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = 
write_two_digits_to_string(week(mysql_week_mode(3)), to); + cursor += 2; + pos = cursor; break; case 'V': // Week (01..53), where Sunday is the first day of the week; // WEEK() mode 2; used with %X - pos = int_to_str(week(mysql_week_mode(2)), buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); + to = write_two_digits_to_string(week(mysql_week_mode(2)), to); + cursor += 2; + pos = cursor; break; case 'w': // Day of the week (0=Sunday..6=Saturday) if (this->month() == 0 && this->year() == 0) { return false; } - pos = int_to_str(doris::calc_weekday(daynr(), true), buf); - to = append_with_prefix(buf, pos - buf, '0', 1, to); + pos = int_to_str(doris::calc_weekday(daynr(), true), cursor); + to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) @@ -2885,8 +2952,9 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con uint16_t year = 0; calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(3), &year, true); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } case 'X': { @@ -2895,20 +2963,11 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con uint16_t year = 0; calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(2), &year); - pos = int_to_str(year, buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); + to = write_four_digits_to_string(year, to); + cursor += 4; + pos = cursor; break; } - case 'y': - // Year, numeric (two digits) - pos = int_to_str(this->year() % 100, buf); - to = append_with_prefix(buf, pos - buf, '0', 2, to); - break; - case 'Y': - // Year, numeric, four digits - pos = int_to_str(this->year(), buf); - to = append_with_prefix(buf, pos - buf, '0', 4, to); - break; default: *to++ = ch; break; 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org