github-actions[bot] commented on code in PR #15925: URL: https://github.com/apache/doris/pull/15925#discussion_r1070227998
########## be/src/vec/common/hash_table/hash.h: ########## @@ -100,7 +100,7 @@ }; template <> -struct DefaultHash<StringRef> : public StringRefHash {}; +struct DefaultHash<doris::StringRef> : public doris::StringRefHash {}; Review Comment: warning: unknown class name 'StringRefHash'; did you mean 'StringRef'? [clang-diagnostic-error] ```suggestion struct DefaultHash<doris::StringRef> : public doris::StringRef {}; ``` **be/src/udf/udf.h:34:** 'StringRef' declared here ```cpp struct StringRef; ^ ``` ########## be/src/olap/like_column_predicate.h: ########## @@ -48,7 +48,11 @@ bool* flags) const override; std::string get_search_str() const override { - return std::string(reinterpret_cast<char*>(pattern.ptr), pattern.len); + if constexpr (std::is_same_v<PatternType, StringRef>) + return std::string(reinterpret_cast<const char*>(pattern.data), pattern.size); + else if constexpr (std::is_same_v<PatternType, StringVal>) + return std::string(reinterpret_cast<const char*>(pattern.ptr), pattern.len); Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion else if constexpr (std::is_same_v<PatternType, StringVal>) { return std::string(reinterpret_cast<const char*>(pattern.ptr), pattern.len); } ``` ########## be/src/vec/common/string_ref.cpp: ########## @@ -0,0 +1,59 @@ +#include "string_ref.h" + +namespace doris { + +inline StringRef StringRef::trim() const { + // Remove leading and trailing spaces. + int32_t begin = 0; + + while (begin < size && data[begin] == ' ') { + ++begin; + } + + int32_t end = size - 1; + + while (end > begin && data[end] == ' ') { + --end; + } + + return StringRef(data + begin, end - begin + 1); +} + +// TODO: rewrite in AVX2 +inline size_t StringRef::find_first_of(char c) const { + const char* p = static_cast<const char*>(memchr(data, c, size)); + return p == nullptr ? -1 : p - data; +} + +StringRef StringRef::min_string_val() { + return StringRef((char*)(&StringRef::MIN_CHAR), 1); +} + +StringRef StringRef::max_string_val() { + return StringRef((char*)(&StringRef::MAX_CHAR), 1); +} + +bool StringRef::start_with(const StringRef& search_string) const { + DCHECK(size >= search_string.size); + if (search_string.size == 0) + return true; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (search_string.size == 0) { return true; } ``` ########## be/src/vec/functions/like.cpp: ########## @@ -124,7 +123,7 @@ Status FunctionLikeBase::constant_ends_with_fn_predicate( LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { Review Comment: warning: pointer parameter 'sel' can be pointer to const [readability-non-const-parameter] ```suggestion const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { ``` be/src/vec/functions/like.h:182: ```diff - ColumnUInt8::Container& result, uint16_t* sel, + ColumnUInt8::Container& result, const uint16_t* sel, ``` ########## be/src/vec/common/hash_table/hash.h: ########## @@ -100,7 +100,7 @@ struct DefaultHash<doris::vectorized::Int128I> { }; template <> -struct DefaultHash<StringRef> : public StringRefHash {}; +struct DefaultHash<doris::StringRef> : public doris::StringRefHash {}; Review Comment: warning: base class has incomplete type [clang-diagnostic-error] ```cpp struct DefaultHash<doris::StringRef> : public doris::StringRefHash {}; ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/string_ref.cpp: ########## @@ -0,0 +1,59 @@ +#include "string_ref.h" + +namespace doris { + +inline StringRef StringRef::trim() const { + // Remove leading and trailing spaces. + int32_t begin = 0; + + while (begin < size && data[begin] == ' ') { + ++begin; + } + + int32_t end = size - 1; + + while (end > begin && data[end] == ' ') { + --end; + } + + return StringRef(data + begin, end - begin + 1); +} + +// TODO: rewrite in AVX2 +inline size_t StringRef::find_first_of(char c) const { + const char* p = static_cast<const char*>(memchr(data, c, size)); + return p == nullptr ? -1 : p - data; +} + +StringRef StringRef::min_string_val() { + return StringRef((char*)(&StringRef::MIN_CHAR), 1); +} + +StringRef StringRef::max_string_val() { + return StringRef((char*)(&StringRef::MAX_CHAR), 1); +} + +bool StringRef::start_with(const StringRef& search_string) const { + DCHECK(size >= search_string.size); + if (search_string.size == 0) + return true; + +#if defined(__SSE2__) || defined(__aarch64__) + return memequalSSE2Wide(data, search_string.data, search_string.size); +#else + return 0 == memcmp(data, search_string.data, search_string.size); +#endif +} +bool StringRef::end_with(const StringRef& search_string) const { + DCHECK(size >= search_string.size); + if (search_string.size == 0) + return true; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (search_string.size == 0) { return true; } ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; Review Comment: warning: initialization of incomplete type 'doris::StringRef' [clang-diagnostic-error] ```cpp return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -79,7 +79,9 @@ return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24); } #endif - size_t ALWAYS_INLINE operator()(StringRef key) const { return StringRefHash()(key); } + size_t ALWAYS_INLINE operator()(doris::StringRef key) const { Review Comment: warning: variable has incomplete type 'doris::StringRef' [clang-diagnostic-error] ```cpp size_t ALWAYS_INLINE operator()(doris::StringRef key) const { ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ struct StringKey24 { bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { Review Comment: warning: incomplete result type 'doris::StringRef' in function definition [clang-diagnostic-error] ```cpp inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/olap/like_column_predicate.h: ########## @@ -48,7 +48,11 @@ class LikeColumnPredicate : public ColumnPredicate { bool* flags) const override; std::string get_search_str() const override { - return std::string(reinterpret_cast<char*>(pattern.ptr), pattern.len); + if constexpr (std::is_same_v<PatternType, StringRef>) Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if constexpr (std::is_same_v<PatternType, StringRef>) { ``` be/src/olap/like_column_predicate.h:52: ```diff - else if constexpr (std::is_same_v<PatternType, StringVal>) + } else if constexpr (std::is_same_v<PatternType, StringVal>) ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { assert(n.high != 0); return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { Review Comment: warning: incomplete result type 'doris::StringRef' in function definition [clang-diagnostic-error] ```cpp inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { Review Comment: warning: incomplete result type 'doris::StringRef' in function definition [clang-diagnostic-error] ```cpp inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { assert(n.high != 0); return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { assert(n.c != 0); return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; Review Comment: warning: initialization of incomplete type 'doris::StringRef' [clang-diagnostic-error] ```cpp return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -35,15 +35,15 @@ bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { assert(n.high != 0); return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; Review Comment: warning: initialization of incomplete type 'doris::StringRef' [clang-diagnostic-error] ```cpp return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -487,7 +489,7 @@ template <typename Self, typename KeyHolder, typename Func> static auto ALWAYS_INLINE dispatch(Self& self, KeyHolder&& key_holder, Func&& func) { StringHashTableHash hash; - const StringRef& x = key_holder_get_key(key_holder); + const doris::StringRef& x = key_holder_get_key(key_holder); const size_t sz = x.size; Review Comment: warning: member access into incomplete type 'const doris::StringRef' [clang-diagnostic-error] ```cpp const size_t sz = x.size; ^ ``` **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef' ```cpp struct StringRef; ^ ``` ########## be/src/vec/common/hash_table/string_hash_table.h: ########## @@ -79,7 +79,9 @@ return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24); } #endif - size_t ALWAYS_INLINE operator()(StringRef key) const { return StringRefHash()(key); } + size_t ALWAYS_INLINE operator()(doris::StringRef key) const { + return doris::StringRefHash()(key); Review Comment: warning: no member named 'StringRefHash' in namespace 'doris' [clang-diagnostic-error] ```cpp return doris::StringRefHash()(key); ^ ``` ########## be/src/vec/functions/like.cpp: ########## @@ -150,7 +149,7 @@ Status FunctionLikeBase::constant_substring_fn_predicate( LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { Review Comment: warning: pointer parameter 'sel' can be pointer to const [readability-non-const-parameter] ```suggestion const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { ``` be/src/vec/functions/like.h:194: ```diff - ColumnUInt8::Container& result, uint16_t* sel, + ColumnUInt8::Container& result, const uint16_t* sel, ``` ########## be/src/vec/common/string_ref.h: ########## @@ -132,90 +142,183 @@ #endif +// Compare two strings using sse4.2 intrinsics if they are available. This code assumes +// that the trivial cases are already handled (i.e. one string is empty). +// Returns: +// < 0 if s1 < s2 +// 0 if s1 == s2 +// > 0 if s1 > s2 +// The SSE code path is just under 2x faster than the non-sse code path. +// - s1/n1: ptr/len for the first string +// - s2/n2: ptr/len for the second string +// - len: min(n1, n2) - this can be more cheaply passed in by the caller +inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) { + DCHECK_EQ(len, std::min(n1, n2)); +#ifdef __SSE4_2__ + while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { + __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1)); + __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2)); + int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, + sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); + if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { + return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; + } + len -= sse_util::CHARS_PER_128_BIT_REGISTER; + s1 += sse_util::CHARS_PER_128_BIT_REGISTER; + s2 += sse_util::CHARS_PER_128_BIT_REGISTER; + } +#endif + unsigned char u1, u2; + while (len-- > 0) { + u1 = (unsigned char)*s1++; + u2 = (unsigned char)*s2++; + if (u1 != u2) return u1 - u2; + if (u1 == '\0') return n1 - n2; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (u1 == '\0') { return n1 - n2; } ``` ########## be/src/vec/common/string_ref.h: ########## @@ -132,90 +142,183 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { #endif +// Compare two strings using sse4.2 intrinsics if they are available. This code assumes +// that the trivial cases are already handled (i.e. one string is empty). +// Returns: +// < 0 if s1 < s2 +// 0 if s1 == s2 +// > 0 if s1 > s2 +// The SSE code path is just under 2x faster than the non-sse code path. +// - s1/n1: ptr/len for the first string +// - s2/n2: ptr/len for the second string +// - len: min(n1, n2) - this can be more cheaply passed in by the caller +inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) { + DCHECK_EQ(len, std::min(n1, n2)); +#ifdef __SSE4_2__ + while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { + __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1)); + __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2)); + int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, + sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); + if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { + return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; + } + len -= sse_util::CHARS_PER_128_BIT_REGISTER; + s1 += sse_util::CHARS_PER_128_BIT_REGISTER; + s2 += sse_util::CHARS_PER_128_BIT_REGISTER; + } +#endif + unsigned char u1, u2; + while (len-- > 0) { + u1 = (unsigned char)*s1++; + u2 = (unsigned char)*s2++; + if (u1 != u2) return u1 - u2; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (u1 != u2) { return u1 - u2; } ``` ########## be/src/vec/functions/like.cpp: ########## @@ -112,7 +111,7 @@ Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const Col Status FunctionLikeBase::constant_starts_with_fn_predicate( LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { Review Comment: warning: pointer parameter 'sel' can be pointer to const [readability-non-const-parameter] ```suggestion const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { ``` be/src/vec/functions/like.h:176: ```diff - ColumnUInt8::Container& result, uint16_t* sel, + ColumnUInt8::Container& result, const uint16_t* sel, ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org