github-actions[bot] commented on code in PR #15925:
URL: https://github.com/apache/doris/pull/15925#discussion_r1070227998


##########
be/src/vec/common/hash_table/hash.h:
##########
@@ -100,7 +100,7 @@
 };
 
 template <>
-struct DefaultHash<StringRef> : public StringRefHash {};
+struct DefaultHash<doris::StringRef> : public doris::StringRefHash {};

Review Comment:
   warning: unknown class name 'StringRefHash'; did you mean 'StringRef'? 
[clang-diagnostic-error]
   
   ```suggestion
   struct DefaultHash<doris::StringRef> : public doris::StringRef {};
   ```
   **be/src/udf/udf.h:34:** 'StringRef' declared here
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/olap/like_column_predicate.h:
##########
@@ -48,7 +48,11 @@
                           bool* flags) const override;
 
     std::string get_search_str() const override {
-        return std::string(reinterpret_cast<char*>(pattern.ptr), pattern.len);
+        if constexpr (std::is_same_v<PatternType, StringRef>)
+            return std::string(reinterpret_cast<const char*>(pattern.data), 
pattern.size);
+        else if constexpr (std::is_same_v<PatternType, StringVal>)
+            return std::string(reinterpret_cast<const char*>(pattern.ptr), 
pattern.len);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           else if constexpr (std::is_same_v<PatternType, StringVal>) {
               return std::string(reinterpret_cast<const char*>(pattern.ptr), 
pattern.len);
   }
   ```
   



##########
be/src/vec/common/string_ref.cpp:
##########
@@ -0,0 +1,59 @@
+#include "string_ref.h"
+
+namespace doris {
+
+inline StringRef StringRef::trim() const {
+    // Remove leading and trailing spaces.
+    int32_t begin = 0;
+
+    while (begin < size && data[begin] == ' ') {
+        ++begin;
+    }
+
+    int32_t end = size - 1;
+
+    while (end > begin && data[end] == ' ') {
+        --end;
+    }
+
+    return StringRef(data + begin, end - begin + 1);
+}
+
+// TODO: rewrite in AVX2
+inline size_t StringRef::find_first_of(char c) const {
+    const char* p = static_cast<const char*>(memchr(data, c, size));
+    return p == nullptr ? -1 : p - data;
+}
+
+StringRef StringRef::min_string_val() {
+    return StringRef((char*)(&StringRef::MIN_CHAR), 1);
+}
+
+StringRef StringRef::max_string_val() {
+    return StringRef((char*)(&StringRef::MAX_CHAR), 1);
+}
+
+bool StringRef::start_with(const StringRef& search_string) const {
+    DCHECK(size >= search_string.size);
+    if (search_string.size == 0)
+        return true;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (search_string.size == 0) {
           return true;
   }
   ```
   



##########
be/src/vec/functions/like.cpp:
##########
@@ -124,7 +123,7 @@
 
 Status FunctionLikeBase::constant_ends_with_fn_predicate(
         LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
-        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {
+        const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {

Review Comment:
   warning: pointer parameter 'sel' can be pointer to const 
[readability-non-const-parameter]
   
   ```suggestion
           const StringRef& pattern, ColumnUInt8::Container& result, const 
uint16_t* sel, size_t sz) {
   ```
   
   be/src/vec/functions/like.h:182:
   ```diff
   -                                                   ColumnUInt8::Container& 
result, uint16_t* sel,
   +                                                   ColumnUInt8::Container& 
result, const uint16_t* sel,
   ```
   



##########
be/src/vec/common/hash_table/hash.h:
##########
@@ -100,7 +100,7 @@ struct DefaultHash<doris::vectorized::Int128I> {
 };
 
 template <>
-struct DefaultHash<StringRef> : public StringRefHash {};
+struct DefaultHash<doris::StringRef> : public doris::StringRefHash {};

Review Comment:
   warning: base class has incomplete type [clang-diagnostic-error]
   ```cpp
   struct DefaultHash<doris::StringRef> : public doris::StringRefHash {};
                                                 ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/string_ref.cpp:
##########
@@ -0,0 +1,59 @@
+#include "string_ref.h"
+
+namespace doris {
+
+inline StringRef StringRef::trim() const {
+    // Remove leading and trailing spaces.
+    int32_t begin = 0;
+
+    while (begin < size && data[begin] == ' ') {
+        ++begin;
+    }
+
+    int32_t end = size - 1;
+
+    while (end > begin && data[end] == ' ') {
+        --end;
+    }
+
+    return StringRef(data + begin, end - begin + 1);
+}
+
+// TODO: rewrite in AVX2
+inline size_t StringRef::find_first_of(char c) const {
+    const char* p = static_cast<const char*>(memchr(data, c, size));
+    return p == nullptr ? -1 : p - data;
+}
+
+StringRef StringRef::min_string_val() {
+    return StringRef((char*)(&StringRef::MIN_CHAR), 1);
+}
+
+StringRef StringRef::max_string_val() {
+    return StringRef((char*)(&StringRef::MAX_CHAR), 1);
+}
+
+bool StringRef::start_with(const StringRef& search_string) const {
+    DCHECK(size >= search_string.size);
+    if (search_string.size == 0)
+        return true;
+
+#if defined(__SSE2__) || defined(__aarch64__)
+    return memequalSSE2Wide(data, search_string.data, search_string.size);
+#else
+    return 0 == memcmp(data, search_string.data, search_string.size);
+#endif
+}
+bool StringRef::end_with(const StringRef& search_string) const {
+    DCHECK(size >= search_string.size);
+    if (search_string.size == 0)
+        return true;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (search_string.size == 0) {
           return true;
   }
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
     assert(n != 0);
     return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};

Review Comment:
   warning: initialization of incomplete type 'doris::StringRef' 
[clang-diagnostic-error]
   ```cpp
       return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
              ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -79,7 +79,9 @@
         return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24);
     }
 #endif
-    size_t ALWAYS_INLINE operator()(StringRef key) const { return 
StringRefHash()(key); }
+    size_t ALWAYS_INLINE operator()(doris::StringRef key) const {

Review Comment:
   warning: variable has incomplete type 'doris::StringRef' 
[clang-diagnostic-error]
   ```cpp
       size_t ALWAYS_INLINE operator()(doris::StringRef key) const {
                                                        ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@ struct StringKey24 {
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {

Review Comment:
   warning: incomplete result type 'doris::StringRef' in function definition 
[clang-diagnostic-error]
   ```cpp
   inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
                                         ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/olap/like_column_predicate.h:
##########
@@ -48,7 +48,11 @@ class LikeColumnPredicate : public ColumnPredicate {
                           bool* flags) const override;
 
     std::string get_search_str() const override {
-        return std::string(reinterpret_cast<char*>(pattern.ptr), pattern.len);
+        if constexpr (std::is_same_v<PatternType, StringRef>)

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if constexpr (std::is_same_v<PatternType, StringRef>) {
   ```
   
   be/src/olap/like_column_predicate.h:52:
   ```diff
   -         else if constexpr (std::is_same_v<PatternType, StringVal>)
   +         } else if constexpr (std::is_same_v<PatternType, StringVal>)
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
     assert(n != 0);
     return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
     assert(n.high != 0);
     return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) 
>> 3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {

Review Comment:
   warning: incomplete result type 'doris::StringRef' in function definition 
[clang-diagnostic-error]
   ```cpp
   inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
                                         ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
     assert(n != 0);
     return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {

Review Comment:
   warning: incomplete result type 'doris::StringRef' in function definition 
[clang-diagnostic-error]
   ```cpp
   inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
                                         ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
     assert(n != 0);
     return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
     assert(n.high != 0);
     return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) 
>> 3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
     assert(n.c != 0);
     return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 
3)};

Review Comment:
   warning: initialization of incomplete type 'doris::StringRef' 
[clang-diagnostic-error]
   ```cpp
       return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) 
>> 3)};
              ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -35,15 +35,15 @@
     bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
 };
 
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
     assert(n != 0);
     return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
 }
-inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
     assert(n.high != 0);
     return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) 
>> 3)};

Review Comment:
   warning: initialization of incomplete type 'doris::StringRef' 
[clang-diagnostic-error]
   ```cpp
       return {reinterpret_cast<const char*>(&n), 16ul - 
(__builtin_clzll(n.high) >> 3)};
              ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -487,7 +489,7 @@
     template <typename Self, typename KeyHolder, typename Func>
     static auto ALWAYS_INLINE dispatch(Self& self, KeyHolder&& key_holder, 
Func&& func) {
         StringHashTableHash hash;
-        const StringRef& x = key_holder_get_key(key_holder);
+        const doris::StringRef& x = key_holder_get_key(key_holder);
         const size_t sz = x.size;

Review Comment:
   warning: member access into incomplete type 'const doris::StringRef' 
[clang-diagnostic-error]
   ```cpp
           const size_t sz = x.size;
                              ^
   ```
   **be/src/udf/udf.h:34:** forward declaration of 'doris::StringRef'
   ```cpp
   struct StringRef;
          ^
   ```
   



##########
be/src/vec/common/hash_table/string_hash_table.h:
##########
@@ -79,7 +79,9 @@
         return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24);
     }
 #endif
-    size_t ALWAYS_INLINE operator()(StringRef key) const { return 
StringRefHash()(key); }
+    size_t ALWAYS_INLINE operator()(doris::StringRef key) const {
+        return doris::StringRefHash()(key);

Review Comment:
   warning: no member named 'StringRefHash' in namespace 'doris' 
[clang-diagnostic-error]
   ```cpp
           return doris::StringRefHash()(key);
                         ^
   ```
   



##########
be/src/vec/functions/like.cpp:
##########
@@ -150,7 +149,7 @@
 
 Status FunctionLikeBase::constant_substring_fn_predicate(
         LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
-        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {
+        const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {

Review Comment:
   warning: pointer parameter 'sel' can be pointer to const 
[readability-non-const-parameter]
   
   ```suggestion
           const StringRef& pattern, ColumnUInt8::Container& result, const 
uint16_t* sel, size_t sz) {
   ```
   
   be/src/vec/functions/like.h:194:
   ```diff
   -                                                   ColumnUInt8::Container& 
result, uint16_t* sel,
   +                                                   ColumnUInt8::Container& 
result, const uint16_t* sel,
   ```
   



##########
be/src/vec/common/string_ref.h:
##########
@@ -132,90 +142,183 @@
 
 #endif
 
+// Compare two strings using sse4.2 intrinsics if they are available. This 
code assumes
+// that the trivial cases are already handled (i.e. one string is empty).
+// Returns:
+//   < 0 if s1 < s2
+//   0 if s1 == s2
+//   > 0 if s1 > s2
+// The SSE code path is just under 2x faster than the non-sse code path.
+//   - s1/n1: ptr/len for the first string
+//   - s2/n2: ptr/len for the second string
+//   - len: min(n1, n2) - this can be more cheaply passed in by the caller
+inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t 
n2, int64_t len) {
+    DCHECK_EQ(len, std::min(n1, n2));
+#ifdef __SSE4_2__
+    while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
+        __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
+        __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
+        int chars_match = _mm_cmpestri(xmm0, 
sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
+                                       sse_util::CHARS_PER_128_BIT_REGISTER, 
sse_util::STRCMP_MODE);
+        if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
+            return (unsigned char)s1[chars_match] - (unsigned 
char)s2[chars_match];
+        }
+        len -= sse_util::CHARS_PER_128_BIT_REGISTER;
+        s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
+        s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
+    }
+#endif
+    unsigned char u1, u2;
+    while (len-- > 0) {
+        u1 = (unsigned char)*s1++;
+        u2 = (unsigned char)*s2++;
+        if (u1 != u2) return u1 - u2;
+        if (u1 == '\0') return n1 - n2;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (u1 == '\0') { return n1 - n2;
   }
   ```
   



##########
be/src/vec/common/string_ref.h:
##########
@@ -132,90 +142,183 @@ inline bool memequalSSE2Wide(const char* p1, const char* 
p2, size_t size) {
 
 #endif
 
+// Compare two strings using sse4.2 intrinsics if they are available. This 
code assumes
+// that the trivial cases are already handled (i.e. one string is empty).
+// Returns:
+//   < 0 if s1 < s2
+//   0 if s1 == s2
+//   > 0 if s1 > s2
+// The SSE code path is just under 2x faster than the non-sse code path.
+//   - s1/n1: ptr/len for the first string
+//   - s2/n2: ptr/len for the second string
+//   - len: min(n1, n2) - this can be more cheaply passed in by the caller
+inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t 
n2, int64_t len) {
+    DCHECK_EQ(len, std::min(n1, n2));
+#ifdef __SSE4_2__
+    while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
+        __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
+        __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
+        int chars_match = _mm_cmpestri(xmm0, 
sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
+                                       sse_util::CHARS_PER_128_BIT_REGISTER, 
sse_util::STRCMP_MODE);
+        if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
+            return (unsigned char)s1[chars_match] - (unsigned 
char)s2[chars_match];
+        }
+        len -= sse_util::CHARS_PER_128_BIT_REGISTER;
+        s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
+        s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
+    }
+#endif
+    unsigned char u1, u2;
+    while (len-- > 0) {
+        u1 = (unsigned char)*s1++;
+        u2 = (unsigned char)*s2++;
+        if (u1 != u2) return u1 - u2;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (u1 != u2) { return u1 - u2;
   }
   ```
   



##########
be/src/vec/functions/like.cpp:
##########
@@ -112,7 +111,7 @@ Status 
FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const Col
 
 Status FunctionLikeBase::constant_starts_with_fn_predicate(
         LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
-        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {
+        const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* 
sel, size_t sz) {

Review Comment:
   warning: pointer parameter 'sel' can be pointer to const 
[readability-non-const-parameter]
   
   ```suggestion
           const StringRef& pattern, ColumnUInt8::Container& result, const 
uint16_t* sel, size_t sz) {
   ```
   
   be/src/vec/functions/like.h:176:
   ```diff
   -                                                     
ColumnUInt8::Container& result, uint16_t* sel,
   +                                                     
ColumnUInt8::Container& result, const uint16_t* sel,
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to