This revision was automatically updated to reflect the committed changes.
Closed by commit rL290761: improve performance of string::find (authored by 
spop).

Changed prior to commit:
  https://reviews.llvm.org/D27068?vs=80134&id=82731#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D27068

Files:
  libcxx/trunk/benchmarks/string.bench.cpp
  libcxx/trunk/include/__string

Index: libcxx/trunk/include/__string
===================================================================
--- libcxx/trunk/include/__string
+++ libcxx/trunk/include/__string
@@ -538,19 +538,59 @@
     return static_cast<_SizeT>(__r - __p);
 }
 
+template <class _CharT, class _Traits>
+inline _LIBCPP_CONSTEXPR_AFTER_CXX11 const _CharT *
+__search_substring(const _CharT *__first1, const _CharT *__last1,
+                   const _CharT *__first2, const _CharT *__last2) {
+  // Take advantage of knowing source and pattern lengths.
+  // Stop short when source is smaller than pattern.
+  const ptrdiff_t __len2 = __last2 - __first2;
+  if (__len2 == 0)
+    return __first1;
+
+  ptrdiff_t __len1 = __last1 - __first1;
+  if (__len1 < __len2)
+    return __last1;
+
+  // First element of __first2 is loop invariant.
+  _CharT __f2 = *__first2;
+  while (true) {
+    __len1 = __last1 - __first1;
+    // Check whether __first1 still has at least __len2 bytes.
+    if (__len1 < __len2)
+      return __last1;
+
+    // Find __f2 the first byte matching in __first1.
+    __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2);
+    if (__first1 == 0)
+      return __last1;
+
+    // It is faster to compare from the first byte of __first1 even if we
+    // already know that it matches the first byte of __first2: this is because
+    // __first2 is most likely aligned, as it is user's "pattern" string, and
+    // __first1 + 1 is most likely not aligned, as the match is in the middle of
+    // the string.
+    if (_Traits::compare(__first1, __first2, __len2) == 0)
+      return __first1;
+
+    ++__first1;
+  }
+}
+
 template<class _CharT, class _SizeT, class _Traits, _SizeT __npos>
 inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
 __str_find(const _CharT *__p, _SizeT __sz, 
        const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT
 {
-    if (__pos > __sz || __sz - __pos < __n)
+    if (__pos > __sz)
         return __npos;
-    if (__n == 0)
+
+    if (__n == 0) // There is nothing to search, just return __pos.
         return __pos;
-    const _CharT* __r = 
-        _VSTD::__search(__p + __pos, __p + __sz,
-                        __s, __s + __n, _Traits::eq,
-                        random_access_iterator_tag(), random_access_iterator_tag()).first;
+
+    const _CharT *__r = __search_substring<_CharT, _Traits>(
+        __p + __pos, __p + __sz, __s, __s + __n);
+
     if (__r == __p + __sz)
         return __npos;
     return static_cast<_SizeT>(__r - __p);
Index: libcxx/trunk/benchmarks/string.bench.cpp
===================================================================
--- libcxx/trunk/benchmarks/string.bench.cpp
+++ libcxx/trunk/benchmarks/string.bench.cpp
@@ -0,0 +1,49 @@
+#include <unordered_set>
+#include <vector>
+#include <cstdint>
+
+#include "benchmark/benchmark_api.h"
+#include "GenerateInput.hpp"
+
+constexpr std::size_t MAX_STRING_LEN = 8 << 14;
+
+// Benchmark when there is no match.
+static void BM_StringFindNoMatch(benchmark::State &state) {
+  std::string s1(state.range(0), '-');
+  std::string s2(8, '*');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindNoMatch)->Range(10, MAX_STRING_LEN);
+
+// Benchmark when the string matches first time.
+static void BM_StringFindAllMatch(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN, '-');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindAllMatch)->Range(1, MAX_STRING_LEN);
+
+// Benchmark when the string matches somewhere in the end.
+static void BM_StringFindMatch1(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN / 2, '*');
+  s1 += std::string(state.range(0), '-');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindMatch1)->Range(1, MAX_STRING_LEN / 4);
+
+// Benchmark when the string matches somewhere from middle to the end.
+static void BM_StringFindMatch2(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN / 2, '*');
+  s1 += std::string(state.range(0), '-');
+  s1 += std::string(state.range(0), '*');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4);
+
+BENCHMARK_MAIN()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to