Ping.
From: Aditya Kumar <aditya...@samsung.com> Sent: Wednesday, December 7, 2016 11:46 AM To: libstd...@gcc.gnu.org Cc: gcc-patches@gcc.gnu.org; hiradi...@msn.com; Aditya Kumar Subject: [PATCH] improve string find algorithm Here is an improved version of basic_string::find. The idea is to split the string find in two parts: 1. search for the first match by using traits_type::find (this gets converted to memchr for x86) 2. see if there is a match (this gets converted to memcmp for x86) Passes bootstrap on x86-64. The patch results in good improvements on a synthetic test case I wrote using the google-benchmark. following are the results. Branch: master without patch $ ./bin/string.libcxx.out Run on (24 X 1899.12 MHz CPU s) 2016-12-06 16:41:55 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations ----------------------------------------------------------------- BM_StringFindNoMatch/10 8 ns 8 ns 81880927 BM_StringFindNoMatch/64 52 ns 52 ns 13235018 BM_StringFindNoMatch/512 355 ns 355 ns 1962488 BM_StringFindNoMatch/4k 2769 ns 2772 ns 249090 BM_StringFindNoMatch/32k 22598 ns 22619 ns 30984 BM_StringFindNoMatch/128k 89745 ns 89830 ns 7996 BM_StringFindAllMatch/1 7 ns 7 ns 102893835 BM_StringFindAllMatch/8 9 ns 9 ns 75403364 BM_StringFindAllMatch/64 12 ns 12 ns 60766893 BM_StringFindAllMatch/512 31 ns 31 ns 23163999 BM_StringFindAllMatch/4k 141 ns 141 ns 4980386 BM_StringFindAllMatch/32k 1402 ns 1403 ns 483581 BM_StringFindAllMatch/128k 5604 ns 5609 ns 126123 BM_StringFindMatch1/1 44430 ns 44473 ns 15804 BM_StringFindMatch1/8 44315 ns 44357 ns 15741 BM_StringFindMatch1/64 44689 ns 44731 ns 15712 BM_StringFindMatch1/512 44247 ns 44290 ns 15724 BM_StringFindMatch1/4k 45010 ns 45053 ns 15678 BM_StringFindMatch1/32k 45717 ns 45761 ns 15278 BM_StringFindMatch2/1 44307 ns 44349 ns 15730 BM_StringFindMatch2/8 44631 ns 44674 ns 15721 BM_StringFindMatch2/64 44300 ns 44342 ns 15750 BM_StringFindMatch2/512 44239 ns 44281 ns 15713 BM_StringFindMatch2/4k 44886 ns 44928 ns 15787 Branch: master with patch $ ./bin/string.libcxx.out Run on (24 X 2892.28 MHz CPU s) 2016-12-06 18:51:38 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations ----------------------------------------------------------------- BM_StringFindNoMatch/10 11 ns 11 ns 63049677 BM_StringFindNoMatch/64 12 ns 12 ns 57259381 BM_StringFindNoMatch/512 27 ns 27 ns 25495432 BM_StringFindNoMatch/4k 130 ns 130 ns 5301301 BM_StringFindNoMatch/32k 858 ns 859 ns 824048 BM_StringFindNoMatch/128k 4091 ns 4095 ns 171493 BM_StringFindAllMatch/1 14 ns 14 ns 53023977 BM_StringFindAllMatch/8 14 ns 14 ns 51516536 BM_StringFindAllMatch/64 17 ns 17 ns 40992668 BM_StringFindAllMatch/512 37 ns 37 ns 18503267 BM_StringFindAllMatch/4k 153 ns 153 ns 4494458 BM_StringFindAllMatch/32k 1460 ns 1461 ns 483380 BM_StringFindAllMatch/128k 5801 ns 5806 ns 120680 BM_StringFindMatch1/1 2062 ns 2064 ns 333144 BM_StringFindMatch1/8 2057 ns 2059 ns 335496 BM_StringFindMatch1/64 2083 ns 2085 ns 341469 BM_StringFindMatch1/512 2134 ns 2136 ns 336880 BM_StringFindMatch1/4k 2309 ns 2312 ns 308745 BM_StringFindMatch1/32k 3413 ns 3417 ns 208206 BM_StringFindMatch2/1 2053 ns 2055 ns 341523 BM_StringFindMatch2/8 2061 ns 2063 ns 343999 BM_StringFindMatch2/64 2075 ns 2077 ns 338479 BM_StringFindMatch2/512 2102 ns 2104 ns 332276 BM_StringFindMatch2/4k 2286 ns 2288 ns 300416 BM_StringFindMatch2/32k 3385 ns 3388 ns 204158 ChangeLog: 2016-12-07 Aditya Kumar <hiradi...@msn.com> * include/bits/basic_string.tcc(find(const _CharT* __s, size_type __pos, size_type __n) const)): Improve the algorithm --- libstdc++-v3/include/bits/basic_string.tcc | 31 ++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index df1e8dd..7942ee6 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -1194,14 +1194,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__n == 0) return __pos <= __size ? __pos : npos; - if (__n <= __size) - { - for (; __pos <= __size - __n; ++__pos) - if (traits_type::eq(__data[__pos], __s[0]) - && traits_type::compare(__data + __pos + 1, - __s + 1, __n - 1) == 0) - return __pos; - } + if (__n > __size) + return npos; + + const _CharT __elem0 = __s[0]; + const _CharT* __first1 = __data; + const _CharT* __first2 = __s; + const _CharT* __last1 = __data + __size; + ptrdiff_t __len2 = __n - __pos; + + while (true) { + ptrdiff_t __len1 = __last1 - __first1; + if (__len1 < __len2) + return npos; + + // Find the first match. + __first1 = traits_type::find(__first1, __len1 - __len2 + 1, __elem0); + if (__first1 == 0) + return npos; + // Compare the full string when first match is found. + if (traits_type::compare(__first1, __first2, __len2) == 0) + return __first1 - __data; + } + return npos; } -- 2.6.3