[libcxx] r290761 - improve performance of string::find

Sebastian Pop via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 30 10:01:37 PST 2016


Author: spop
Date: Fri Dec 30 12:01:36 2016
New Revision: 290761

URL: http://llvm.org/viewvc/llvm-project?rev=290761&view=rev
Log:
improve performance of string::find

string::find used to call the generic algorithm ::find.  The patch special
case string::find such that it ultimately gets converted to calls to memchr
and memcmp.

The patch improves the performance of the string::find routine by about 20x.

Without the patch, the performance on an x86_64-linux 3400 MHz machine is:

Benchmark                           Time           CPU Iterations
-----------------------------------------------------------------
BM_StringFindNoMatch/10             4 ns          4 ns  166421326
BM_StringFindNoMatch/64            37 ns         37 ns   18754392
BM_StringFindNoMatch/512          268 ns        268 ns    2586060
BM_StringFindNoMatch/4k          2143 ns       2144 ns     328342
BM_StringFindNoMatch/32k        16910 ns      16917 ns      40623
BM_StringFindNoMatch/128k       67577 ns      67602 ns      10138
BM_StringFindAllMatch/1             3 ns          3 ns  265163471
BM_StringFindAllMatch/8             6 ns          6 ns  112582467
BM_StringFindAllMatch/64           36 ns         36 ns   19566457
BM_StringFindAllMatch/512         209 ns        209 ns    3318893
BM_StringFindAllMatch/4k         1618 ns       1618 ns     432963
BM_StringFindAllMatch/32k       12909 ns      12914 ns      54317
BM_StringFindAllMatch/128k      48342 ns      48361 ns      13922
BM_StringFindMatch1/1           33777 ns      33790 ns      20698
BM_StringFindMatch1/8           33940 ns      33953 ns      20619
BM_StringFindMatch1/64          34038 ns      34051 ns      20571
BM_StringFindMatch1/512         34217 ns      34230 ns      20480
BM_StringFindMatch1/4k          35510 ns      35524 ns      19752
BM_StringFindMatch1/32k         46438 ns      46456 ns      15030
BM_StringFindMatch2/1           33839 ns      33852 ns      20648
BM_StringFindMatch2/8           33950 ns      33963 ns      20594
BM_StringFindMatch2/64          33846 ns      33859 ns      20668
BM_StringFindMatch2/512         34023 ns      34036 ns      20279
BM_StringFindMatch2/4k          35422 ns      35436 ns      19716
BM_StringFindMatch2/32k         46570 ns      46588 ns      15027

With the patch applied

Benchmark                           Time           CPU Iterations
-----------------------------------------------------------------
BM_StringFindNoMatch/10             5 ns          5 ns  133724346
BM_StringFindNoMatch/64             6 ns          6 ns  119312184
BM_StringFindNoMatch/512           13 ns         13 ns   51539628
BM_StringFindNoMatch/4k            77 ns         77 ns    8935934
BM_StringFindNoMatch/32k          551 ns        551 ns    1222808
BM_StringFindNoMatch/128k        2684 ns       2685 ns     259957
BM_StringFindAllMatch/1             7 ns          7 ns   98017959
BM_StringFindAllMatch/8             7 ns          7 ns   91466911
BM_StringFindAllMatch/64            8 ns          8 ns   85707392
BM_StringFindAllMatch/512          20 ns         20 ns   34490895
BM_StringFindAllMatch/4k           93 ns         93 ns    7360375
BM_StringFindAllMatch/32k         827 ns        828 ns     829944
BM_StringFindAllMatch/128k       3593 ns       3594 ns     195815
BM_StringFindMatch1/1            1332 ns       1332 ns     516354
BM_StringFindMatch1/8            1336 ns       1336 ns     495876
BM_StringFindMatch1/64           1338 ns       1339 ns     516656
BM_StringFindMatch1/512          1357 ns       1357 ns     510717
BM_StringFindMatch1/4k           1485 ns       1486 ns     461228
BM_StringFindMatch1/32k          2235 ns       2236 ns     318253
BM_StringFindMatch2/1            1335 ns       1335 ns     517105
BM_StringFindMatch2/8            1336 ns       1337 ns     518004
BM_StringFindMatch2/64           1344 ns       1345 ns     511751
BM_StringFindMatch2/512          1361 ns       1361 ns     508150
BM_StringFindMatch2/4k           1611 ns       1611 ns     463388
BM_StringFindMatch2/32k          2187 ns       2187 ns     317532

Patch written by Aditya Kumar and Sebastian Pop.

Differential Revision: https://reviews.llvm.org/D27068

Added:
    libcxx/trunk/benchmarks/string.bench.cpp
Modified:
    libcxx/trunk/include/__string

Added: libcxx/trunk/benchmarks/string.bench.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/string.bench.cpp?rev=290761&view=auto
==============================================================================
--- libcxx/trunk/benchmarks/string.bench.cpp (added)
+++ libcxx/trunk/benchmarks/string.bench.cpp Fri Dec 30 12:01:36 2016
@@ -0,0 +1,49 @@
+#include <unordered_set>
+#include <vector>
+#include <cstdint>
+
+#include "benchmark/benchmark_api.h"
+#include "GenerateInput.hpp"
+
+constexpr std::size_t MAX_STRING_LEN = 8 << 14;
+
+// Benchmark when there is no match.
+static void BM_StringFindNoMatch(benchmark::State &state) {
+  std::string s1(state.range(0), '-');
+  std::string s2(8, '*');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindNoMatch)->Range(10, MAX_STRING_LEN);
+
+// Benchmark when the string matches first time.
+static void BM_StringFindAllMatch(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN, '-');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindAllMatch)->Range(1, MAX_STRING_LEN);
+
+// Benchmark when the string matches somewhere in the end.
+static void BM_StringFindMatch1(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN / 2, '*');
+  s1 += std::string(state.range(0), '-');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindMatch1)->Range(1, MAX_STRING_LEN / 4);
+
+// Benchmark when the string matches somewhere from middle to the end.
+static void BM_StringFindMatch2(benchmark::State &state) {
+  std::string s1(MAX_STRING_LEN / 2, '*');
+  s1 += std::string(state.range(0), '-');
+  s1 += std::string(state.range(0), '*');
+  std::string s2(state.range(0), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.find(s2));
+}
+BENCHMARK(BM_StringFindMatch2)->Range(1, MAX_STRING_LEN / 4);
+
+BENCHMARK_MAIN()

Modified: libcxx/trunk/include/__string
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/__string?rev=290761&r1=290760&r2=290761&view=diff
==============================================================================
--- libcxx/trunk/include/__string (original)
+++ libcxx/trunk/include/__string Fri Dec 30 12:01:36 2016
@@ -538,19 +538,59 @@ __str_find(const _CharT *__p, _SizeT __s
     return static_cast<_SizeT>(__r - __p);
 }
 
+template <class _CharT, class _Traits>
+inline _LIBCPP_CONSTEXPR_AFTER_CXX11 const _CharT *
+__search_substring(const _CharT *__first1, const _CharT *__last1,
+                   const _CharT *__first2, const _CharT *__last2) {
+  // Take advantage of knowing source and pattern lengths.
+  // Stop short when source is smaller than pattern.
+  const ptrdiff_t __len2 = __last2 - __first2;
+  if (__len2 == 0)
+    return __first1;
+
+  ptrdiff_t __len1 = __last1 - __first1;
+  if (__len1 < __len2)
+    return __last1;
+
+  // First element of __first2 is loop invariant.
+  _CharT __f2 = *__first2;
+  while (true) {
+    __len1 = __last1 - __first1;
+    // Check whether __first1 still has at least __len2 bytes.
+    if (__len1 < __len2)
+      return __last1;
+
+    // Find __f2 the first byte matching in __first1.
+    __first1 = _Traits::find(__first1, __len1 - __len2 + 1, __f2);
+    if (__first1 == 0)
+      return __last1;
+
+    // It is faster to compare from the first byte of __first1 even if we
+    // already know that it matches the first byte of __first2: this is because
+    // __first2 is most likely aligned, as it is user's "pattern" string, and
+    // __first1 + 1 is most likely not aligned, as the match is in the middle of
+    // the string.
+    if (_Traits::compare(__first1, __first2, __len2) == 0)
+      return __first1;
+
+    ++__first1;
+  }
+}
+
 template<class _CharT, class _SizeT, class _Traits, _SizeT __npos>
 inline _SizeT _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
 __str_find(const _CharT *__p, _SizeT __sz, 
        const _CharT* __s, _SizeT __pos, _SizeT __n) _NOEXCEPT
 {
-    if (__pos > __sz || __sz - __pos < __n)
+    if (__pos > __sz)
         return __npos;
-    if (__n == 0)
+
+    if (__n == 0) // There is nothing to search, just return __pos.
         return __pos;
-    const _CharT* __r = 
-        _VSTD::__search(__p + __pos, __p + __sz,
-                        __s, __s + __n, _Traits::eq,
-                        random_access_iterator_tag(), random_access_iterator_tag()).first;
+
+    const _CharT *__r = __search_substring<_CharT, _Traits>(
+        __p + __pos, __p + __sz, __s, __s + __n);
+
     if (__r == __p + __sz)
         return __npos;
     return static_cast<_SizeT>(__r - __p);




More information about the cfe-commits mailing list