[llvm] [llvm] Improve implementation of StringRef::find_last_of and cie (PR #71865)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 09:04:24 PST 2023
https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/71865
>From e8ed0425579eba8991b619e75a96e2903cf3c793 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 16 Nov 2023 18:03:32 +0100
Subject: [PATCH] [llvm] Improve implementation of StringRef::find_last_of for
the usual case of 2 chars
Almost all uses of StringRef::find_last_of in Clang/LLVM pass a Needle
of 2 elements, a case that can be optimized using a generic vectorized
algorithm and a few bit hacks.
---
llvm/lib/Support/StringRef.cpp | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index feee47ca693b251..d240c475fe58691 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -268,17 +268,47 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
return npos;
}
+// See https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
+//
+// Word-at-a-time zero-byte detection. Returns a mask in which bit 7 of a byte
+// is set exactly when the corresponding byte of \p v is zero; every other bit
+// is clear. The result is therefore non-zero iff \p v contains a zero byte.
+//
+// This is the exact form of the test. The shorter
+// `(v - 0x01..01) & ~v & 0x80..80` form additionally flags a 0x01 byte that
+// sits directly above a zero byte (the subtraction borrows across the byte
+// boundary). That is harmless for a yes/no answer, but it yields a wrong
+// position as soon as the mask is fed to a count-leading-zeros computation.
+static inline uint64_t haszero(uint64_t v) {
+  constexpr uint64_t Low7 = 0x7F7F7F7F7F7F7F7FULL;
+  // (v & Low7) + Low7 carries into bit 7 of a byte iff its low seven bits are
+  // not all zero, and never carries out of the byte. OR-ing with v also marks
+  // bytes whose own bit 7 was set. After forcing the low seven bits to one and
+  // inverting, only bit 7 of the all-zero bytes survives.
+  return ~(((v & Low7) + Low7) | v | Low7);
+}
+// Returns a non-zero value iff at least one byte of \p x equals \p n.
+//
+// \p n is converted to unsigned char before being broadcast to all eight
+// bytes: plain char may be signed, and sign-extending a value >= 0x80 would
+// corrupt every byte of the pattern except the lowest one. The broadcast
+// multiplier is spelled as a 64-bit literal because `~0UL / 255` is only 32
+// bits wide on targets where unsigned long is 32 bits, which would leave the
+// upper four bytes of the pattern at zero.
+static inline uint64_t hasvalue(uint64_t x, char n) {
+  const uint64_t Pattern =
+      0x0101010101010101ULL * static_cast<unsigned char>(n);
+  // A byte of x that equals n becomes zero after the XOR.
+  return haszero(x ^ Pattern);
+}
+
+/// Specialization of find_last_of for a needle of exactly two characters.
+///
+/// Returns the largest index I < \p Sz such that Data[I] equals \p C0 or
+/// \p C1, or StringRef::npos if there is no such index. Only bytes in
+/// [Data, Data + Sz) are read, so \p Sz == 0 is valid and never dereferences
+/// \p Data.
+///
+/// This is a hot spot for some clangd operations, which is why trailing
+/// 8-byte words that contain neither character are skipped with a single load
+/// and a few bit operations instead of eight byte comparisons.
+static StringRef::size_type
+vectorized_find_last_of_specialized(const char *Data, size_t Sz, char C0,
+                                    char C1) {
+  constexpr size_t WordSize = sizeof(uint64_t);
+
+  // Word phase: walk backwards over complete words only. A word is loaded
+  // only while at least WordSize bytes remain below Sz, so short inputs are
+  // never over-read and bytes at or beyond the original Sz are never
+  // inspected. Both characters are tested together; stopping at the first one
+  // would miss the other when it occurs later in the same word.
+  while (Sz >= WordSize) {
+    uint64_t Word = 0;
+    std::memcpy(&Word, Data + Sz - WordSize, WordSize);
+    if (hasvalue(Word, C0) | hasvalue(Word, C1))
+      break;
+    Sz -= WordSize;
+  }
+
+  // Byte phase: everything at or above Sz has been ruled out. hasvalue is used
+  // purely as a yes/no filter above, so locating the match here does not
+  // depend on the host byte order or on which bits of the mask are set. When
+  // the word phase stopped on a hit, the match is within the next WordSize
+  // bytes; otherwise fewer than WordSize bytes are left to scan.
+  while (Sz != 0) {
+    --Sz;
+    if (Data[Sz] == C0 || Data[Sz] == C1)
+      return Sz;
+  }
+  return StringRef::npos;
+}
+
/// find_last_of - Find the last character in the string that is in \arg C,
/// or npos if not found.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_of(StringRef Chars,
size_t From) const {
+ size_type Sz = std::min(From, Length);
+
+ if (Chars.size() == 2)
+ return vectorized_find_last_of_specialized(Data, Sz, Chars[0], Chars[1]);
+
std::bitset<1 << CHAR_BIT> CharBits;
for (char C : Chars)
CharBits.set((unsigned char)C);
- for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
+ for (size_type i = Sz - 1, e = -1; i != e; --i)
if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
More information about the llvm-commits
mailing list