[compiler-rt] 46c59d9 - scudo: Use DC GZVA instruction in storeTags().
Peter Collingbourne via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 21 13:54:09 PDT 2021
Author: Peter Collingbourne
Date: 2021-04-21T13:53:26-07:00
New Revision: 46c59d91dc7a39cc98be7a68d6dc60f3e8a35df0
URL: https://github.com/llvm/llvm-project/commit/46c59d91dc7a39cc98be7a68d6dc60f3e8a35df0
DIFF: https://github.com/llvm/llvm-project/commit/46c59d91dc7a39cc98be7a68d6dc60f3e8a35df0.diff
LOG: scudo: Use DC GZVA instruction in storeTags().
DC GZVA can operate on multiple granules at a time (corresponding to
the CPU's cache line size) so we can generally expect it to be faster
than STZG in a loop.
Differential Revision: https://reviews.llvm.org/D100910
Added:
Modified:
compiler-rt/lib/scudo/standalone/memtag.h
Removed:
################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h
index c1b6b99d66f91..5245124179947 100644
--- a/compiler-rt/lib/scudo/standalone/memtag.h
+++ b/compiler-rt/lib/scudo/standalone/memtag.h
@@ -152,20 +152,65 @@ inline uptr addFixedTag(uptr Ptr, uptr Tag) { return Ptr | (Tag << 56); }
inline uptr storeTags(uptr Begin, uptr End) {
DCHECK(Begin % 16 == 0);
- if (Begin != End) {
- __asm__ __volatile__(
- R"(
- .arch_extension memtag
+ uptr LineSize, Next, Tmp;
+ __asm__ __volatile__(
+ R"(
+ .arch_extension memtag
- 1:
- stzg %[Cur], [%[Cur]], #16
- cmp %[Cur], %[End]
- b.lt 1b
- )"
- : [Cur] "+&r"(Begin)
- : [End] "r"(End)
- : "memory");
- }
+ // Compute the cache line size in bytes (DCZID_EL0 stores it as the log2
+ // of the number of 4-byte words) and bail out to the slow path if DCZID_EL0
+ // indicates that the DC instructions are unavailable.
+ DCZID .req %[Tmp]
+ mrs DCZID, dczid_el0
+ tbnz DCZID, #4, 3f
+ and DCZID, DCZID, #15
+ mov %[LineSize], #4
+ lsl %[LineSize], %[LineSize], DCZID
+ .unreq DCZID
+
+ // Our main loop doesn't handle the case where we don't need to perform any
+ // DC GZVA operations. If the size of our tagged region is less than
+ // twice the cache line size, bail out to the slow path since it's not
+ // guaranteed that we'll be able to do a DC GZVA.
+ Size .req %[Tmp]
+ sub Size, %[End], %[Cur]
+ cmp Size, %[LineSize], lsl #1
+ b.lt 3f
+ .unreq Size
+
+ LineMask .req %[Tmp]
+ sub LineMask, %[LineSize], #1
+
+ // STZG until the start of the next cache line.
+ orr %[Next], %[Cur], LineMask
+ 1:
+ stzg %[Cur], [%[Cur]], #16
+ cmp %[Cur], %[Next]
+ b.lt 1b
+
+ // DC GZVA cache lines until we have no more full cache lines.
+ bic %[Next], %[End], LineMask
+ .unreq LineMask
+ 2:
+ dc gzva, %[Cur]
+ add %[Cur], %[Cur], %[LineSize]
+ cmp %[Cur], %[Next]
+ b.lt 2b
+
+ // STZG until the end of the tagged region. This loop is also used to handle
+ // slow path cases.
+ 3:
+ cmp %[Cur], %[End]
+ b.ge 4f
+ stzg %[Cur], [%[Cur]], #16
+ b 3b
+
+ 4:
+ )"
+ : [Cur] "+&r"(Begin), [LineSize] "=&r"(LineSize), [Next] "=&r"(Next),
+ [Tmp] "=&r"(Tmp)
+ : [End] "r"(End)
+ : "memory");
return Begin;
}
More information about the llvm-commits
mailing list