[clang] 3859fc6 - AArch64: Switch to x20 as the shadow base register for outlined HWASan checks.

Peter Collingbourne via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 30 12:52:36 PDT 2020


Author: Peter Collingbourne
Date: 2020-10-30T12:51:30-07:00
New Revision: 3859fc653fb49f4acfb61b6f2c38736ec9bfae06

URL: https://github.com/llvm/llvm-project/commit/3859fc653fb49f4acfb61b6f2c38736ec9bfae06
DIFF: https://github.com/llvm/llvm-project/commit/3859fc653fb49f4acfb61b6f2c38736ec9bfae06.diff

LOG: AArch64: Switch to x20 as the shadow base register for outlined HWASan checks.

From a code size perspective it turns out to be better to use a
callee-saved register to pass the shadow base. For non-leaf functions
it avoids the need to reload the shadow base into x9 after each
function call, at the cost of an additional stack slot to save the
caller's x20. But with x9 there is also a stack size cost, either
as a result of copying x9 to a callee-saved register across calls or
by spilling it to the stack, so for the non-leaf functions the change to
stack usage is largely neutral.

It is also code size (and stack size) neutral for many leaf functions.
Although they now need to save/restore x20 this can typically be
combined via LDP/STP into the x30 save/restore. In the case where
the function needs callee-saved registers or stack spills we end up
needing, on average, 8 more bytes of stack and 1 more instruction
but given the improvements to other functions this seems like the
right tradeoff.

Unfortunately we cannot change the register for the v1 (non short
granules) check because the runtime assumes that the shadow base
register is stored in x9, so the v1 check still uses x9.

Aside from that there is no change to the ABI because the choice
of shadow base register is a contract between the caller and the
outlined check function, both of which are compiler generated. We do
need to rename the v2 check functions though because the functions
are deduplicated based on their names, not on their contents, and we
need to make sure that when object files from old and new compilers
are linked together we don't end up with a function that uses x9
calling an outlined check that uses x20 or vice versa.

With this change code size of /system/lib64/*.so in an Android build
with HWASan goes from 200066976 bytes to 194085912 bytes, or a 3%
decrease.

Differential Revision: https://reviews.llvm.org/D90422

Added: 
    

Modified: 
    clang/docs/HardwareAssistedAddressSanitizerDesign.rst
    compiler-rt/test/hwasan/TestCases/register-dump-read.c
    llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll

Removed: 
    


################################################################################
diff  --git a/clang/docs/HardwareAssistedAddressSanitizerDesign.rst b/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
index 0e6c6902cfb3..bb612138264e 100644
--- a/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
+++ b/clang/docs/HardwareAssistedAddressSanitizerDesign.rst
@@ -84,20 +84,20 @@ Currently, the following sequence is used:
   // clang -O2 --target=aarch64-linux-android30 -fsanitize=hwaddress -S -o - load.c
   [...]
   foo:
-        str     x30, [sp, #-16]!
-        adrp    x9, :got:__hwasan_shadow                // load shadow address from GOT into x9
-        ldr     x9, [x9, :got_lo12:__hwasan_shadow]
-        bl      __hwasan_check_x0_2_short               // call outlined tag check
-                                                        // (arguments: x0 = address, x9 = shadow base;
+        stp     x30, x20, [sp, #-16]!
+        adrp    x20, :got:__hwasan_shadow               // load shadow address from GOT into x20
+        ldr     x20, [x20, :got_lo12:__hwasan_shadow]
+        bl      __hwasan_check_x0_2_short_v2            // call outlined tag check
+                                                        // (arguments: x0 = address, x20 = shadow base;
                                                         // "2" encodes the access type and size)
         ldr     w0, [x0]                                // inline load
-        ldr     x30, [sp], #16
+        ldp     x30, x20, [sp], #16
         ret
 
   [...]
-  __hwasan_check_x0_2_short:
+  __hwasan_check_x0_2_short_v2:
         ubfx    x16, x0, #4, #52                        // shadow offset
-        ldrb    w16, [x9, x16]                          // load shadow tag
+        ldrb    w16, [x20, x16]                         // load shadow tag
         cmp     x16, x0, lsr #56                        // extract address tag, compare with shadow tag
         b.ne    .Ltmp0                                  // jump to short tag handler on mismatch
   .Ltmp1:

diff  --git a/compiler-rt/test/hwasan/TestCases/register-dump-read.c b/compiler-rt/test/hwasan/TestCases/register-dump-read.c
index 19bf03f5d030..8325857fa916 100644
--- a/compiler-rt/test/hwasan/TestCases/register-dump-read.c
+++ b/compiler-rt/test/hwasan/TestCases/register-dump-read.c
@@ -15,7 +15,7 @@ int main() {
   __hwasan_enable_allocator_tagging();
   char * volatile x = (char*) malloc(10);
   asm volatile("mov x10, #0x2222\n"
-               "mov x20, #0x3333\n"
+               "mov x23, #0x3333\n"
                "mov x27, #0x4444\n");
   return x[16];
 
@@ -35,8 +35,8 @@ int main() {
   // CHECK-SAME: x11{{[ ]+[0-9a-f]{16}$}}
   // CHECK-NEXT: x12{{[ ]+[0-9a-f]{16}[ ]}}x13{{[ ]+[0-9a-f]{16}[ ]}}x14{{[ ]+[0-9a-f]{16}[ ]}}x15{{[ ]+[0-9a-f]{16}$}}
   // CHECK-NEXT: x16{{[ ]+[0-9a-f]{16}[ ]}}x17{{[ ]+[0-9a-f]{16}[ ]}}x18{{[ ]+[0-9a-f]{16}[ ]}}x19{{[ ]+[0-9a-f]{16}$}}
-  // CHECK-NEXT: x20 0000000000003333
-  // CHECK-SAME: x21{{[ ]+[0-9a-f]{16}[ ]}}x22{{[ ]+[0-9a-f]{16}[ ]}}x23{{[ ]+[0-9a-f]{16}$}}
+  // CHECK-NEXT: x20{{[ ]+[0-9a-f]{16}[ ]}}x21{{[ ]+[0-9a-f]{16}[ ]}}x22{{[ ]+[0-9a-f]{16}[ ]}}
+  // CHECK-SAME: x23 0000000000003333{{$}}
   // CHECK-NEXT: x24{{[ ]+[0-9a-f]{16}[ ]}}x25{{[ ]+[0-9a-f]{16}[ ]}}x26{{[ ]+[0-9a-f]{16}[ ]}}
   // CHECK-SAME: x27 0000000000004444
   // CHECK-NEXT: x28{{[ ]+[0-9a-f]{16}[ ]}}x29{{[ ]+[0-9a-f]{16}[ ]}}x30{{[ ]+[0-9a-f]{16}$}}

diff  --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5079494225eb..dd101db1917c 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -302,7 +302,7 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
     std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
                           utostr(AccessInfo);
     if (IsShort)
-      SymName += "_short";
+      SymName += "_short_v2";
     Sym = OutContext.getOrCreateSymbol(SymName);
   }
 
@@ -354,13 +354,14 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
                                      .addImm(4)
                                      .addImm(55),
                                  *STI);
-    OutStreamer->emitInstruction(MCInstBuilder(AArch64::LDRBBroX)
-                                     .addReg(AArch64::W16)
-                                     .addReg(AArch64::X9)
-                                     .addReg(AArch64::X16)
-                                     .addImm(0)
-                                     .addImm(0),
-                                 *STI);
+    OutStreamer->emitInstruction(
+        MCInstBuilder(AArch64::LDRBBroX)
+            .addReg(AArch64::W16)
+            .addReg(IsShort ? AArch64::X20 : AArch64::X9)
+            .addReg(AArch64::X16)
+            .addImm(0)
+            .addImm(0),
+        *STI);
     OutStreamer->emitInstruction(
         MCInstBuilder(AArch64::SUBSXrs)
             .addReg(AArch64::XZR)

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2691eaf38101..c69d840c3b03 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1123,9 +1123,12 @@ def HWASAN_CHECK_MEMACCESS : Pseudo<
   (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
   [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
   Sched<[]>;
+}
+
+let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
 def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
   (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
-  [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
+  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
   Sched<[]>;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll b/llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll
index 48c5bf68efab..4042eee6d262 100644
--- a/llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll
+++ b/llvm/test/CodeGen/AArch64/hwasan-check-memaccess.ll
@@ -18,12 +18,13 @@ define i8* @f1(i8* %x0, i8* %x1) {
 
 define i8* @f2(i8* %x0, i8* %x1) {
   ; CHECK: f2:
-  ; CHECK: str x30, [sp, #-16]!
+  ; CHECK: stp x30, x20, [sp, #-16]!
   ; CHECK-NEXT: .cfi_def_cfa_offset 16
+  ; CHECK-NEXT: .cfi_offset w20, -8
   ; CHECK-NEXT: .cfi_offset w30, -16
-  ; CHECK-NEXT: mov x9, x1
-  ; CHECK-NEXT: bl __hwasan_check_x0_2_short
-  ; CHECK-NEXT: ldr x30, [sp], #16
+  ; CHECK-NEXT: mov x20, x1
+  ; CHECK-NEXT: bl __hwasan_check_x0_2_short_v2
+  ; CHECK-NEXT: ldp x30, x20, [sp], #16
   ; CHECK-NEXT: ret
   call void @llvm.hwasan.check.memaccess.shortgranules(i8* %x1, i8* %x0, i32 2)
   ret i8* %x0
@@ -32,13 +33,13 @@ define i8* @f2(i8* %x0, i8* %x1) {
 declare void @llvm.hwasan.check.memaccess(i8*, i8*, i32)
 declare void @llvm.hwasan.check.memaccess.shortgranules(i8*, i8*, i32)
 
-; CHECK:      .section .text.hot,"axG",@progbits,__hwasan_check_x0_2_short,comdat
-; CHECK-NEXT: .type __hwasan_check_x0_2_short,@function
-; CHECK-NEXT: .weak __hwasan_check_x0_2_short
-; CHECK-NEXT: .hidden __hwasan_check_x0_2_short
-; CHECK-NEXT: __hwasan_check_x0_2_short:
+; CHECK:      .section .text.hot,"axG",@progbits,__hwasan_check_x0_2_short_v2,comdat
+; CHECK-NEXT: .type __hwasan_check_x0_2_short_v2,@function
+; CHECK-NEXT: .weak __hwasan_check_x0_2_short_v2
+; CHECK-NEXT: .hidden __hwasan_check_x0_2_short_v2
+; CHECK-NEXT: __hwasan_check_x0_2_short_v2:
 ; CHECK-NEXT: ubfx x16, x0, #4, #52
-; CHECK-NEXT: ldrb w16, [x9, x16]
+; CHECK-NEXT: ldrb w16, [x20, x16]
 ; CHECK-NEXT: cmp x16, x0, lsr #56
 ; CHECK-NEXT: b.ne .Ltmp0
 ; CHECK-NEXT: .Ltmp1:


        


More information about the cfe-commits mailing list