[llvm] Resolve FIXME: use RegScavenger to find the best unused register (PR #78910)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 22 11:16:43 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/78910

>From 307235686bea441809dc4f7dbe0addc6413c97a9 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Sun, 21 Jan 2024 14:53:16 -0500
Subject: [PATCH] Resolve FIXME: use RegScavenger to find the best unused
 register

---
 .../AArch64/AArch64SpeculationHardening.cpp   |  17 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   1 -
 llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp   |   4 +-
 .../AArch64/speculation-hardening-sls.mir     | 155 ++++++++++++++++++
 .../CodeGen/AArch64/speculation-hardening.mir | 115 +++++++++++++
 5 files changed, 279 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index a991d645eb6f405..78aa7a356382e57 100644
--- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -296,17 +296,16 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
     // The RegScavenger represents registers available *after* the MI
     // instruction pointed to by RS.getCurrentPosition().
     // We need to have a register that is available *before* the MI is executed.
-    if (I == MBB.begin())
+    Register TmpReg;
+    if (I == MBB.begin()) {
       RS.enterBasicBlock(MBB);
-    else
+      // Cannot scavenge backwards as we are at the start of the basic block.
+      TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+    } else {
       RS.backward(I);
-    // FIXME: The below just finds *a* unused register. Maybe code could be
-    // optimized more if this looks for the register that isn't used for the
-    // longest time around this place, to enable more scheduling freedom. Not
-    // sure if that would actually result in a big performance difference
-    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
-    // already to do this - but it's unclear if that could easily be used here.
-    Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+      TmpReg = RS.scavengeRegisterBackwards(AArch64::GPR64commonRegClass, I,
+                                            false, 0, false);
+    }
     LLVM_DEBUG(dbgs() << "RS finds "
                       << ((TmpReg == 0) ? "no register " : "register ");
                if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a2cacb5cbaa393a..8f9d95c6d4b1421 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1619,7 +1619,6 @@ void SIRegisterInfo::buildSpillLoadStore(
       }
       SubReg = TmpIntermediateVGPR;
     } else if (UseVGPROffset) {
-      // FIXME: change to scavengeRegisterBackwards()
       if (!TmpOffsetVGPR) {
         TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
                                                       MI, false, 0);
diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 3a271cb95c075d0..ff4f5b47dacbec7 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -165,9 +165,7 @@ bool LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if ((isSPLSOpcode(MI.getOpcode()) && !isInt<10>(Offset)) ||
       !isInt<16>(Offset)) {
     assert(RS && "Register scavenging must be on");
-    Register Reg = RS->FindUnusedReg(&Lanai::GPRRegClass);
-    if (!Reg)
-      Reg = RS->scavengeRegisterBackwards(Lanai::GPRRegClass, II, false, SPAdj);
+    Register Reg = RS->scavengeRegisterBackwards(Lanai::GPRRegClass, II, false, SPAdj);
     assert(Reg && "Register scavenger failed");
 
     bool HasNegOffset = false;
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir
index d7a5a7e3766b44b..e1b4e6c3f904184 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening-sls.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
 # RUN:     -start-before aarch64-sls-hardening -o - %s \
 # RUN:     -mattr=+pauth,+harden-sls-retbr \
@@ -16,6 +17,44 @@
   @ptr_abz = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@br_abz, %return), i8* blockaddress(@br_abz, %l2)], align 8
 
   define dso_local i32 @br_aa(i32 %a, i32 %b, i32 %i) {
+  ; ISBDSB-LABEL: br_aa:
+  ; ISBDSB:       // %bb.0: // %entry
+  ; ISBDSB-NEXT:    adrp x8, .Lptr_aa
+  ; ISBDSB-NEXT:    add x8, x8, :lo12:.Lptr_aa
+  ; ISBDSB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; ISBDSB-NEXT:    braa x8, sp
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp0: // Block address taken
+  ; ISBDSB-NEXT:  .LBB0_1: // %return
+  ; ISBDSB-NEXT:    mov w0, wzr
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp1: // Block address taken
+  ; ISBDSB-NEXT:  .LBB0_2: // %l2
+  ; ISBDSB-NEXT:    mov w0, #1 // =0x1
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ;
+  ; SB-LABEL: br_aa:
+  ; SB:       // %bb.0: // %entry
+  ; SB-NEXT:    adrp x8, .Lptr_aa
+  ; SB-NEXT:    add x8, x8, :lo12:.Lptr_aa
+  ; SB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; SB-NEXT:    braa x8, sp
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp0: // Block address taken
+  ; SB-NEXT:  .LBB0_1: // %return
+  ; SB-NEXT:    mov w0, wzr
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp1: // Block address taken
+  ; SB-NEXT:  .LBB0_2: // %l2
+  ; SB-NEXT:    mov w0, #1 // =0x1
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
   entry:
     br label %l2
   l2:
@@ -24,6 +63,44 @@
     ret i32 undef
   }
   define dso_local i32 @br_aaz(i32 %a, i32 %b, i32 %i) {
+  ; ISBDSB-LABEL: br_aaz:
+  ; ISBDSB:       // %bb.0: // %entry
+  ; ISBDSB-NEXT:    adrp x8, .Lptr_aaz
+  ; ISBDSB-NEXT:    add x8, x8, :lo12:.Lptr_aaz
+  ; ISBDSB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; ISBDSB-NEXT:    braaz x8
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp2: // Block address taken
+  ; ISBDSB-NEXT:  .LBB1_1: // %return
+  ; ISBDSB-NEXT:    mov w0, wzr
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp3: // Block address taken
+  ; ISBDSB-NEXT:  .LBB1_2: // %l2
+  ; ISBDSB-NEXT:    mov w0, #1 // =0x1
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ;
+  ; SB-LABEL: br_aaz:
+  ; SB:       // %bb.0: // %entry
+  ; SB-NEXT:    adrp x8, .Lptr_aaz
+  ; SB-NEXT:    add x8, x8, :lo12:.Lptr_aaz
+  ; SB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; SB-NEXT:    braaz x8
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp2: // Block address taken
+  ; SB-NEXT:  .LBB1_1: // %return
+  ; SB-NEXT:    mov w0, wzr
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp3: // Block address taken
+  ; SB-NEXT:  .LBB1_2: // %l2
+  ; SB-NEXT:    mov w0, #1 // =0x1
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
   entry:
     br label %l2
   l2:
@@ -32,6 +109,44 @@
     ret i32 undef
   }
   define dso_local i32 @br_ab(i32 %a, i32 %b, i32 %i) {
+  ; ISBDSB-LABEL: br_ab:
+  ; ISBDSB:       // %bb.0: // %entry
+  ; ISBDSB-NEXT:    adrp x8, .Lptr_ab
+  ; ISBDSB-NEXT:    add x8, x8, :lo12:.Lptr_ab
+  ; ISBDSB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; ISBDSB-NEXT:    braa x8, sp
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp4: // Block address taken
+  ; ISBDSB-NEXT:  .LBB2_1: // %return
+  ; ISBDSB-NEXT:    mov w0, wzr
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp5: // Block address taken
+  ; ISBDSB-NEXT:  .LBB2_2: // %l2
+  ; ISBDSB-NEXT:    mov w0, #1 // =0x1
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ;
+  ; SB-LABEL: br_ab:
+  ; SB:       // %bb.0: // %entry
+  ; SB-NEXT:    adrp x8, .Lptr_ab
+  ; SB-NEXT:    add x8, x8, :lo12:.Lptr_ab
+  ; SB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; SB-NEXT:    braa x8, sp
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp4: // Block address taken
+  ; SB-NEXT:  .LBB2_1: // %return
+  ; SB-NEXT:    mov w0, wzr
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp5: // Block address taken
+  ; SB-NEXT:  .LBB2_2: // %l2
+  ; SB-NEXT:    mov w0, #1 // =0x1
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
   entry:
     br label %l2
   l2:
@@ -40,6 +155,44 @@
     ret i32 undef
   }
   define dso_local i32 @br_abz(i32 %a, i32 %b, i32 %i) {
+  ; ISBDSB-LABEL: br_abz:
+  ; ISBDSB:       // %bb.0: // %entry
+  ; ISBDSB-NEXT:    adrp x8, .Lptr_abz
+  ; ISBDSB-NEXT:    add x8, x8, :lo12:.Lptr_abz
+  ; ISBDSB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; ISBDSB-NEXT:    braaz x8
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp6: // Block address taken
+  ; ISBDSB-NEXT:  .LBB3_1: // %return
+  ; ISBDSB-NEXT:    mov w0, wzr
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ; ISBDSB-NEXT:  .Ltmp7: // Block address taken
+  ; ISBDSB-NEXT:  .LBB3_2: // %l2
+  ; ISBDSB-NEXT:    mov w0, #1 // =0x1
+  ; ISBDSB-NEXT:    ret
+  ; ISBDSB-NEXT:    dsb sy
+  ; ISBDSB-NEXT:    isb
+  ;
+  ; SB-LABEL: br_abz:
+  ; SB:       // %bb.0: // %entry
+  ; SB-NEXT:    adrp x8, .Lptr_abz
+  ; SB-NEXT:    add x8, x8, :lo12:.Lptr_abz
+  ; SB-NEXT:    ldr x8, [x8, w2, sxtw #3]
+  ; SB-NEXT:    braaz x8
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp6: // Block address taken
+  ; SB-NEXT:  .LBB3_1: // %return
+  ; SB-NEXT:    mov w0, wzr
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
+  ; SB-NEXT:  .Ltmp7: // Block address taken
+  ; SB-NEXT:  .LBB3_2: // %l2
+  ; SB-NEXT:    mov w0, #1 // =0x1
+  ; SB-NEXT:    ret
+  ; SB-NEXT:    sb
   entry:
     br label %l2
   l2:
@@ -148,3 +301,5 @@ body:             |
     $w0 = ORRWrs $wzr, $wzr, 0
     RET undef $lr, implicit $w0
 ...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
index 407ef2e0e905bb5..b02ec14783cb2b8 100644
--- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir
+++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \
 # RUN:     -start-before aarch64-speculation-hardening -o - %s \
 # RUN:   | FileCheck %s
@@ -11,34 +12,148 @@
 # - other direct branches don't seem to be generated by the AArch64 codegen
 --- |
   define void @nobranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: nobranch_fallthrough:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:  // %bb.1:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   define void @uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: uncondbranch:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:  // %bb.1:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   define void @condbranch_fallthrough(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: condbranch_fallthrough:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    cmp w0, w1
+  ; CHECK-NEXT:    b.lt .LBB2_2
+  ; CHECK-NEXT:  // %bb.1:
+  ; CHECK-NEXT:    csel x16, x16, xzr, ge
+  ; CHECK-NEXT:    b .LBB2_3
+  ; CHECK-NEXT:  .LBB2_2:
+  ; CHECK-NEXT:    csel x16, x16, xzr, lt
+  ; CHECK-NEXT:  .LBB2_3:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   define void @condbranch_uncondbranch(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: condbranch_uncondbranch:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    cmp w0, w1
+  ; CHECK-NEXT:    b.lt .LBB3_2
+  ; CHECK-NEXT:  // %bb.1:
+  ; CHECK-NEXT:    csel x16, x16, xzr, ge
+  ; CHECK-NEXT:    b .LBB3_3
+  ; CHECK-NEXT:  .LBB3_2:
+  ; CHECK-NEXT:    csel x16, x16, xzr, lt
+  ; CHECK-NEXT:  .LBB3_3:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: indirectbranch:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    br x0
+  ; CHECK-NEXT:  .LBB4_1:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
+  ; CHECK-NEXT:  .LBB4_2:
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   ; Also check that a non-default temporary register gets picked correctly to
   ; transfer the SP to to and it with the taint register when the default
   ; temporary isn't available.
   define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: indirect_call_x17:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    mov x0, sp
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    and x0, x0, x16
+  ; CHECK-NEXT:    mov sp, x0
+  ; CHECK-NEXT:    blr x17
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    mov x0, sp
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    and x0, x0, x16
+  ; CHECK-NEXT:    mov sp, x0
+  ; CHECK-NEXT:    ret
    ret void
   }
   @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8
   define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: indirect_tailcall_x17:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    adrp x8, g
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    ldr x17, [x8, :lo12:g]
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    and x17, x17, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    csdb
+  ; CHECK-NEXT:    br x17
    ret void
   }
   define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening {
+  ; CHECK-LABEL: indirect_call_lr:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    blr x30
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    mov x1, sp
+  ; CHECK-NEXT:    add w0, w0, #1
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    and x1, x1, x16
+  ; CHECK-NEXT:    mov sp, x1
+  ; CHECK-NEXT:    ret
    ret void
   }
   define void @RS_cannot_find_available_regs() speculative_load_hardening {
+  ; CHECK-LABEL: RS_cannot_find_available_regs:
+  ; CHECK:       // %bb.0:
+  ; CHECK-NEXT:    dsb sy
+  ; CHECK-NEXT:    isb
+  ; CHECK-NEXT:    ldr x0, [x0]
+  ; CHECK-NEXT:    cmp sp, #0
+  ; CHECK-NEXT:    csetm x16, ne
+  ; CHECK-NEXT:    ret
    ret void
   }
 ...



More information about the llvm-commits mailing list