[llvm] [AArch64] Pass scratch regs as operands to StoreSwiftAsyncContext. (PR #73332)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 24 06:33:54 PST 2023


llvmbot wrote:



@llvm/pr-subscribers-backend-aarch64

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Add two extra operands to StoreSwiftAsyncContext that specify the scratch registers to use, instead of hard-coding X16/X17. This fixes a miscompile where StoreSwiftAsyncContext could clobber live registers after being moved during shrink-wrapping.
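
To summarize the truncated diff: the prologue code now picks two free, non-callee-saved scratch registers for the block and attaches them to the pseudo, and the expansion uses those instead of assuming X16/X17; `canUseAsPrologue` additionally rejects blocks where two such registers cannot be found, so shrink-wrapping will not place the store somewhere the expansion would have to clobber live values. The sketch below is condensed from the hunks in this patch, not a standalone compilable unit:

```cpp
// Condensed from AArch64FrameLowering.cpp below; MBB, MBBI, DL, TII, Reg and
// FPOffset are the surrounding prologue-emission context.
unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
unsigned SecondScratchReg =
    findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
    .addUse(Reg)                                   // x22 (the context) or xzr
    .addUse(AArch64::SP)
    .addImm(FPOffset - 8)
    .addDef(FirstScratchReg, RegState::Implicit)   // per-block scratch regs,
    .addDef(SecondScratchReg, RegState::Implicit)  // instead of fixed X16/X17
    .setMIFlags(MachineInstr::FrameSetup);

// The expansion in AArch64ExpandPseudoInsts.cpp then reads them back off the
// instruction and emits the add/movk/mov/pacdb/str sequence with them:
Register ScratchReg1 = MBBI->getOperand(3).getReg();
Register ScratchReg2 = MBBI->getOperand(4).getReg();
```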

---

Patch is 28.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73332.diff


5 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (+15-13) 
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+25-3) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+3-2) 
- (modified) llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll (+454-1) 
- (modified) llvm/test/CodeGen/AArch64/swift-async.ll (+21-21) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index ac26f4d4fbe66ae..be8aed5967eb26f 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   Register CtxReg = MBBI->getOperand(0).getReg();
   Register BaseReg = MBBI->getOperand(1).getReg();
   int Offset = MBBI->getOperand(2).getImm();
+  Register ScratchReg1 = MBBI->getOperand(3).getReg();
+  Register ScratchReg2 = MBBI->getOperand(4).getReg();
   DebugLoc DL(MBBI->getDebugLoc());
   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
 
@@ -872,35 +874,35 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
 
   // We need to sign the context in an address-discriminated way. 0xc31a is a
   // fixed random value, chosen as part of the ABI.
-  //     add x16, xBase, #Offset
-  //     movk x16, #0xc31a, lsl #48
-  //     mov x17, x22/xzr
-  //     pacdb x17, x16
-  //     str x17, [xBase, #Offset]
+  //     add ScratchReg1, xBase, #Offset
+  //     movk ScratchReg1, #0xc31a, lsl #48
+  //     mov ScratchReg2, x22/xzr
+  //     pacdb ScratchReg2, ScratchReg1
+  //     str ScratchReg2, [xBase, #Offset]
   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
-  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
       .addUse(BaseReg)
       .addImm(abs(Offset))
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
+      .addUse(ScratchReg1)
       .addImm(0xc31a)
       .addImm(48)
       .setMIFlag(MachineInstr::FrameSetup);
   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
   // move it somewhere before signing.
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
       .addUse(AArch64::XZR)
       .addUse(CtxReg)
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
-      .addUse(AArch64::X17)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
+      .addUse(ScratchReg2)
+      .addUse(ScratchReg1)
       .setMIFlag(MachineInstr::FrameSetup);
   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
-      .addUse(AArch64::X17)
+      .addUse(ScratchReg2)
       .addUse(BaseReg)
       .addImm(Offset / 8)
       .setMIFlag(MachineInstr::FrameSetup);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..e1b62e774146d10 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg = 0);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -870,17 +871,22 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
 // but we would then have to make sure that we were in fact saving at least one
 // callee-save register in the prologue, which is additional complexity that
 // doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+//
+// If \p FirstScratchReg is not 0, it specifies the register already chosen as the first scratch register; in that case, return a different scratch register, if possible.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg) {
   MachineFunction *MF = MBB->getParent();
 
   // If MBB is an entry block, use X9 as the scratch register
-  if (&MF->front() == MBB)
+  if (&MF->front() == MBB && !FirstScratchReg)
     return AArch64::X9;
 
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
   LivePhysRegs LiveRegs(TRI);
   LiveRegs.addLiveIns(*MBB);
+  if (FirstScratchReg)
+    LiveRegs.addReg(FirstScratchReg);
 
   // Mark callee saved registers as used so we will not choose them.
   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
@@ -905,6 +911,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+  if (AFI->hasSwiftAsyncContext()) {
+    // Expanding StoreSwiftAsyncContext requires 2 scratch registers.
+    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
+    unsigned SecondScratchReg =
+        findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
+    if (FirstScratchReg == AArch64::NoRegister ||
+        SecondScratchReg == AArch64::NoRegister)
+      return false;
+  }
 
   // Don't need a scratch register if we're not going to re-align the stack.
   if (!RegInfo->hasStackRealignment(*MF))
@@ -1681,11 +1698,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
       if (HaveInitialContext)
         MBB.addLiveIn(AArch64::X22);
+      unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+      unsigned SecondScratchReg =
+          findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
       Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
           .addUse(Reg)
           .addUse(AArch64::SP)
           .addImm(FPOffset - 8)
+          .addDef(FirstScratchReg, RegState::Implicit)
+          .addDef(SecondScratchReg, RegState::Implicit)
           .setMIFlags(MachineInstr::FrameSetup);
       if (NeedsWinCFI) {
         // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0a8abfae5051dd8..75ab0ae6a41ad16 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
 //-----------------------------------------------------------------------------
 
 // This gets lowered into an instruction sequence of 20 bytes
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
+let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
 def StoreSwiftAsyncContext
-      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
+      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
+                            GPR64:$scratch1, GPR64sp:$scratch2),
                []>, Sched<[]>;
 
 def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index f81531766ca13bf..a740b903e384dad 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,9 +1,64 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: not --crash llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s
+; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x1, x22
+; CHECK-NEXT:    pacdb x1, x9
+; CHECK-NEXT:    str x1, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x9, LJTI0_0@PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    add x9, x9, LJTI0_0@PAGEOFF
+; CHECK-NEXT:  Ltmp0:
+; CHECK-NEXT:    adr x10, Ltmp0
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB0_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB0_3
+; CHECK-NEXT:  LBB0_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB0_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI0_0:
+; CHECK-NEXT:    .long LBB0_3-Ltmp0
+; CHECK-NEXT:    .long LBB0_1-Ltmp0
+; CHECK-NEXT:    .long LBB0_1-Ltmp0
+; CHECK-NEXT:    .long LBB0_2-Ltmp0
 entry:
   %x16 = tail call i64 asm "", "={x16}"()
   %l = load i64, ptr %src, align 8
@@ -36,6 +91,61 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x17_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x1, x22
+; CHECK-NEXT:    pacdb x1, x9
+; CHECK-NEXT:    str x1, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x9, LJTI1_0@PAGE
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    add x9, x9, LJTI1_0@PAGEOFF
+; CHECK-NEXT:  Ltmp1:
+; CHECK-NEXT:    adr x10, Ltmp1
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB1_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB1_3
+; CHECK-NEXT:  LBB1_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB1_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh2, Lloh3
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI1_0:
+; CHECK-NEXT:    .long LBB1_3-Ltmp1
+; CHECK-NEXT:    .long LBB1_1-Ltmp1
+; CHECK-NEXT:    .long LBB1_1-Ltmp1
+; CHECK-NEXT:    .long LBB1_2-Ltmp1
 entry:
   %x17 = tail call i64 asm "", "={x17}"()
   %l = load i64, ptr %src, align 8
@@ -68,6 +178,61 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x2, x22
+; CHECK-NEXT:    pacdb x2, x9
+; CHECK-NEXT:    str x2, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    adrp x9, LJTI2_0@PAGE
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    add x9, x9, LJTI2_0@PAGEOFF
+; CHECK-NEXT:  Ltmp2:
+; CHECK-NEXT:    adr x10, Ltmp2
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB2_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB2_3
+; CHECK-NEXT:  LBB2_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB2_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh4, Lloh5
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI2_0:
+; CHECK-NEXT:    .long LBB2_3-Ltmp2
+; CHECK-NEXT:    .long LBB2_1-Ltmp2
+; CHECK-NEXT:    .long LBB2_1-Ltmp2
+; CHECK-NEXT:    .long LBB2_2-Ltmp2
 entry:
   %x1 = tail call i64 asm "", "={x1}"()
   %l = load i64, ptr %src, align 8
@@ -100,6 +265,65 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x2, sp, #8
+; CHECK-NEXT:    movk x2, #49946, lsl #48
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    pacdb x3, x2
+; CHECK-NEXT:    str x3, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh6:
+; CHECK-NEXT:    adrp x10, LJTI3_0@PAGE
+; CHECK-NEXT:  Lloh7:
+; CHECK-NEXT:    add x10, x10, LJTI3_0@PAGEOFF
+; CHECK-NEXT:  Ltmp3:
+; CHECK-NEXT:    adr x11, Ltmp3
+; CHECK-NEXT:    ldrsw x12, [x10, x8, lsl #2]
+; CHECK-NEXT:    add x11, x11, x12
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x11
+; CHECK-NEXT:  LBB3_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB3_3
+; CHECK-NEXT:  LBB3_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB3_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh6, Lloh7
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI3_0:
+; CHECK-NEXT:    .long LBB3_3-Ltmp3
+; CHECK-NEXT:    .long LBB3_1-Ltmp3
+; CHECK-NEXT:    .long LBB3_1-Ltmp3
+; CHECK-NEXT:    .long LBB3_2-Ltmp3
 entry:
   %x1 = tail call i64 asm "", "={x1}"()
   %x9 = tail call i64 asm "", "={x9}"()
@@ -135,6 +359,117 @@ exit:
 
 ; There are 2 available scratch registers left, shrink-wrapping can happen.
 define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x10, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x17, sp, #8
+; CHECK-NEXT:    movk x17, #49946, lsl #48
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    pacdb x20, x17
+; CHECK-NEXT:    str x20, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh8:
+; CHECK-NEXT:    adrp x17, LJTI4_0@PAGE
+; CHECK-NEXT:  Lloh9:
+; CHECK-NEXT:    add x17, x17, LJTI4_0@PAGEOFF
+; CHECK-NEXT:  Ltmp4:
+; CHECK-NEXT:    adr x0, Ltmp4
+; CHECK-NEXT:    ldrsw x19, [x17, x10, lsl #2]
+; CHECK-NEXT:    add x0, x0, x19
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x0
+; CHECK-NEXT:  LBB4_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB4_3
+; CHECK-NEXT:  LBB4_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB4_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; C...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/73332


More information about the llvm-commits mailing list