[llvm] [AArch64] Pass scratch regs as operands to StoreSwiftAsyncContext. (PR #73332)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 27 10:39:50 PST 2023
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/73332
>From fe623c353dcd394368a486715a16b9fba56e00d9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 24 Nov 2023 14:27:30 +0000
Subject: [PATCH] [AArch64] Pass scratch regs as operands to
StoreSwiftAsyncContext.
Add 2 additional operands to StoreSwiftAsyncContext to specify the
scratch registers to use, instead of hard-coding X16/X17. This fixes a
miscompile where StoreSwiftAsyncContext could clobber live registers
after being moved during shrink-wrapping.
---
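For reviewers, a condensed sketch of the new interface (assembled from the AArch64FrameLowering.cpp hunks below; findScratchNonCalleeSaveRegister is the file-local static helper this patch extends, while the wrapper function name and signature here are invented for illustration, so this is not a verbatim excerpt and will not build outside that file):

// Sketch: pick two distinct scratch registers that are free at the insertion
// point and attach them to the StoreSwiftAsyncContext pseudo, instead of
// relying on the previously hard-coded X16/X17 defs.
#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Declared as in the patch; defined in AArch64FrameLowering.cpp. A non-zero
// FirstScratchReg asks for a second, different scratch register.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
                                                 unsigned FirstScratchReg = 0);

static void emitStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const DebugLoc &DL,
                                       const TargetInstrInfo *TII,
                                       Register CtxReg, int FPOffset) {
  // The second query excludes the first result, so the two registers differ.
  unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
  unsigned SecondScratchReg =
      findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
      .addUse(CtxReg)       // x22 when a context is live in, otherwise xzr
      .addUse(AArch64::SP)
      .addImm(FPOffset - 8)
      .addDef(FirstScratchReg, RegState::Implicit)
      .addDef(SecondScratchReg, RegState::Implicit)
      .setMIFlags(MachineInstr::FrameSetup);
}

Passing the registers explicitly also lets canUseAsPrologue refuse blocks where two free scratch registers cannot be found, which is what keeps shrink-wrapping from moving the store to a point where the old hard-coded X16/X17 are live.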
.../AArch64/AArch64ExpandPseudoInsts.cpp | 28 +-
.../Target/AArch64/AArch64FrameLowering.cpp | 30 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +-
...re-swift-async-context-clobber-live-reg.ll | 456 +++++++++++++++++-
llvm/test/CodeGen/AArch64/swift-async.ll | 42 +-
llvm/unittests/Target/AArch64/InstSizes.cpp | 2 +-
6 files changed, 521 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index ac26f4d4fbe66ae..be8aed5967eb26f 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
Register CtxReg = MBBI->getOperand(0).getReg();
Register BaseReg = MBBI->getOperand(1).getReg();
int Offset = MBBI->getOperand(2).getImm();
+ Register ScratchReg1 = MBBI->getOperand(3).getReg();
+ Register ScratchReg2 = MBBI->getOperand(4).getReg();
DebugLoc DL(MBBI->getDebugLoc());
auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
@@ -872,35 +874,35 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
// We need to sign the context in an address-discriminated way. 0xc31a is a
// fixed random value, chosen as part of the ABI.
- // add x16, xBase, #Offset
- // movk x16, #0xc31a, lsl #48
- // mov x17, x22/xzr
- // pacdb x17, x16
- // str x17, [xBase, #Offset]
+ // add ScratchReg1, xBase, #Offset
+ // movk ScratchReg1, #0xc31a, lsl #48
+ // mov ScratchReg2, x22/xzr
+ // pacdb ScratchReg2, ScratchReg1
+ // str ScratchReg2, [xBase, #Offset]
unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
- BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
+ BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
.addUse(BaseReg)
.addImm(abs(Offset))
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
- .addUse(AArch64::X16)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
+ .addUse(ScratchReg1)
.addImm(0xc31a)
.addImm(48)
.setMIFlag(MachineInstr::FrameSetup);
// We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
// move it somewhere before signing.
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
.addUse(AArch64::XZR)
.addUse(CtxReg)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
- .addUse(AArch64::X17)
- .addUse(AArch64::X16)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
+ .addUse(ScratchReg2)
+ .addUse(ScratchReg1)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
- .addUse(AArch64::X17)
+ .addUse(ScratchReg2)
.addUse(BaseReg)
.addImm(Offset / 8)
.setMIFlag(MachineInstr::FrameSetup);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..a171581773e759d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ unsigned FirstScratchReg = 0);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -870,17 +871,24 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+//
+// If \p FirstScratchReg is not 0, it specifies the register already chosen as
+// the first scratch register, and the function returns a different scratch
+// register, if possible.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+ unsigned FirstScratchReg) {
MachineFunction *MF = MBB->getParent();
// If MBB is an entry block, use X9 as the scratch register
- if (&MF->front() == MBB)
+ if (&MF->front() == MBB && !FirstScratchReg)
return AArch64::X9;
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
+ if (FirstScratchReg)
+ LiveRegs.addReg(FirstScratchReg);
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
@@ -905,6 +913,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ // Expanding StoreSwiftAsyncContext requires 2 scratch registers.
+ unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
+ unsigned SecondScratchReg =
+ findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
+ if (FirstScratchReg == AArch64::NoRegister ||
+ SecondScratchReg == AArch64::NoRegister)
+ return false;
+ }
// Don't need a scratch register if we're not going to re-align the stack.
if (!RegInfo->hasStackRealignment(*MF))
@@ -1681,11 +1700,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
+ unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+ unsigned SecondScratchReg =
+ findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
.addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
+ .addDef(FirstScratchReg, RegState::Implicit)
+ .addDef(SecondScratchReg, RegState::Implicit)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
// WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0a8abfae5051dd8..75ab0ae6a41ad16 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
//-----------------------------------------------------------------------------
// This gets lowered into an instruction sequence of 20 bytes
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
+let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
- : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
+ : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
+ GPR64:$scratch1, GPR64sp:$scratch2),
[]>, Sched<[]>;
def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index 217fb9bbfbeb9c3..a740b903e384dad 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,10 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: not --crash llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s
-; REQUIRES: asserts
+; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x9, sp, #8
+; CHECK-NEXT: movk x9, #49946, lsl #48
+; CHECK-NEXT: mov x1, x22
+; CHECK-NEXT: pacdb x1, x9
+; CHECK-NEXT: str x1, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x9, LJTI0_0@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: add x9, x9, LJTI0_0@PAGEOFF
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: adr x10, Ltmp0
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB0_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB0_3
+; CHECK-NEXT: LBB0_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB0_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI0_0:
+; CHECK-NEXT: .long LBB0_3-Ltmp0
+; CHECK-NEXT: .long LBB0_1-Ltmp0
+; CHECK-NEXT: .long LBB0_1-Ltmp0
+; CHECK-NEXT: .long LBB0_2-Ltmp0
entry:
%x16 = tail call i64 asm "", "={x16}"()
%l = load i64, ptr %src, align 8
@@ -37,6 +91,61 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x17_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x9, sp, #8
+; CHECK-NEXT: movk x9, #49946, lsl #48
+; CHECK-NEXT: mov x1, x22
+; CHECK-NEXT: pacdb x1, x9
+; CHECK-NEXT: str x1, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x9, LJTI1_0@PAGE
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: add x9, x9, LJTI1_0@PAGEOFF
+; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: adr x10, Ltmp1
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB1_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB1_3
+; CHECK-NEXT: LBB1_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB1_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI1_0:
+; CHECK-NEXT: .long LBB1_3-Ltmp1
+; CHECK-NEXT: .long LBB1_1-Ltmp1
+; CHECK-NEXT: .long LBB1_1-Ltmp1
+; CHECK-NEXT: .long LBB1_2-Ltmp1
entry:
%x17 = tail call i64 asm "", "={x17}"()
%l = load i64, ptr %src, align 8
@@ -69,6 +178,61 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x9, sp, #8
+; CHECK-NEXT: movk x9, #49946, lsl #48
+; CHECK-NEXT: mov x2, x22
+; CHECK-NEXT: pacdb x2, x9
+; CHECK-NEXT: str x2, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: adrp x9, LJTI2_0@PAGE
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: add x9, x9, LJTI2_0@PAGEOFF
+; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: adr x10, Ltmp2
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB2_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB2_3
+; CHECK-NEXT: LBB2_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB2_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh4, Lloh5
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI2_0:
+; CHECK-NEXT: .long LBB2_3-Ltmp2
+; CHECK-NEXT: .long LBB2_1-Ltmp2
+; CHECK-NEXT: .long LBB2_1-Ltmp2
+; CHECK-NEXT: .long LBB2_2-Ltmp2
entry:
%x1 = tail call i64 asm "", "={x1}"()
%l = load i64, ptr %src, align 8
@@ -101,6 +265,65 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x2, sp, #8
+; CHECK-NEXT: movk x2, #49946, lsl #48
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: pacdb x3, x2
+; CHECK-NEXT: str x3, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh6:
+; CHECK-NEXT: adrp x10, LJTI3_0@PAGE
+; CHECK-NEXT: Lloh7:
+; CHECK-NEXT: add x10, x10, LJTI3_0@PAGEOFF
+; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: adr x11, Ltmp3
+; CHECK-NEXT: ldrsw x12, [x10, x8, lsl #2]
+; CHECK-NEXT: add x11, x11, x12
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x11
+; CHECK-NEXT: LBB3_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB3_3
+; CHECK-NEXT: LBB3_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB3_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI3_0:
+; CHECK-NEXT: .long LBB3_3-Ltmp3
+; CHECK-NEXT: .long LBB3_1-Ltmp3
+; CHECK-NEXT: .long LBB3_1-Ltmp3
+; CHECK-NEXT: .long LBB3_2-Ltmp3
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x9 = tail call i64 asm "", "={x9}"()
@@ -136,6 +359,117 @@ exit:
; There are 2 available scratch registers left, so shrink-wrapping can happen.
define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x17, sp, #8
+; CHECK-NEXT: movk x17, #49946, lsl #48
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: pacdb x20, x17
+; CHECK-NEXT: str x20, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh8:
+; CHECK-NEXT: adrp x17, LJTI4_0@PAGE
+; CHECK-NEXT: Lloh9:
+; CHECK-NEXT: add x17, x17, LJTI4_0@PAGEOFF
+; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: adr x0, Ltmp4
+; CHECK-NEXT: ldrsw x19, [x17, x10, lsl #2]
+; CHECK-NEXT: add x0, x0, x19
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x0
+; CHECK-NEXT: LBB4_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB4_3
+; CHECK-NEXT: LBB4_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB4_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI4_0:
+; CHECK-NEXT: .long LBB4_3-Ltmp4
+; CHECK-NEXT: .long LBB4_1-Ltmp4
+; CHECK-NEXT: .long LBB4_1-Ltmp4
+; CHECK-NEXT: .long LBB4_2-Ltmp4
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x2 = tail call i64 asm "", "={x2}"()
@@ -198,6 +532,124 @@ exit:
; There is only 1 available scratch register left, shrink-wrapping cannot
; happen because StoreSwiftAsyncContext needs 2 free scratch registers.
define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_1_available_reg_left:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x21, x19, [sp, #8] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: movk x9, #49946, lsl #48
+; CHECK-NEXT: mov x1, x22
+; CHECK-NEXT: pacdb x1, x9
+; CHECK-NEXT: str x1, [sp, #24]
+; CHECK-NEXT: add x29, sp, #32
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: .cfi_offset w21, -40
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh10:
+; CHECK-NEXT: adrp x0, LJTI5_0@PAGE
+; CHECK-NEXT: Lloh11:
+; CHECK-NEXT: add x0, x0, LJTI5_0@PAGEOFF
+; CHECK-NEXT: Ltmp5:
+; CHECK-NEXT: adr x21, Ltmp5
+; CHECK-NEXT: ldrsw x19, [x0, x10, lsl #2]
+; CHECK-NEXT: add x21, x21, x19
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x21
+; CHECK-NEXT: LBB5_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB5_3
+; CHECK-NEXT: LBB5_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB5_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x21, x19, [sp, #8] ; 16-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh10, Lloh11
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI5_0:
+; CHECK-NEXT: .long LBB5_3-Ltmp5
+; CHECK-NEXT: .long LBB5_1-Ltmp5
+; CHECK-NEXT: .long LBB5_1-Ltmp5
+; CHECK-NEXT: .long LBB5_2-Ltmp5
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x2 = tail call i64 asm "", "={x2}"()
diff --git a/llvm/test/CodeGen/AArch64/swift-async.ll b/llvm/test/CodeGen/AArch64/swift-async.ll
index 4a3bf15b666b3fb..28d32ebf1b991ea 100644
--- a/llvm/test/CodeGen/AArch64/swift-async.ll
+++ b/llvm/test/CodeGen/AArch64/swift-async.ll
@@ -12,11 +12,11 @@ define swifttailcc void @simple(ptr swiftasync %ctx) "frame-pointer"="all" {
; CHECK: stp x29, x30, [sp, #16]
; CHECK-NOAUTH-DAG: str x22, [sp, #8]
-; CHECK-AUTH: add x16, sp, #8
-; CHECK-AUTH: movk x16, #49946, lsl #48
-; CHECK-AUTH: mov x17, x22
-; CHECK-AUTH: pacdb x17, x16
-; CHECK-AUTH: str x17, [sp, #8]
+; CHECK-AUTH: add x9, sp, #8
+; CHECK-AUTH: movk x9, #49946, lsl #48
+; CHECK-AUTH: mov x0, x22
+; CHECK-AUTH: pacdb x0, x9
+; CHECK-AUTH: str x0, [sp, #8]
; CHECK-DAG: add x29, sp, #16
; CHECK: .cfi_def_cfa w29, 16
@@ -39,11 +39,11 @@ define swifttailcc void @more_csrs(ptr swiftasync %ctx) "frame-pointer"="all" {
; CHECK: stp x29, x30, [sp, #16]
; CHECK-NOAUTH-DAG: str x22, [sp, #8]
-; CHECK-AUTH: add x16, sp, #8
-; CHECK-AUTH: movk x16, #49946, lsl #48
-; CHECK-AUTH: mov x17, x22
-; CHECK-AUTH: pacdb x17, x16
-; CHECK-AUTH: str x17, [sp, #8]
+; CHECK-AUTH: add x9, sp, #8
+; CHECK-AUTH: movk x9, #49946, lsl #48
+; CHECK-AUTH: mov x0, x22
+; CHECK-AUTH: pacdb x0, x9
+; CHECK-AUTH: str x0, [sp, #8]
; CHECK-DAG: add x29, sp, #16
; CHECK: .cfi_def_cfa w29, 16
@@ -67,11 +67,11 @@ define swifttailcc void @locals(ptr swiftasync %ctx) "frame-pointer"="all" {
; CHECK: stp x29, x30, [sp, #48]
; CHECK-NOAUTH-DAG: str x22, [sp, #40]
-; CHECK-AUTH: add x16, sp, #40
-; CHECK-AUTH: movk x16, #49946, lsl #48
-; CHECK-AUTH: mov x17, x22
-; CHECK-AUTH: pacdb x17, x16
-; CHECK-AUTH: str x17, [sp, #40]
+; CHECK-AUTH: add x9, sp, #40
+; CHECK-AUTH: movk x9, #49946, lsl #48
+; CHECK-AUTH: mov x0, x22
+; CHECK-AUTH: pacdb x0, x9
+; CHECK-AUTH: str x0, [sp, #40]
; CHECK-DAG: add x29, sp, #48
; CHECK: .cfi_def_cfa w29, 16
@@ -95,7 +95,7 @@ define swifttailcc void @use_input_context(ptr swiftasync %ctx, ptr %ptr) "frame
; CHECK-LABEL: use_input_context:
; CHECK-NOAUTH: str x22, [sp
-; CHECK-AUTH: mov x17, x22
+; CHECK-AUTH: mov x1, x22
; CHECK-NOT: x22
; CHECK: str x22, [x0]
@@ -108,11 +108,11 @@ define swifttailcc ptr @context_in_func() "frame-pointer"="non-leaf" {
; CHECK-LABEL: context_in_func:
; CHECK-NOAUTH: str xzr, [sp, #8]
-; CHECK-AUTH: add x16, sp, #8
-; CHECK-AUTH: movk x16, #49946, lsl #48
-; CHECK-AUTH: mov x17, xzr
-; CHECK-AUTH: pacdb x17, x16
-; CHECK-AUTH: str x17, [sp, #8]
+; CHECK-AUTH: add x9, sp, #8
+; CHECK-AUTH: movk x9, #49946, lsl #48
+; CHECK-AUTH: mov x0, xzr
+; CHECK-AUTH: pacdb x0, x9
+; CHECK-AUTH: str x0, [sp, #8]
%ptr = call ptr @llvm.swift.async.context.addr()
ret ptr %ptr
diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp
index d7e4b4a91cdbd6b..e5bc6a67b6b73b6 100644
--- a/llvm/unittests/Target/AArch64/InstSizes.cpp
+++ b/llvm/unittests/Target/AArch64/InstSizes.cpp
@@ -196,7 +196,7 @@ TEST(InstSizes, StoreSwiftAsyncContext) {
runChecks(
TM.get(), II.get(), "",
- " StoreSwiftAsyncContext $x0, $x1, 12, implicit-def $x16, "
+ " StoreSwiftAsyncContext $x0, $x1, 12, $x2, $x3, implicit-def $x16, "
"implicit-def $x17\n",
[](AArch64InstrInfo &II, MachineFunction &MF) {
auto I = MF.begin()->begin();