[llvm] [AArch64] Prevent the AArch64LoadStoreOptimizer from reordering CFI instructions (PR #101317)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 02:53:03 PDT 2024
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/101317
From 1fbde71dfb6ce63e9523f42459ee6d55d5a6ab3b Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 30 Jul 2024 17:33:58 +0100
Subject: [PATCH 1/2] [AArch64] Prevent the AArch64LoadStoreOptimizer from
reordering CFI instructions
When the AArch64LoadStoreOptimizer pass merges an SP update with a load/store
instruction, either:
* create the merged instruction at the location of the SP update (so no CFI
instructions are moved), or
* move a CFI instruction only if the move would not reorder it across other CFI
instructions.
If neither of the above is possible, do not perform the optimisation.
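For illustration, here is a minimal MIR-style sketch of the hazard being avoided
(the sequence is hypothetical, not one of the tests added below). Folding the load
into the SP update as a pre-indexed access, i.e. rewriting it to
"early-clobber $sp, $x0 = LDRXpre $sp, -32" at the position of the load, would
require moving the CFA-offset CFI after the merged instruction, which here would
carry it past another CFI and change the unwind information:

  $sp = frame-setup SUBXri $sp, 32, 0
  frame-setup CFI_INSTRUCTION def_cfa_offset 32  ; must stay right after the SP update
  frame-setup CFI_INSTRUCTION offset $w29, -16   ; moving the CFI above past this one
                                                 ; would reorder CFIs
  $x0 = frame-setup LDRXui $sp, 0

In such a case the pass now either places the merged instruction at the position of
the SUBXri (when nothing in between prevents it, i.e. MergeEither is true) or leaves
the sequence unchanged.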
---
.../AArch64/AArch64LoadStoreOptimizer.cpp | 109 +++++++++++++-----
llvm/test/CodeGen/AArch64/build-one-lane.ll | 2 +-
llvm/test/CodeGen/AArch64/insertextract.ll | 28 ++---
llvm/test/CodeGen/AArch64/no-reorder-cfi.ll | 26 +++++
...treaming-mode-fixed-length-bit-counting.ll | 18 +--
...ing-mode-fixed-length-insert-vector-elt.ll | 6 +-
llvm/test/CodeGen/AArch64/vector-compress.ll | 20 ++--
7 files changed, 142 insertions(+), 67 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index eec97c45aff0a3..8031f19b63239e 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -190,8 +190,12 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
+ // `MergeEither` is set to true if the combined instruction may be placed
+ // either at the location of the load/store instruction or at the location of
+ // the update instruction.
MachineBasicBlock::iterator
- findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
+ findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
+ bool &MergeEither);
// Find an instruction that updates the base register of the ld/st
// instruction.
@@ -202,9 +206,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
unsigned IndexReg, unsigned &Offset);
// Merge a pre- or post-index base register update into a ld/st instruction.
- MachineBasicBlock::iterator
+ std::optional<MachineBasicBlock::iterator>
mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update, bool IsPreIdx);
+ MachineBasicBlock::iterator Update, bool IsForward,
+ bool IsPreIdx, bool MergeEither);
MachineBasicBlock::iterator
mergeConstOffsetInsn(MachineBasicBlock::iterator I,
@@ -2070,20 +2075,37 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
}
}
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update,
- bool IsPreIdx) {
+std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
+ bool IsForward, bool IsPreIdx, bool MergeEither) {
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- // If updating the SP and the following instruction is CFA offset related CFI
- // instruction move it after the merged instruction.
- MachineBasicBlock::iterator CFI =
- IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
+ // If updating the SP and the following instruction is a CFA offset related CFI,
+ // make sure the CFI follows the SP update either by merging at the location
+ // of the update or by moving the CFI after the merged instruction. If unable
+ // to do so, bail.
+ MachineBasicBlock::iterator InsertPt = I;
+ if (IsForward) {
+ assert(IsPreIdx);
+ if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
+ if (MergeEither) {
+ InsertPt = Update;
+ } else {
+ // Take care not to reorder CFIs.
+ if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
+ return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
+ }))
+ return std::nullopt;
+
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->splice(std::next(InsertPt), MBB, CFI);
+ }
+ }
+ }
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
@@ -2104,7 +2126,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
.add(Update->getOperand(0))
.add(getLdStRegOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
@@ -2113,7 +2136,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
.add(Update->getOperand(0))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
@@ -2122,10 +2146,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
- if (CFI != E) {
- MachineBasicBlock *MBB = I->getParent();
- MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
- }
if (IsPreIdx) {
++NumPreFolded;
@@ -2360,7 +2380,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
- MachineBasicBlock::iterator I, unsigned Limit) {
+ MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;
@@ -2370,6 +2390,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
+ Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
+ IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
+ : AArch64::NoRegister};
+
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
@@ -2377,12 +2402,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
- bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
- for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
- Register DestReg = getLdStRegOp(MemMI, i).getReg();
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
+ if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
return E;
- }
}
const bool BaseRegSP = BaseReg == AArch64::SP;
@@ -2403,6 +2425,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
bool MemAcessBeforeSPPreInc = false;
+ MergeEither = true;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
@@ -2429,6 +2452,20 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
if (!ModifiedRegUnits.available(BaseReg) ||
!UsedRegUnits.available(BaseReg))
return E;
+
+ // If we have a destination register (i.e. a load instruction) and a
+ // destination register is used or modified, then we can only merge forward,
+ // i.e. the combined instruction is put in the place of the memory
+ // instruction. Same applies if we see a memory access or side effects.
+ if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
+ (DestReg[0] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[0]) &&
+ UsedRegUnits.available(DestReg[0]))) ||
+ (DestReg[1] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[1]) &&
+ UsedRegUnits.available(DestReg[1]))))
+ MergeEither = false;
+
// Keep track if we have a memory access before an SP pre-increment, in this
// case we need to validate later that the update amount respects the red
// zone.
@@ -2639,8 +2676,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/false,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// Don't know how to handle unscaled pre/post-index versions below, so bail.
@@ -2652,11 +2693,15 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// ldr x1, [x0]
// merged into:
// ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+ bool MergeEither;
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
+ /*IsPreIdx=*/true, MergeEither)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// The immediate in the load/store is scaled by the size of the memory
@@ -2673,8 +2718,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/true,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
return false;
diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index a517ca4a1bb4bc..ac37fbc349d7d7 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -318,9 +318,9 @@ define void @v2f64st(ptr %p, double %s) nounwind {
define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_32xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0x1f
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w10, #30 // =0x1e
diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 296e267a9c7f0b..54ee693db1239f 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -160,9 +160,9 @@ entry:
define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str d2, [x9, x8, lsl #3]
@@ -387,9 +387,9 @@ entry:
define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str s2, [x9, x8, lsl #2]
@@ -552,9 +552,9 @@ entry:
define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str h2, [x9, x8, lsl #1]
@@ -715,9 +715,9 @@ entry:
define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strb w0, [x9, x8]
@@ -876,9 +876,9 @@ entry:
define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strh w0, [x9, x8, lsl #1]
@@ -1103,9 +1103,9 @@ entry:
define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str w0, [x9, x8, lsl #2]
@@ -1288,9 +1288,9 @@ entry:
define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str x0, [x9, x8, lsl #3]
@@ -1454,9 +1454,9 @@ entry:
define double @extract_v4f64_c(<4 x double> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr d0, [x9, x8, lsl #3]
@@ -1662,9 +1662,9 @@ entry:
define float @extract_v8f32_c(<8 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr s0, [x9, x8, lsl #2]
@@ -1821,9 +1821,9 @@ entry:
define half @extract_v16f16_c(<16 x half> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr h0, [x9, x8, lsl #1]
@@ -1979,9 +1979,9 @@ entry:
define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrb w0, [x9, x8]
@@ -2135,9 +2135,9 @@ entry:
define i16 @extract_v16i16_c(<16 x i16> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrh w0, [x9, x8, lsl #1]
@@ -2368,9 +2368,9 @@ entry:
define i32 @extract_v8i32_c(<8 x i32> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr w0, [x9, x8, lsl #2]
@@ -2551,9 +2551,9 @@ entry:
define i64 @extract_v4i64_c(<4 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr x0, [x9, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
new file mode 100644
index 00000000000000..cc7acf6ddfb5ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -homogeneous-prolog-epilog < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @g(ptr, ptr)
+
+define void @f() minsize {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: str xzr, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x0, sp, #8
+; CHECK-NEXT: mov x1, sp
+; CHECK-NEXT: bl g
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b OUTLINED_FUNCTION_EPILOG_TAIL_x30x29
+entry:
+ %p = alloca i32, align 8
+ %n = alloca i64, align 8
+ store i64 0, ptr %n, align 8
+ call void @g(ptr %p, ptr %n)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index 2a83f13251d76d..bd49db8a4c4149 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -741,11 +741,11 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
; NONEON-NOSVE-NEXT: ldrb w12, [sp]
; NONEON-NOSVE-NEXT: lsr w13, w9, #1
@@ -814,10 +814,10 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v8i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -939,10 +939,10 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v16i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -1611,11 +1611,11 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w10, [sp]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w11, w9, #1
; NONEON-NOSVE-NEXT: lsr w12, w10, #1
; NONEON-NOSVE-NEXT: and w11, w11, #0x55555555
@@ -1657,10 +1657,10 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -1730,10 +1730,10 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v8i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2089,10 +2089,10 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2135,10 +2135,10 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2366,10 +2366,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x9, [sp, #8]
+; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 3ba61c3335a64c..d77473ed8f08e5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -518,13 +518,13 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) {
;
; NONEON-NOSVE-LABEL: insertelement_v2f16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: ldr h0, [x8, :lo12:.LCPI14_0]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
-; NONEON-NOSVE-NEXT: str h0, [sp, #10]
+; NONEON-NOSVE-NEXT: ldr h0, [x8, :lo12:.LCPI14_0]
; NONEON-NOSVE-NEXT: str h1, [sp, #8]
+; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vector-compress.ll b/llvm/test/CodeGen/AArch64/vector-compress.ll
index fcf5c546f2610a..710ea70d678c5b 100644
--- a/llvm/test/CodeGen/AArch64/vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/vector-compress.ll
@@ -36,37 +36,37 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
define <4 x i32> @test_compress_v4i32_with_passthru(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: test_compress_v4i32_with_passthru:
; CHECK: ; %bb.0:
+; CHECK-NEXT: str q2, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: movi.4s v3, #1
+; CHECK-NEXT: mov x12, sp
+; CHECK-NEXT: mov x10, sp
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x14, sp
+; CHECK-NEXT: mov w15, #3 ; =0x3
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: and.16b v3, v1, v3
-; CHECK-NEXT: str q2, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: fmov w16, s1
-; CHECK-NEXT: mov x12, sp
; CHECK-NEXT: mov.s w11, v1[2]
-; CHECK-NEXT: addv.4s s2, v3
-; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: mov.s w13, v1[3]
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x14, sp
+; CHECK-NEXT: addv.4s s2, v3
; CHECK-NEXT: bfi x12, x16, #2, #1
; CHECK-NEXT: and x16, x16, #0x1
-; CHECK-NEXT: mov w15, #3 ; =0x3
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: add x8, x16, x8
-; CHECK-NEXT: fmov w16, s2
; CHECK-NEXT: and x11, x11, #0x1
; CHECK-NEXT: and x13, x13, #0x1
+; CHECK-NEXT: fmov w16, s2
; CHECK-NEXT: add x11, x8, x11
; CHECK-NEXT: orr x8, x9, x8, lsl #2
; CHECK-NEXT: add x13, x11, x13
; CHECK-NEXT: bfi x14, x11, #2, #2
+; CHECK-NEXT: cmp x13, #3
; CHECK-NEXT: bfi x10, x16, #2, #2
; CHECK-NEXT: mov.s w16, v0[3]
-; CHECK-NEXT: cmp x13, #3
; CHECK-NEXT: csel x11, x13, x15, lo
; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: str s0, [sp]
From 8f40d66d637382c9b6db732e8d04f0c6d6fc3e5c Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 5 Sep 2024 18:01:55 +0100
Subject: [PATCH 2/2] [fixup] Add tests
---
.../no-reorder-cfi-merge-back-load.mir | 83 ++++++++++++++++++
.../no-reorder-cfi-merge-back-store.mir | 79 +++++++++++++++++
.../AArch64/no-reorder-cfi-merge-fwd-load.mir | 86 +++++++++++++++++++
.../AArch64/no-reorder-cfi-merge-fwd.mir | 82 ++++++++++++++++++
.../AArch64/no-reorder-cfi-no-merge.mir | 86 +++++++++++++++++++
5 files changed, 416 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-load.mir
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-store.mir
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd-load.mir
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd.mir
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi-no-merge.mir
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-load.mir b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-load.mir
new file mode 100644
index 00000000000000..dcd6178fb37f7c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-load.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "aarch64-unknown-linux"
+
+ define i32 @f(i32 %x) {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @g(i32)
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ isCalleeSavedInfoValid: true
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $w0, $lr, $x1
+ ;
+ ; Test a load instruction is merged backwards into an SP update
+ ;
+ ; CHECK-LABEL: name: f
+ ; CHECK: liveins: $w0, $lr, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp, $x0 = frame-setup LDRXpre $sp, -32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup STRXui killed $x1, $sp, 1
+ ; CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0
+ $sp = frame-setup SUBXri $sp, 32, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ $x0 = frame-setup LDRXui $sp, 0
+ frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ frame-setup STRXui killed $x1, $sp, 1
+ frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ $fp = frame-setup ADDXri $sp, 0, 0
+ frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ $sp = frame-destroy ADDXri $sp, 32, 0
+ RET undef $lr, implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-store.mir b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-store.mir
new file mode 100644
index 00000000000000..eb9fe8d8c11503
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-back-store.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "aarch64-unknown-linux"
+
+ define i32 @f(i32 %x) {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @g(i32)
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ isCalleeSavedInfoValid: true
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $w0, $lr
+ ;
+ ; Test a store instruction is merged backwards into an SP update.
+ ;
+ ; CHECK-LABEL: name: f
+ ; CHECK: liveins: $w0, $lr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -32 :: (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0
+ $sp = frame-setup SUBXri $sp, 32, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ $fp = frame-setup ADDXri $sp, 0, 0
+ frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ $sp = frame-destroy ADDXri $sp, 32, 0
+ RET undef $lr, implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd-load.mir b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd-load.mir
new file mode 100644
index 00000000000000..253b1c847e053b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd-load.mir
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "aarch64-unknown-linux"
+
+ define i32 @f(i32 %x) {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @g(i32)
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ isCalleeSavedInfoValid: true
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $w0, $lr, $x0
+ ;
+ ; Test an SP update is merged forwards into a load instruction,
+ ; because an access to the load destination reg prevents merging backwards.
+ ;
+ ; CHECK-LABEL: name: f
+ ; CHECK: liveins: $w0, $lr, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x1 = ADDXri $x0, 1, 0
+ ; CHECK-NEXT: early-clobber $sp, $x0 = frame-setup LDRXpre $sp, -32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup STRXui killed $x1, $sp, 1
+ ; CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0
+ $sp = frame-setup SUBXri $sp, 32, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ $x1 = ADDXri $x0, 1, 0
+ $x0 = frame-setup LDRXui $sp, 0
+ frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ frame-setup STRXui killed $x1, $sp, 1
+ frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ $fp = frame-setup ADDXri $sp, 0, 0
+ frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ $sp = frame-destroy ADDXri $sp, 32, 0
+ RET undef $lr, implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd.mir b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd.mir
new file mode 100644
index 00000000000000..e7b519572f9ae3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi-merge-fwd.mir
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "aarch64-unknown-linux"
+
+ define i32 @f(i32 %x) {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @g(i32)
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ isCalleeSavedInfoValid: true
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $w0, $lr, $x1, $x2
+ ;
+ ; Test an SP update is merged forwards into a store instruction,
+ ; because a memory access prevents merging backwards.
+ ;
+ ; CHECK-LABEL: name: f
+ ; CHECK: liveins: $w0, $lr, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: frame-setup STRXui killed $x1, $x2, 0
+ ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -32 :: (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0
+ $sp = frame-setup SUBXri $sp, 32, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ frame-setup STRXui killed $x1, $x2, 0
+ frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ $fp = frame-setup ADDXri $sp, 0, 0
+ frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ $sp = frame-destroy ADDXri $sp, 32, 0
+ RET undef $lr, implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi-no-merge.mir b/llvm/test/CodeGen/AArch64/no-reorder-cfi-no-merge.mir
new file mode 100644
index 00000000000000..d26f8449be1854
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi-no-merge.mir
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc --run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+--- |
+ target triple = "aarch64-unknown-linux"
+
+ define i32 @f(i32 %x) {
+ entry:
+ ret i32 0
+ }
+
+ declare i32 @g(i32)
+
+...
+---
+name: f
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ isCalleeSavedInfoValid: true
+fixedStack: []
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ bb.0.entry:
+ liveins: $w0, $lr, $x1, $x2
+ ;
+ ; Test an SP update can't be merged in either direction,
+ ; because a memory access prevents a backward merge and a CFI instruction
+ ; prevents a forward merge.
+ ;
+ ; CHECK-LABEL: name: f
+ ; CHECK: liveins: $w0, $lr, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 32, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $x1, $x2, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ ; CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
+ ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ ; CHECK-NEXT: $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ ; CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0
+ $sp = frame-setup SUBXri $sp, 32, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ frame-setup STRXui killed $x1, $x2, 0
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1)
+ frame-setup STRXui killed $lr, $sp, 2 :: (store (s64) into %stack.0)
+ $fp = frame-setup ADDXri $sp, 0, 0
+ frame-setup CFI_INSTRUCTION def_cfa $w29, 32
+ frame-setup CFI_INSTRUCTION offset $w30, -8
+ frame-setup CFI_INSTRUCTION offset $w29, -16
+ renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0
+ BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+ $w0 = nsw ADDWri killed renamable $w0, 1, 0
+ $fp = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
+ $lr = frame-destroy LDRXui $sp, 2 :: (load (s64) from %stack.0)
+ $sp = frame-destroy ADDXri $sp, 32, 0
+ RET undef $lr, implicit $w0
+
+...