[llvm] [AArch64] Prevent the AArch64LoadStoreOptimizer from reordering CFI instructions (PR #101317)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 03:20:15 PDT 2024
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/101317
When the AArch64LoadStoreOptimizer pass merges an SP update with a load/store
instruction and needs to adjust unwind information, either:
* create the merged instruction at the location of the SP update (so no CFI
instructions are moved), or
* move a CFI instruction only if the move would not reorder it across other
CFI instructions.
If neither of the above is possible, don't perform the optimisation.
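To illustrate the constraint, consider a hypothetical sequence (made up for
this description, not taken from the patch or its tests):

    // Before the pass:
    sub  sp, sp, #16          // SP update
    .cfi_def_cfa_offset 16    // CFI describing the new CFA offset
    str  x0, [sp]             // store with SP as base and offset zero

    // After merging into a pre-indexed store, the CFI directive must
    // still follow the instruction that actually adjusts SP:
    str  x0, [sp, #-16]!
    .cfi_def_cfa_offset 16

Creating the merged instruction at the location of the sub leaves the CFI in
place; creating it at the location of the str requires moving the CFI down,
which is only safe when no other CFI instruction sits between the two.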
From 84ef8f8da899eb4bfbc3151e17932ebd39c33a71 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 30 Jul 2024 13:34:19 +0100
Subject: [PATCH 1/2] [AArch64] Fix incorrectly getting the destination reg of
an insn
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 ++
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 6 +++---
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6cd9a1a817086..6902b783d7d65 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4112,6 +4112,7 @@ bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
}
const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
: 1;
@@ -4120,6 +4121,7 @@ const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
const MachineOperand &
AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
: 2;
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d0adb78b231a7..6deea4f162db6 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1956,7 +1956,7 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
!(MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)) ||
- AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
+ MI.getOperand(0).getReg() != AArch64::SP)
return End;
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -2006,7 +2006,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .add(getLdStRegOp(*Update))
+ .add(Update->getOperand(0))
.add(getLdStRegOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
@@ -2015,7 +2015,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
} else {
// Paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .add(getLdStRegOp(*Update))
+ .add(Update->getOperand(0))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
From 011b5413c946050fd3db993ee464f60acdb033ed Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 30 Jul 2024 17:33:58 +0100
Subject: [PATCH 2/2] [AArch64] Prevent the AArch64LoadStoreOptimizer from
reordering CFI instructions
When the AArch64LoadStoreOptimizer pass merges an SP update with a load/store
instruction, either:
* create the merged instruction at the location of the SP update (so no CFI
instructions are moved), or
* move a CFI instruction only if the move would not reorder it across other
CFI instructions.
If neither of the above is possible, don't perform the optimisation.
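As a hypothetical illustration of a case that must be rejected (again made up
for this description):

    sub  sp, sp, #32          // SP update
    .cfi_def_cfa_offset 32
    .cfi_offset w30, -8       // another CFI instruction in between
    str  x0, [sp]             // candidate for merging

Emitting the merged "str x0, [sp, #-32]!" at the location of the store would
move .cfi_def_cfa_offset past .cfi_offset, reordering the CFI stream. If the
merged instruction also cannot be emitted at the location of the sub (for
example, because an intervening instruction accesses memory), the
optimisation is skipped.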
---
.../AArch64/AArch64LoadStoreOptimizer.cpp | 109 +++++++++++++-----
llvm/test/CodeGen/AArch64/build-one-lane.ll | 2 +-
llvm/test/CodeGen/AArch64/insertextract.ll | 28 ++---
llvm/test/CodeGen/AArch64/no-reorder-cfi.ll | 26 +++++
...treaming-mode-fixed-length-bit-counting.ll | 18 +--
...ing-mode-fixed-length-insert-vector-elt.ll | 6 +-
llvm/test/CodeGen/AArch64/vector-compress.ll | 20 ++--
7 files changed, 142 insertions(+), 67 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 6deea4f162db6..e6ecfaab49a3d 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -176,8 +176,12 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
+ // `MergeEither` is set to true if the combined instruction may be placed
+ // either at the location of the load/store instruction or at the location of
+ // the update instruction.
MachineBasicBlock::iterator
- findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
+ findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
+ bool &MergeEither);
// Find an instruction that updates the base register of the ld/st
// instruction.
@@ -185,9 +189,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
unsigned BaseReg, int Offset);
// Merge a pre- or post-index base register update into a ld/st instruction.
- MachineBasicBlock::iterator
+ std::optional<MachineBasicBlock::iterator>
mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update, bool IsPreIdx);
+ MachineBasicBlock::iterator Update, bool IsForward,
+ bool IsPreIdx, bool MergeEither);
// Find and merge zero store instructions.
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
@@ -1971,20 +1976,37 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
}
}
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update,
- bool IsPreIdx) {
+std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
+ bool IsForward, bool IsPreIdx, bool MergeEither) {
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- // If updating the SP and the following instruction is CFA offset related CFI
- // instruction move it after the merged instruction.
- MachineBasicBlock::iterator CFI =
- IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
+ // If updating the SP and the following instruction is a CFA offset related CFI,
+ // make sure the CFI follows the SP update either by merging at the location
+ // of the update or by moving the CFI after the merged instruction. If unable
+ // to do so, bail.
+ MachineBasicBlock::iterator InsertPt = I;
+ if (IsForward) {
+ assert(IsPreIdx);
+ if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
+ if (MergeEither) {
+ InsertPt = Update;
+ } else {
+ // Take care not to reorder CFIs.
+ if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
+ return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
+ }))
+ return std::nullopt;
+
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->splice(std::next(InsertPt), MBB, CFI);
+ }
+ }
+ }
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
@@ -2005,7 +2027,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
.add(Update->getOperand(0))
.add(getLdStRegOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
@@ -2014,7 +2037,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
.add(Update->getOperand(0))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
@@ -2023,10 +2047,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
- if (CFI != E) {
- MachineBasicBlock *MBB = I->getParent();
- MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
- }
if (IsPreIdx) {
++NumPreFolded;
@@ -2174,7 +2194,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
- MachineBasicBlock::iterator I, unsigned Limit) {
+ MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;
@@ -2184,6 +2204,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
+ Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
+ IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
+ : AArch64::NoRegister};
+
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
@@ -2191,12 +2216,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
- bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
- for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
- Register DestReg = getLdStRegOp(MemMI, i).getReg();
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
+ if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
return E;
- }
}
const bool BaseRegSP = BaseReg == AArch64::SP;
@@ -2217,6 +2239,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
bool MemAcessBeforeSPPreInc = false;
+ MergeEither = true;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
@@ -2243,6 +2266,20 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
if (!ModifiedRegUnits.available(BaseReg) ||
!UsedRegUnits.available(BaseReg))
return E;
+
+ // If we have a destination register (i.e. a load instruction) and a
+ // destination register is used or modified, then we can only merge forward,
+ // i.e. the combined instruction is put in the place of the memory
+ // instruction. Same applies if we see a memory access or side effects.
+ if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
+ (DestReg[0] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[0]) &&
+ UsedRegUnits.available(DestReg[0]))) ||
+ (DestReg[1] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[1]) &&
+ UsedRegUnits.available(DestReg[1]))))
+ MergeEither = false;
+
// Keep track if we have a memory access before an SP pre-increment, in this
// case we need to validate later that the update amount respects the red
// zone.
@@ -2399,8 +2436,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/false,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// Don't know how to handle unscaled pre/post-index versions below, so bail.
@@ -2412,11 +2453,15 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// ldr x1, [x0]
// merged into:
// ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+ bool MergeEither;
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
+ /*IsPreIdx=*/true, MergeEither)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// The immediate in the load/store is scaled by the size of the memory
@@ -2433,8 +2478,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/true,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
return false;
diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index a517ca4a1bb4b..ac37fbc349d7d 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -318,9 +318,9 @@ define void @v2f64st(ptr %p, double %s) nounwind {
define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_32xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0x1f
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w10, #30 // =0x1e
diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 8b82004388b09..d1258d127d1a9 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -160,9 +160,9 @@ entry:
define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str d2, [x9, x8, lsl #3]
@@ -396,9 +396,9 @@ entry:
define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str s2, [x9, x8, lsl #2]
@@ -561,9 +561,9 @@ entry:
define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str h2, [x9, x8, lsl #1]
@@ -724,9 +724,9 @@ entry:
define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strb w0, [x9, x8]
@@ -885,9 +885,9 @@ entry:
define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strh w0, [x9, x8, lsl #1]
@@ -1114,9 +1114,9 @@ entry:
define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str w0, [x9, x8, lsl #2]
@@ -1299,9 +1299,9 @@ entry:
define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str x0, [x9, x8, lsl #3]
@@ -1465,9 +1465,9 @@ entry:
define double @extract_v4f64_c(<4 x double> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr d0, [x9, x8, lsl #3]
@@ -1673,9 +1673,9 @@ entry:
define float @extract_v8f32_c(<8 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr s0, [x9, x8, lsl #2]
@@ -1832,9 +1832,9 @@ entry:
define half @extract_v16f16_c(<16 x half> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr h0, [x9, x8, lsl #1]
@@ -1990,9 +1990,9 @@ entry:
define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrb w0, [x9, x8]
@@ -2146,9 +2146,9 @@ entry:
define i16 @extract_v16i16_c(<16 x i16> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrh w0, [x9, x8, lsl #1]
@@ -2379,9 +2379,9 @@ entry:
define i32 @extract_v8i32_c(<8 x i32> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr w0, [x9, x8, lsl #2]
@@ -2562,9 +2562,9 @@ entry:
define i64 @extract_v4i64_c(<4 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr x0, [x9, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
new file mode 100644
index 0000000000000..cc7acf6ddfb5e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -homogeneous-prolog-epilog < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @g(ptr, ptr)
+
+define void @f() minsize {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: str xzr, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x0, sp, #8
+; CHECK-NEXT: mov x1, sp
+; CHECK-NEXT: bl g
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b OUTLINED_FUNCTION_EPILOG_TAIL_x30x29
+entry:
+ %p = alloca i32, align 8
+ %n = alloca i64, align 8
+ store i64 0, ptr %n, align 8
+ call void @g(ptr %p, ptr %n)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index 2a83f13251d76..bd49db8a4c414 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -741,11 +741,11 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #6]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
; NONEON-NOSVE-NEXT: ldrb w12, [sp]
; NONEON-NOSVE-NEXT: lsr w13, w9, #1
@@ -814,10 +814,10 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v8i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -939,10 +939,10 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v16i8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -1611,11 +1611,11 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT: ldrh w10, [sp]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w11, w9, #1
; NONEON-NOSVE-NEXT: lsr w12, w10, #1
; NONEON-NOSVE-NEXT: and w11, w11, #0x55555555
@@ -1657,10 +1657,10 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -1730,10 +1730,10 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v8i16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2089,10 +2089,10 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr w9, [sp, #4]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2135,10 +2135,10 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v4i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr w9, [sp, #12]
+; NONEON-NOSVE-NEXT: mov w8, #16843009 // =0x1010101
; NONEON-NOSVE-NEXT: lsr w10, w9, #1
; NONEON-NOSVE-NEXT: and w10, w10, #0x55555555
; NONEON-NOSVE-NEXT: sub w9, w9, w10
@@ -2366,10 +2366,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
;
; NONEON-NOSVE-LABEL: ctpop_v2i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr x9, [sp, #8]
+; NONEON-NOSVE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
; NONEON-NOSVE-NEXT: lsr x10, x9, #1
; NONEON-NOSVE-NEXT: and x10, x10, #0x5555555555555555
; NONEON-NOSVE-NEXT: sub x9, x9, x10
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 3ba61c3335a64..d77473ed8f08e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -518,13 +518,13 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) {
;
; NONEON-NOSVE-LABEL: insertelement_v2f16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: ldr h0, [x8, :lo12:.LCPI14_0]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
-; NONEON-NOSVE-NEXT: str h0, [sp, #10]
+; NONEON-NOSVE-NEXT: ldr h0, [x8, :lo12:.LCPI14_0]
; NONEON-NOSVE-NEXT: str h1, [sp, #8]
+; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vector-compress.ll b/llvm/test/CodeGen/AArch64/vector-compress.ll
index fcf5c546f2610..710ea70d678c5 100644
--- a/llvm/test/CodeGen/AArch64/vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/vector-compress.ll
@@ -36,37 +36,37 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
define <4 x i32> @test_compress_v4i32_with_passthru(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: test_compress_v4i32_with_passthru:
; CHECK: ; %bb.0:
+; CHECK-NEXT: str q2, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ushll.4s v1, v1, #0
; CHECK-NEXT: movi.4s v3, #1
+; CHECK-NEXT: mov x12, sp
+; CHECK-NEXT: mov x10, sp
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x14, sp
+; CHECK-NEXT: mov w15, #3 ; =0x3
; CHECK-NEXT: shl.4s v1, v1, #31
; CHECK-NEXT: cmlt.4s v1, v1, #0
; CHECK-NEXT: and.16b v3, v1, v3
-; CHECK-NEXT: str q2, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov.s w8, v1[1]
; CHECK-NEXT: fmov w16, s1
-; CHECK-NEXT: mov x12, sp
; CHECK-NEXT: mov.s w11, v1[2]
-; CHECK-NEXT: addv.4s s2, v3
-; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: mov.s w13, v1[3]
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x14, sp
+; CHECK-NEXT: addv.4s s2, v3
; CHECK-NEXT: bfi x12, x16, #2, #1
; CHECK-NEXT: and x16, x16, #0x1
-; CHECK-NEXT: mov w15, #3 ; =0x3
; CHECK-NEXT: and x8, x8, #0x1
; CHECK-NEXT: add x8, x16, x8
-; CHECK-NEXT: fmov w16, s2
; CHECK-NEXT: and x11, x11, #0x1
; CHECK-NEXT: and x13, x13, #0x1
+; CHECK-NEXT: fmov w16, s2
; CHECK-NEXT: add x11, x8, x11
; CHECK-NEXT: orr x8, x9, x8, lsl #2
; CHECK-NEXT: add x13, x11, x13
; CHECK-NEXT: bfi x14, x11, #2, #2
+; CHECK-NEXT: cmp x13, #3
; CHECK-NEXT: bfi x10, x16, #2, #2
; CHECK-NEXT: mov.s w16, v0[3]
-; CHECK-NEXT: cmp x13, #3
; CHECK-NEXT: csel x11, x13, x15, lo
; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: str s0, [sp]