[llvm] [AArch64] Prevent the AArch64LoadStoreOptimizer from reordering CFI instructions (PR #101317)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 03:20:46 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Momchil Velikov (momchil-velikov)
<details>
<summary>Changes</summary>
When AArch64LoadStoreOptimizer pass merges an SP update with a load/store
instruction and needs to adjust unwind information either:
* create the merged instruction at the location of the SP update (so no CFI
instructions are moved), or
* only move a CFI instruction if the move would not reorder it across other CFI
instructions
If neither of the above is possible, don't perform the optimisation.
---
Patch is 28.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101317.diff
8 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (+82-33)
- (modified) llvm/test/CodeGen/AArch64/build-one-lane.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/insertextract.ll (+14-14)
- (added) llvm/test/CodeGen/AArch64/no-reorder-cfi.ll (+26)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/vector-compress.ll (+10-10)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6cd9a1a817086..6902b783d7d65 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4112,6 +4112,7 @@ bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
}
const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
: 1;
@@ -4120,6 +4121,7 @@ const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
const MachineOperand &
AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
+ assert(MI.mayLoadOrStore() && "Load or store instruction expected");
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
: 2;
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d0adb78b231a7..e6ecfaab49a3d 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -176,8 +176,12 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
+ // `MergeEither` is set to true if the combined instruction may be placed
+ // either at the location of the load/store instruction or at the location of
+ // the update instruction.
MachineBasicBlock::iterator
- findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
+ findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
+ bool &MergeEither);
// Find an instruction that updates the base register of the ld/st
// instruction.
@@ -185,9 +189,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
unsigned BaseReg, int Offset);
// Merge a pre- or post-index base register update into a ld/st instruction.
- MachineBasicBlock::iterator
+ std::optional<MachineBasicBlock::iterator>
mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update, bool IsPreIdx);
+ MachineBasicBlock::iterator Update, bool IsForward,
+ bool IsPreIdx, bool MergeEither);
// Find and merge zero store instructions.
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
@@ -1956,7 +1961,7 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
!(MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)) ||
- AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
+ MI.getOperand(0).getReg() != AArch64::SP)
return End;
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -1971,20 +1976,37 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
}
}
-MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update,
- bool IsPreIdx) {
+std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
+ bool IsForward, bool IsPreIdx, bool MergeEither) {
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- // If updating the SP and the following instruction is CFA offset related CFI
- // instruction move it after the merged instruction.
- MachineBasicBlock::iterator CFI =
- IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
+ // If updating the SP and the following instruction is CFA offset related CFI,
+ // make sure the CFI follows the SP update either by merging at the location
+ // of the update or by moving the CFI after the merged instruction. If unable
+ // to do so, bail.
+ MachineBasicBlock::iterator InsertPt = I;
+ if (IsForward) {
+ assert(IsPreIdx);
+ if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
+ if (MergeEither) {
+ InsertPt = Update;
+ } else {
+ // Take care not to reorder CFIs.
+ if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
+ return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
+ }))
+ return std::nullopt;
+
+ MachineBasicBlock *MBB = InsertPt->getParent();
+ MBB->splice(std::next(InsertPt), MBB, CFI);
+ }
+ }
+ }
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
@@ -2005,8 +2027,9 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .add(getLdStRegOp(*Update))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
+ .add(Update->getOperand(0))
.add(getLdStRegOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
@@ -2014,8 +2037,9 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
- MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .add(getLdStRegOp(*Update))
+ MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
+ TII->get(NewOpc))
+ .add(Update->getOperand(0))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
@@ -2023,10 +2047,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
- if (CFI != E) {
- MachineBasicBlock *MBB = I->getParent();
- MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
- }
if (IsPreIdx) {
++NumPreFolded;
@@ -2174,7 +2194,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
- MachineBasicBlock::iterator I, unsigned Limit) {
+ MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;
@@ -2184,6 +2204,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
+ Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
+ IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
+ : AArch64::NoRegister};
+
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
@@ -2191,12 +2216,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
- bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
- for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
- Register DestReg = getLdStRegOp(MemMI, i).getReg();
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
+ if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
return E;
- }
}
const bool BaseRegSP = BaseReg == AArch64::SP;
@@ -2217,6 +2239,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
UsedRegUnits.clear();
unsigned Count = 0;
bool MemAcessBeforeSPPreInc = false;
+ MergeEither = true;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
@@ -2243,6 +2266,20 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
if (!ModifiedRegUnits.available(BaseReg) ||
!UsedRegUnits.available(BaseReg))
return E;
+
+ // If we have a destination register (i.e. a load instruction) and a
+ // destination register is used or modified, then we can only merge forward,
+ // i.e. the combined instruction is put in the place of the memory
+ // instruction. Same applies if we see a memory access or side effects.
+ if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
+ (DestReg[0] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[0]) &&
+ UsedRegUnits.available(DestReg[0]))) ||
+ (DestReg[1] != AArch64::NoRegister &&
+ !(ModifiedRegUnits.available(DestReg[1]) &&
+ UsedRegUnits.available(DestReg[1]))))
+ MergeEither = false;
+
// Keep track if we have a memory access before an SP pre-increment, in this
// case we need to validate later that the update amount respects the red
// zone.
@@ -2399,8 +2436,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/false,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// Don't know how to handle unscaled pre/post-index versions below, so bail.
@@ -2412,11 +2453,15 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// ldr x1, [x0]
// merged into:
// ldr x1, [x0, #8]!
- Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+ bool MergeEither;
+ Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
+ /*IsPreIdx=*/true, MergeEither)) {
+ MBBI = *NextI;
+ return true;
+ }
}
// The immediate in the load/store is scaled by the size of the memory
@@ -2433,8 +2478,12 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
- return true;
+ if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
+ /*IsPreIdx=*/true,
+ /*MergeEither=*/false)) {
+ MBBI = *NextI;
+ return true;
+ }
}
return false;
diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index a517ca4a1bb4b..ac37fbc349d7d 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -318,9 +318,9 @@ define void @v2f64st(ptr %p, double %s) nounwind {
define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_32xi8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0x1f
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mov w10, #30 // =0x1e
diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 8b82004388b09..d1258d127d1a9 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -160,9 +160,9 @@ entry:
define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str d2, [x9, x8, lsl #3]
@@ -396,9 +396,9 @@ entry:
define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str s2, [x9, x8, lsl #2]
@@ -561,9 +561,9 @@ entry:
define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str h2, [x9, x8, lsl #1]
@@ -724,9 +724,9 @@ entry:
define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strb w0, [x9, x8]
@@ -885,9 +885,9 @@ entry:
define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: strh w0, [x9, x8, lsl #1]
@@ -1114,9 +1114,9 @@ entry:
define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str w0, [x9, x8, lsl #2]
@@ -1299,9 +1299,9 @@ entry:
define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) {
; CHECK-SD-LABEL: insert_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT: and x8, x1, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: str x0, [x9, x8, lsl #3]
@@ -1465,9 +1465,9 @@ entry:
define double @extract_v4f64_c(<4 x double> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4f64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr d0, [x9, x8, lsl #3]
@@ -1673,9 +1673,9 @@ entry:
define float @extract_v8f32_c(<8 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8f32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr s0, [x9, x8, lsl #2]
@@ -1832,9 +1832,9 @@ entry:
define half @extract_v16f16_c(<16 x half> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16f16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr h0, [x9, x8, lsl #1]
@@ -1990,9 +1990,9 @@ entry:
define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v32i8_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x1f
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrb w0, [x9, x8]
@@ -2146,9 +2146,9 @@ entry:
define i16 @extract_v16i16_c(<16 x i16> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v16i16_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0xf
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldrh w0, [x9, x8, lsl #1]
@@ -2379,9 +2379,9 @@ entry:
define i32 @extract_v8i32_c(<8 x i32> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v8i32_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x7
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr w0, [x9, x8, lsl #2]
@@ -2562,9 +2562,9 @@ entry:
define i64 @extract_v4i64_c(<4 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i64_c:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: and x8, x0, #0x3
; CHECK-SD-NEXT: mov x9, sp
; CHECK-SD-NEXT: ldr x0, [x9, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
new file mode 100644
index 0000000000000..cc7acf6ddfb5e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-reorder-cfi.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -homogeneous-prolog-epilog < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @g(ptr, ptr)
+
+define void @f() minsize {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101317
More information about the llvm-commits
mailing list