[llvm] c3d5886 - [AArch64][SME] Create new pass to remove COALESCER_BARRIER early. (#85386)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 07:07:24 PDT 2024
Author: Sander de Smalen
Date: 2024-04-15T15:07:20+01:00
New Revision: c3d58867bd3da85958dbbb46c4a22c0d3c60af66
URL: https://github.com/llvm/llvm-project/commit/c3d58867bd3da85958dbbb46c4a22c0d3c60af66
DIFF: https://github.com/llvm/llvm-project/commit/c3d58867bd3da85958dbbb46c4a22c0d3c60af66.diff
LOG: [AArch64][SME] Create new pass to remove COALESCER_BARRIER early. (#85386)
The purpose of the COALESCER_BARRIER pseudo node is to prevent the
register coalescer from coalescing certain COPY instructions around
smstart/smstop instructions, so that we spill only the (required) FPR
register rather than the encompassing ZPR register.
The pseudos are removed in the AArch64ExpandPseudo pass. However,
because the node itself is a _use_ of a register, this occassionally
leads to redundant spills/fills, because the register allocator thinks
the virtual register is actually used before an smstart/smstop
instruction, causing it to be filled, at which points it requires
immediate spilling again to ensure it stays live over the smstart/smstop
instruction.
We can avoid that by removing the pseudo nodes right after coalescing,
but before register allocation.
Added:
llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
Modified:
llvm/lib/Target/AArch64/AArch64.h
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/test/CodeGen/AArch64/O3-pipeline.ll
llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-body.ll
llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index f7d81f42ef5d8e..b70fbe42fe5fc6 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64PointerAuthPass();
FunctionPass *createAArch64BranchTargetsPass();
FunctionPass *createAArch64MIPeepholeOptPass();
+FunctionPass *createAArch64PostCoalescerPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -93,6 +94,7 @@ void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
+void initializeAArch64PostCoalescerPass(PassRegistry &);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
void initializeAArch64PostSelectOptimizePass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
new file mode 100644
index 00000000000000..dd5234c4504d3c
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
@@ -0,0 +1,101 @@
+//===- AArch64PostCoalescerPass.cpp - AArch64 Post Coalescer pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-post-coalescer-pass"
+
+namespace {
+
+struct AArch64PostCoalescer : public MachineFunctionPass {
+ static char ID;
+
+ AArch64PostCoalescer() : MachineFunctionPass(ID) {
+ initializeAArch64PostCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+ MachineRegisterInfo *MRI;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AArch64 Post Coalescer pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+char AArch64PostCoalescer::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
+ "AArch64 Post Coalescer Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(AArch64PostCoalescer, "aarch64-post-coalescer-pass",
+ "AArch64 Post Coalescer Pass", false, false)
+
+bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ if (!FuncInfo->hasStreamingModeChanges())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : make_early_inc_range(MBB)) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::COALESCER_BARRIER_FPR16:
+ case AArch64::COALESCER_BARRIER_FPR32:
+ case AArch64::COALESCER_BARRIER_FPR64:
+ case AArch64::COALESCER_BARRIER_FPR128: {
+ Register Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ if (Src != Dst)
+ MRI->replaceRegWith(Dst, Src);
+
+ // MI must be erased from the basic block before recalculating the live
+ // interval.
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+
+ LIS->removeInterval(Src);
+ LIS->createAndComputeVirtRegInterval(Src);
+
+ Changed = true;
+ break;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64PostCoalescerPass() {
+ return new AArch64PostCoalescer();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7d4a57d792a191..7ef78cbba352a5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -235,6 +235,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PointerAuthPass(*PR);
+ initializeAArch64PostCoalescerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
@@ -539,6 +540,7 @@ class AArch64PassConfig : public TargetPassConfig {
void addPreEmitPass() override;
void addPostBBSections() override;
void addPreEmitPass2() override;
+ bool addRegAssignAndRewriteOptimized() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -876,6 +878,11 @@ void AArch64PassConfig::addPreEmitPass2() {
addPass(createUnpackMachineBundles(nullptr));
}
+bool AArch64PassConfig::addRegAssignAndRewriteOptimized() {
+ addPass(createAArch64PostCoalescerPass());
+ return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
+
MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 95b228f293204e..8e76f6c9279e73 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -73,6 +73,7 @@ add_llvm_target(AArch64CodeGen
AArch64MIPeepholeOpt.cpp
AArch64MCInstLower.cpp
AArch64PointerAuth.cpp
+ AArch64PostCoalescerPass.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index eee9a27c90c19e..d3c8e3b7e805c1 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -167,6 +167,7 @@
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
+; CHECK-NEXT: AArch64 Post Coalescer pass
; CHECK-NEXT: Machine Block Frequency Analysis
; CHECK-NEXT: Debug Variable Analysis
; CHECK-NEXT: Live Stack Slot Analysis
diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
index cd5046a9a64752..8e3866fcec89a7 100644
--- a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
+++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
@@ -25,8 +25,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
- ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
- ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
@@ -61,7 +60,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
- ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
@@ -94,17 +92,13 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
; CHECK-REGALLOC: bb.0 (%ir-block.0):
; CHECK-REGALLOC-NEXT: liveins: $q0
; CHECK-REGALLOC-NEXT: {{ $}}
- ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
- ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
- ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
- ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
- ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
index dba3227459b906..07377195d62a0b 100644
--- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll
@@ -52,9 +52,7 @@ define void @streaming_compatible_arg(float %f) #0 {
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index cd348be5d771d1..254e37e836cbb9 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -331,9 +331,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
@@ -392,9 +392,9 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
-; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
@@ -422,9 +422,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
-; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: and x19, x0, #0x1
-; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index d5bea725b6d14d..0c674c5685e000 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -1085,7 +1085,6 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1116,7 +1115,6 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1151,7 +1149,6 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1183,7 +1180,6 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1215,7 +1211,6 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1247,7 +1242,6 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1311,7 +1305,6 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
@@ -1343,7 +1336,6 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
index 6e262cc0786e41..d67573384ca959 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
@@ -8,31 +8,27 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
-; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: fmov s0, wzr
-; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: fmov s0, wzr
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
ret float zeroinitializer
}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 08dec228d2f756..cd6d45f54eb9a9 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -247,15 +247,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl cos
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index 6d2abf7e18419a..47b24290d3c85f 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -469,11 +469,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: ldp s4, s0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: and x19, x0, #0x1
-; CHECK-NEXT: stp s4, s0, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: ldp d4, d0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: stp d4, d0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
index de676ac5e0d2e6..465fb4667af694 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll
@@ -405,11 +405,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill
-; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl bar
; CHECK-NEXT: smstart sm
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
index cf171f8ef5ed3a..45ca7844b06551 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -22,9 +22,9 @@ define void @test_no_stackslot_scavenging(float %f) #0 {
; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: bl use_f
More information about the llvm-commits
mailing list