[llvm] f914e8e - [AArch64][SME] Add coalescer barrier for args/results in locally streaming functions. (#85388)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 04:40:34 PDT 2024
Author: Sander de Smalen
Date: 2024-03-26T11:40:31Z
New Revision: f914e8e77c4703814e2f2dcef1d4569b17837c92
URL: https://github.com/llvm/llvm-project/commit/f914e8e77c4703814e2f2dcef1d4569b17837c92
DIFF: https://github.com/llvm/llvm-project/commit/f914e8e77c4703814e2f2dcef1d4569b17837c92.diff
LOG: [AArch64][SME] Add coalescer barrier for args/results in locally streaming functions. (#85388)
Similar to how we protected the FP/fixed-vector arguments and results of
calls, we should do the same for the arguments/results of locally-streaming
functions, so that these values are not spilled/filled as ZPR registers.
This may cause a small regression (additional spills/fills), which is
addressed by #85386.
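For illustration, a minimal sketch (not part of this patch; the function
name is hypothetical) of the kind of locally-streaming function this change
targets. The <2 x i64> argument arrives in a Q register before the implicit
'smstart', so its live range must not be coalesced into a ZPR register that
would then be spilled/filled as a scalable vector:

  ; hypothetical example, not from this patch
  define <2 x i64> @example(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind {
    %r = add <2 x i64> %a, %a
    ret <2 x i64> %r
  }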
Added:
llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
llvm/test/CodeGen/AArch64/sme-streaming-body.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 043a617898a69e..f552f91929201c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6895,6 +6895,11 @@ AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
return TPIDR2Obj;
}
+static bool isPassedInFPR(EVT VT) {
+ return VT.isFixedLengthVector() ||
+ (VT.isFloatingPoint() && !VT.isScalableVector());
+}
+
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -7031,6 +7036,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// This will be the new Chain/Root node.
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT, Glue);
Glue = ArgValue.getValue(2);
+ if (isPassedInFPR(ArgValue.getValueType())) {
+ ArgValue =
+ DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
+ DAG.getVTList(ArgValue.getValueType(), MVT::Glue),
+ {ArgValue, Glue});
+ Glue = ArgValue.getValue(1);
+ }
} else
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
@@ -7402,11 +7414,6 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
}
}
-static bool isPassedInFPR(EVT VT) {
- return VT.isFixedLengthVector() ||
- (VT.isFloatingPoint() && !VT.isScalableVector());
-}
-
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
@@ -8632,6 +8639,10 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
+ if (FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface() &&
+ isPassedInFPR(RetVal.second.getValueType()))
+ RetVal.second = DAG.getNode(AArch64ISD::COALESCER_BARRIER, DL,
+ RetVal.second.getValueType(), RetVal.second);
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index b65c67e70a4e0f..2db0fa25343450 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -29,7 +29,7 @@ def AArch64_save_zt : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2,
[SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
def AArch64CoalescerBarrier
- : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, []>;
+ : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, [SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// Instruction naming conventions.
diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
new file mode 100644
index 00000000000000..cd5046a9a64752
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mattr=+sme -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK-COALESCER-BARRIER
+; RUN: llc -mattr=+sme -stop-after=virtregrewriter < %s | FileCheck %s --check-prefix=CHECK-REGALLOC
+
+target triple = "aarch64"
+
+define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind {
+ ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_args
+ ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0):
+ ; CHECK-COALESCER-BARRIER-NEXT: liveins: $q0
+ ; CHECK-COALESCER-BARRIER-NEXT: {{ $}}
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY]]
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: [[DEF:%[0-9]+]]:zpr = IMPLICIT_DEF
+ ; CHECK-COALESCER-BARRIER-NEXT: [[INSERT_SUBREG:%[0-9]+]]:zpr = INSERT_SUBREG [[DEF]], [[COALESCER_BARRIER_FPR128_]], %subreg.zsub
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: $z0 = COPY [[INSERT_SUBREG]]
+ ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR
+ ;
+ ; CHECK-REGALLOC-LABEL: name: dont_coalesce_args
+ ; CHECK-REGALLOC: bb.0 (%ir-block.0):
+ ; CHECK-REGALLOC-NEXT: liveins: $q0
+ ; CHECK-REGALLOC-NEXT: {{ $}}
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+ ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: RET_ReallyLR
+ %sa = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %a, i64 0)
+ call void @scalable_args(<vscale x 2 x i64> %sa)
+ ret void
+}
+
+define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
+ ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_res
+ ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0):
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY [[COPY]].zsub
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY1]]
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $q0, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: $q0 = COPY [[COALESCER_BARRIER_FPR128_]]
+ ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; CHECK-REGALLOC-LABEL: name: dont_coalesce_res
+ ; CHECK-REGALLOC: bb.0 (%ir-block.0):
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+ ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
+ %sa = call <vscale x 2 x i64> @scalable_res()
+ %res = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %sa, i64 0)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind {
+ ; CHECK-COALESCER-BARRIER-LABEL: name: dont_coalesce_arg_that_is_also_res
+ ; CHECK-COALESCER-BARRIER: bb.0 (%ir-block.0):
+ ; CHECK-COALESCER-BARRIER-NEXT: liveins: $q0
+ ; CHECK-COALESCER-BARRIER-NEXT: {{ $}}
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COPY]]
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: [[DEF:%[0-9]+]]:zpr = IMPLICIT_DEF
+ ; CHECK-COALESCER-BARRIER-NEXT: [[INSERT_SUBREG:%[0-9]+]]:zpr = INSERT_SUBREG [[DEF]], [[COALESCER_BARRIER_FPR128_]], %subreg.zsub
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: $z0 = COPY [[INSERT_SUBREG]]
+ ; CHECK-COALESCER-BARRIER-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-COALESCER-BARRIER-NEXT: [[COALESCER_BARRIER_FPR128_1:%[0-9]+]]:fpr128 = COALESCER_BARRIER_FPR128 [[COALESCER_BARRIER_FPR128_]]
+ ; CHECK-COALESCER-BARRIER-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $q0, implicit $vg, implicit-def $vg
+ ; CHECK-COALESCER-BARRIER-NEXT: $q0 = COPY [[COALESCER_BARRIER_FPR128_1]]
+ ; CHECK-COALESCER-BARRIER-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; CHECK-REGALLOC-LABEL: name: dont_coalesce_arg_that_is_also_res
+ ; CHECK-REGALLOC: bb.0 (%ir-block.0):
+ ; CHECK-REGALLOC-NEXT: liveins: $q0
+ ; CHECK-REGALLOC-NEXT: {{ $}}
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+ ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
+ ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0
+ ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
+ ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
+ ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0
+ %sa = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %a, i64 0)
+ call void @scalable_args(<vscale x 2 x i64> %sa)
+ ret <2 x i64> %a
+}
+
+declare void @scalable_args(<vscale x 2 x i64>) "aarch64_pstate_sm_enabled"
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
+
+declare <vscale x 2 x i64> @scalable_res() "aarch64_pstate_sm_enabled"
+declare <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
index d67573384ca959..6e262cc0786e41 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
@@ -8,27 +8,31 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x8, x0, #0x1
; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: fmov s0, wzr
+; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s0, wzr
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
ret float zeroinitializer
}
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 93875549cffc86..08dec228d2f756 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -87,29 +87,27 @@ if.end:
define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: locally_streaming_caller_no_callee:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: addsvl sp, sp, #-1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: add z0.d, z0.d, #41 // =0x29
-; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: smstop sm
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: addsvl sp, sp, #1
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
%add = add <2 x i64> %a, <i64 41, i64 42>;