[llvm] 2c72d90 - [AArch64-SVE]: Force generating code compatible with streaming mode.
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 14 10:47:09 PDT 2022
Author: Hassnaa Hamdi
Date: 2022-10-14T17:46:56Z
New Revision: 2c72d90ecc69e703363a2f7d4e344e0bd868f8bf
URL: https://github.com/llvm/llvm-project/commit/2c72d90ecc69e703363a2f7d4e344e0bd868f8bf
DIFF: https://github.com/llvm/llvm-project/commit/2c72d90ecc69e703363a2f7d4e344e0bd868f8bf.diff
LOG: [AArch64-SVE]: Force generating code compatible with streaming mode.
Add a compile-time flag for forcing streaming-compatible code generation.
When the flag is enabled, lower basic loads and stores of fixed-width vectors
so that the generated code is compatible with streaming mode.
Differential Revision: https://reviews.llvm.org/D133433
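Usage sketch (this simply mirrors the RUN lines of the new tests below; the input file name is illustrative): the flag is a hidden cl::opt on the AArch64 subtarget, so it is passed directly to llc, e.g.

    llc -force-streaming-compatible-sve < sve-streaming-mode-fixed-length-loads.ll

The input IR is expected to have SVE or SME available (the tests use the "+sve" target feature); otherwise the assertion in AArch64Subtarget::forceStreamingCompatibleSVE() fires.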
Added:
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3d12cf5a0807a..7e0fb016c340d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5754,7 +5754,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ if (useSVEForFixedLengthVectorVT(Op.getValueType(),
+ Subtarget->forceStreamingCompatibleSVE()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
@@ -11055,7 +11056,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- if (useSVEForFixedLengthVectorVT(VT))
+ if (useSVEForFixedLengthVectorVT(VT,
+ Subtarget->forceStreamingCompatibleSVE()))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
@@ -11745,7 +11747,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT)) {
+ if (useSVEForFixedLengthVectorVT(VT,
+ Subtarget->forceStreamingCompatibleSVE())) {
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fab471aa15428..cfa2fdddf0143 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -220,6 +220,8 @@ def UseNegativeImmediates
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+def NotInStreamingSVEMode : Predicate<"!Subtarget->forceStreamingCompatibleSVE()">;
+
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -7132,16 +7134,17 @@ def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndex
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
-class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
- SDPatternOperator ExtLoad, Instruction LD1>
- : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
- (ResultTy (EXTRACT_SUBREG
- (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
-
-def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
-def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
-def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
+let Predicates = [NotInStreamingSVEMode] in {
+ class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
+ SDPatternOperator ExtLoad, Instruction LD1>
+ : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
+ (ResultTy (EXTRACT_SUBREG
+ (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
+ def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
+ def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
+ def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
+}
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne64:$Rd),
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index a9c76354eae33..75a0c34c6fe73 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -65,6 +65,10 @@ ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
"Should only be used for testing register allocator."),
cl::CommaSeparated, cl::Hidden);
+static cl::opt<bool>
+ ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
+ cl::init(false), cl::Hidden);
+
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
return OverrideVectorInsertExtractBaseCost;
@@ -431,3 +435,11 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
}
bool AArch64Subtarget::useAA() const { return UseAA; }
+
+bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
+ if (ForceStreamingCompatibleSVE) {
+ assert((hasSVE() || hasSME()) && "Expected SVE to be available");
+ return hasSVE() || hasSME();
+ }
+ return false;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 326e5120d4ba9..427a4178670d7 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -368,10 +368,15 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool useSVEForFixedLengthVectors() const {
+ if (forceStreamingCompatibleSVE())
+ return true;
+
// Prefer NEON unless larger SVE registers are available.
return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
}
+ bool forceStreamingCompatibleSVE() const;
+
unsigned getVScaleForTuning() const { return VScaleForTuning; }
const char* getChkStkName() const {
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
new file mode 100644
index 0000000000000..ee4ef1da16af9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -0,0 +1,230 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i8> @load_v4i8(<4 x i8>* %a) #0 {
+; CHECK-LABEL: load_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %load = load <4 x i8>, <4 x i8>* %a
+ ret <4 x i8> %load
+}
+
+define <8 x i8> @load_v8i8(<8 x i8>* %a) #0 {
+; CHECK-LABEL: load_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <8 x i8>, <8 x i8>* %a
+ ret <8 x i8> %load
+}
+
+define <16 x i8> @load_v16i8(<16 x i8>* %a) #0 {
+; CHECK-LABEL: load_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <16 x i8>, <16 x i8>* %a
+ ret <16 x i8> %load
+}
+
+define <32 x i8> @load_v32i8(<32 x i8>* %a) #0 {
+; CHECK-LABEL: load_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <32 x i8>, <32 x i8>* %a
+ ret <32 x i8> %load
+}
+
+define <2 x i16> @load_v2i16(<2 x i16>* %a) #0 {
+; CHECK-LABEL: load_v2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldrh w8, [x0, #2]
+; CHECK-NEXT: ldrh w9, [x0]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: zip1 z0.s, z1.s, z0.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+ %load = load <2 x i16>, <2 x i16>* %a
+ ret <2 x i16> %load
+}
+
+define <2 x half> @load_v2f16(<2 x half>* %a) #0 {
+; CHECK-LABEL: load_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ret
+ %load = load <2 x half>, <2 x half>* %a
+ ret <2 x half> %load
+}
+
+define <4 x i16> @load_v4i16(<4 x i16>* %a) #0 {
+; CHECK-LABEL: load_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x i16>, <4 x i16>* %a
+ ret <4 x i16> %load
+}
+
+define <4 x half> @load_v4f16(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x half>, <4 x half>* %a
+ ret <4 x half> %load
+}
+
+define <8 x i16> @load_v8i16(<8 x i16>* %a) #0 {
+; CHECK-LABEL: load_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <8 x i16>, <8 x i16>* %a
+ ret <8 x i16> %load
+}
+
+define <8 x half> @load_v8f16(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <8 x half>, <8 x half>* %a
+ ret <8 x half> %load
+}
+
+define <16 x i16> @load_v16i16(<16 x i16>* %a) #0 {
+; CHECK-LABEL: load_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <16 x i16>, <16 x i16>* %a
+ ret <16 x i16> %load
+}
+
+define <16 x half> @load_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: load_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <16 x half>, <16 x half>* %a
+ ret <16 x half> %load
+}
+
+define <2 x i32> @load_v2i32(<2 x i32>* %a) #0 {
+; CHECK-LABEL: load_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <2 x i32>, <2 x i32>* %a
+ ret <2 x i32> %load
+}
+
+define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
+; CHECK-LABEL: load_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <2 x float>, <2 x float>* %a
+ ret <2 x float> %load
+}
+
+define <4 x i32> @load_v4i32(<4 x i32>* %a) #0 {
+; CHECK-LABEL: load_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x i32>, <4 x i32>* %a
+ ret <4 x i32> %load
+}
+
+define <4 x float> @load_v4f32(<4 x float>* %a) #0 {
+; CHECK-LABEL: load_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x float>, <4 x float>* %a
+ ret <4 x float> %load
+}
+
+define <8 x i32> @load_v8i32(<8 x i32>* %a) #0 {
+; CHECK-LABEL: load_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <8 x i32>, <8 x i32>* %a
+ ret <8 x i32> %load
+}
+
+define <8 x float> @load_v8f32(<8 x float>* %a) #0 {
+; CHECK-LABEL: load_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <8 x float>, <8 x float>* %a
+ ret <8 x float> %load
+}
+
+define <1 x i64> @load_v1i64(<1 x i64>* %a) #0 {
+; CHECK-LABEL: load_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <1 x i64>, <1 x i64>* %a
+ ret <1 x i64> %load
+}
+
+define <1 x double> @load_v1f64(<1 x double>* %a) #0 {
+; CHECK-LABEL: load_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
+ %load = load <1 x double>, <1 x double>* %a
+ ret <1 x double> %load
+}
+
+define <2 x i64> @load_v2i64(<2 x i64>* %a) #0 {
+; CHECK-LABEL: load_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <2 x i64>, <2 x i64>* %a
+ ret <2 x i64> %load
+}
+
+define <2 x double> @load_v2f64(<2 x double>* %a) #0 {
+; CHECK-LABEL: load_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = load <2 x double>, <2 x double>* %a
+ ret <2 x double> %load
+}
+
+define <4 x i64> @load_v4i64(<4 x i64>* %a) #0 {
+; CHECK-LABEL: load_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x i64>, <4 x i64>* %a
+ ret <4 x i64> %load
+}
+
+define <4 x double> @load_v4f64(<4 x double>* %a) #0 {
+; CHECK-LABEL: load_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ret
+ %load = load <4 x double>, <4 x double>* %a
+ ret <4 x double> %load
+}
+
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
new file mode 100644
index 0000000000000..25abc60107283
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @store_v4i8(<4 x i8>* %a) #0 {
+; CHECK-LABEL: store_v4i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: st1b { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+ store <4 x i8> zeroinitializer, <4 x i8>* %a
+ ret void
+}
+
+define void @store_v8i8(<8 x i8>* %a) #0 {
+; CHECK-LABEL: store_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <8 x i8> zeroinitializer, <8 x i8>* %a
+ ret void
+}
+
+define void @store_v16i8(<16 x i8>* %a) #0 {
+; CHECK-LABEL: store_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI2_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <16 x i8> zeroinitializer, <16 x i8>* %a
+ ret void
+}
+
+define void @store_v32i8(<32 x i8>* %a) #0 {
+; CHECK-LABEL: store_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <32 x i8> zeroinitializer, <32 x i8>* %a
+ ret void
+}
+
+define void @store_v2i16(<2 x i16>* %a) #0 {
+; CHECK-LABEL: store_v2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ store <2 x i16> zeroinitializer, <2 x i16>* %a
+ ret void
+}
+
+define void @store_v2f16(<2 x half>* %a) #0 {
+; CHECK-LABEL: store_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI5_0
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+ store <2 x half> zeroinitializer, <2 x half>* %a
+ ret void
+}
+
+define void @store_v4i16(<4 x i16>* %a) #0 {
+; CHECK-LABEL: store_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI6_0
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <4 x i16> zeroinitializer, <4 x i16>* %a
+ ret void
+}
+
+define void @store_v4f16(<4 x half>* %a) #0 {
+; CHECK-LABEL: store_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <4 x half> zeroinitializer, <4 x half>* %a
+ ret void
+}
+
+define void @store_v8i16(<8 x i16>* %a) #0 {
+; CHECK-LABEL: store_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <8 x i16> zeroinitializer, <8 x i16>* %a
+ ret void
+}
+
+define void @store_v8f16(<8 x half>* %a) #0 {
+; CHECK-LABEL: store_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ store <8 x half> zeroinitializer, <8 x half>* %a
+ ret void
+}
+
+define void @store_v16i16(<16 x i16>* %a) #0 {
+; CHECK-LABEL: store_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI10_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <16 x i16> zeroinitializer, <16 x i16>* %a
+ ret void
+}
+
+define void @store_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: store_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI11_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <16 x half> zeroinitializer, <16 x half>* %a
+ ret void
+}
+
+define void @store_v2i32(<2 x i32>* %a) #0 {
+; CHECK-LABEL: store_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str xzr, [x0]
+; CHECK-NEXT: ret
+ store <2 x i32> zeroinitializer, <2 x i32>* %a
+ ret void
+}
+
+define void @store_v2f32(<2 x float>* %a) #0 {
+; CHECK-LABEL: store_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str xzr, [x0]
+; CHECK-NEXT: ret
+ store <2 x float> zeroinitializer, <2 x float>* %a
+ ret void
+}
+
+define void @store_v4i32(<4 x i32>* %a) #0 {
+; CHECK-LABEL: store_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp xzr, xzr, [x0]
+; CHECK-NEXT: ret
+ store <4 x i32> zeroinitializer, <4 x i32>* %a
+ ret void
+}
+
+define void @store_v4f32(<4 x float>* %a) #0 {
+; CHECK-LABEL: store_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp xzr, xzr, [x0]
+; CHECK-NEXT: ret
+ store <4 x float> zeroinitializer, <4 x float>* %a
+ ret void
+}
+
+define void @store_v8i32(<8 x i32>* %a) #0 {
+; CHECK-LABEL: store_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI16_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <8 x i32> zeroinitializer, <8 x i32>* %a
+ ret void
+}
+
+define void @store_v8f32(<8 x float>* %a) #0 {
+; CHECK-LABEL: store_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <8 x float> zeroinitializer, <8 x float>* %a
+ ret void
+}
+
+define void @store_v1i64(<1 x i64>* %a) #0 {
+; CHECK-LABEL: store_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, xzr
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <1 x i64> zeroinitializer, <1 x i64>* %a
+ ret void
+}
+
+define void @store_v1f64(<1 x double>* %a) #0 {
+; CHECK-LABEL: store_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ store <1 x double> zeroinitializer, <1 x double>* %a
+ ret void
+}
+
+define void @store_v2i64(<2 x i64>* %a) #0 {
+; CHECK-LABEL: store_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp xzr, xzr, [x0]
+; CHECK-NEXT: ret
+ store <2 x i64> zeroinitializer, <2 x i64>* %a
+ ret void
+}
+
+define void @store_v2f64(<2 x double>* %a) #0 {
+; CHECK-LABEL: store_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp xzr, xzr, [x0]
+; CHECK-NEXT: ret
+ store <2 x double> zeroinitializer, <2 x double>* %a
+ ret void
+}
+
+define void @store_v4i64(<4 x i64>* %a) #0 {
+; CHECK-LABEL: store_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI22_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <4 x i64> zeroinitializer, <4 x i64>* %a
+ ret void
+}
+
+define void @store_v4f64(<4 x double>* %a) #0 {
+; CHECK-LABEL: store_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI23_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: stp q0, q0, [x0]
+; CHECK-NEXT: ret
+ store <4 x double> zeroinitializer, <4 x double>* %a
+ ret void
+}
+
+attributes #0 = { "target-features"="+sve" }