[llvm] adbb46e - [VE] Support load/store vm registers
Kazushi Marukawa via llvm-commits
llvm-commits@lists.llvm.org
Thu Jun 30 16:25:30 PDT 2022
Author: Kazushi (Jam) Marukawa
Date: 2022-07-01T08:25:24+09:00
New Revision: adbb46ea651cb44b6d1661ab31389db41bd1f314
URL: https://github.com/llvm/llvm-project/commit/adbb46ea651cb44b6d1661ab31389db41bd1f314
DIFF: https://github.com/llvm/llvm-project/commit/adbb46ea651cb44b6d1661ab31389db41bd1f314.diff
LOG: [VE] Support load/store vm registers
As a first step, support loading and storing vm registers to and from
memory locations. Supporting loads and stores of vm registers to and from
stack locations will follow as a next step. This patch also adds several
regression tests, covering not only loads and stores of vm registers but
also previously missing load/store tests for vr registers.
Reviewed By: efocht
Differential Revision: https://reviews.llvm.org/D128610
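For illustration, the IR this patch newly handles is a plain load or store of a
VE mask vector type. Below is a minimal sketch mirroring the added loadvm.ll
and storevm.ll tests; the function name is illustrative. Such accesses are now
custom-lowered into 64-bit loads/stores combined with LVM/SVM mask-register
moves, as the diff below shows.

define fastcc void @copy_mask(<256 x i1>* %src, <256 x i1>* %dst) {
  ; Load a 256-bit mask value and store it back out; both operations are
  ; now lowered by the new code in VEISelLowering.cpp.
  %m = load <256 x i1>, <256 x i1>* %src, align 16
  store <256 x i1> %m, <256 x i1>* %dst, align 16
  ret void
}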
Added:
llvm/test/CodeGen/VE/Vector/loadvm.ll
llvm/test/CodeGen/VE/Vector/loadvr.ll
llvm/test/CodeGen/VE/Vector/storevm.ll
llvm/test/CodeGen/VE/Vector/storevr.ll
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index d9fa3b8d58a0d..63d48971c292a 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -343,6 +343,13 @@ void VETargetLowering::initVPUActions() {
for (unsigned IntRedOpc : IntReductionOCs)
setOperationAction(IntRedOpc, VT, Custom);
}
+
+ // v256i1 and v512i1 ops
+ for (MVT MaskVT : AllMaskVTs) {
+ // Custom lower mask ops
+ setOperationAction(ISD::STORE, MaskVT, Custom);
+ setOperationAction(ISD::LOAD, MaskVT, Custom);
+ }
}
SDValue
@@ -1339,6 +1346,72 @@ static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(Ops, DL);
}
+// Lower a vXi1 load into the following instructions
+// LDrii %1, (,%addr)
+// LVMxir %vm, 0, %1
+// LDrii %2, 8(,%addr)
+// LVMxir %vm, 0, %2
+// ...
+static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = LdNode->getBasePtr();
+ unsigned Alignment = LdNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ EVT AddrVT = BasePtr.getValueType();
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
+ SDValue OutChains[4];
+ SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
+ for (int i = 0; i < 4; ++i) {
+ // Generate load dag and prepare chains.
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ SDValue Val =
+ DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ OutChains[i] = SDValue(Val.getNode(), 1);
+
+ VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
+ DAG.getTargetConstant(i, DL, MVT::i64), Val,
+ SDValue(VM, 0));
+ }
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(VM, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
+ } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
+ SDValue OutChains[8];
+ SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
+ for (int i = 0; i < 8; ++i) {
+ // Generate load dag and prepare chains.
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ SDValue Val =
+ DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ OutChains[i] = SDValue(Val.getNode(), 1);
+
+ VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
+ DAG.getTargetConstant(i, DL, MVT::i64), Val,
+ SDValue(VM, 0));
+ }
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(VM, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
+ } else {
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+ }
+}
+
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
@@ -1357,6 +1430,8 @@ SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (MemVT == MVT::f128)
return lowerLoadF128(Op, DAG);
+ if (isMaskType(MemVT))
+ return lowerLoadI1(Op, DAG);
return Op;
}
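To make the new load path concrete: lowerLoadI1 splits a <256 x i1> load into
four 64-bit loads whose results are inserted into a vm register with LVM (the
v512i1 case uses eight chunks spread over two vm registers). The sketch below
mirrors the loadv256i1 test added further down; the generated code shown in the
comments is taken from that test.

define fastcc <256 x i1> @loadv256i1(<256 x i1>* %mp) {
; Generated VE code (see loadvm.ll below):
;   ld  %s1, (, %s0)
;   ld  %s2, 8(, %s0)
;   ld  %s3, 16(, %s0)
;   ld  %s0, 24(, %s0)
;   lvm %vm1, 0, %s1
;   lvm %vm1, 1, %s2
;   lvm %vm1, 2, %s3
;   lvm %vm1, 3, %s0
  %m = load <256 x i1>, <256 x i1>* %mp, align 16
  ret <256 x i1> %m
}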
@@ -1397,11 +1472,64 @@ static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
+// Lower a vXi1 store into the following instructions
+// SVMi %1, %vm, 0
+// STrii %1, (,%addr)
+// SVMi %2, %vm, 1
+// STrii %2, 8(,%addr)
+// ...
+static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = StNode->getBasePtr();
+ unsigned Alignment = StNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+ EVT AddrVT = BasePtr.getValueType();
+ EVT MemVT = StNode->getMemoryVT();
+ if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
+ SDValue OutChains[4];
+ for (int i = 0; i < 4; ++i) {
+ SDNode *V =
+ DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
+ DAG.getTargetConstant(i, DL, MVT::i64));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ OutChains[i] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
+ SDValue OutChains[8];
+ for (int i = 0; i < 8; ++i) {
+ SDNode *V =
+ DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
+ DAG.getTargetConstant(i, DL, MVT::i64));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ OutChains[i] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ } else {
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+ }
+}
+
SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
- // always expand non-mask vector loads to VVP
+ // always expand non-mask vector loads to VVP
EVT MemVT = StNode->getMemoryVT();
if (MemVT.isVector() && !isMaskType(MemVT))
return lowerToVVP(Op, DAG);
@@ -1415,6 +1543,8 @@ SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (MemVT == MVT::f128)
return lowerStoreF128(Op, DAG);
+ if (isMaskType(MemVT))
+ return lowerStoreI1(Op, DAG);
// Otherwise, ask llvm to expand it.
return SDValue();
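The store path is symmetric: lowerStoreI1 extracts each 64-bit chunk from the
vm register with SVM and stores it. The sketch below mirrors the storev256i1
test added further down; the generated code in the comments is taken from that
test.

define fastcc void @storev256i1(<256 x i1>* %mp, <256 x i1> %m) {
; Generated VE code (see storevm.ll below):
;   svm %s1, %vm1, 3
;   st  %s1, 24(, %s0)
;   svm %s1, %vm1, 2
;   st  %s1, 16(, %s0)
;   svm %s1, %vm1, 1
;   st  %s1, 8(, %s0)
;   svm %s1, %vm1, 0
;   st  %s1, (, %s0)
  store <256 x i1> %m, <256 x i1>* %mp, align 16
  ret void
}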
diff --git a/llvm/test/CodeGen/VE/Vector/loadvm.ll b/llvm/test/CodeGen/VE/Vector/loadvm.ll
new file mode 100644
index 0000000000000..8d82821fc09b2
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/loadvm.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+@v256i1 = common dso_local local_unnamed_addr global <256 x i1> zeroinitializer, align 4
+@v512i1 = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 4
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i1> @loadv256i1(<256 x i1>* nocapture readonly %mp) {
+; CHECK-LABEL: loadv256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld %s1, (, %s0)
+; CHECK-NEXT: ld %s2, 8(, %s0)
+; CHECK-NEXT: ld %s3, 16(, %s0)
+; CHECK-NEXT: ld %s0, 24(, %s0)
+; CHECK-NEXT: lvm %vm1, 0, %s1
+; CHECK-NEXT: lvm %vm1, 1, %s2
+; CHECK-NEXT: lvm %vm1, 2, %s3
+; CHECK-NEXT: lvm %vm1, 3, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %m = load <256 x i1>, <256 x i1>* %mp, align 16
+ ret <256 x i1> %m
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i1> @loadv256i1com() {
+; CHECK-LABEL: loadv256i1com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, v256i1@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s0, v256i1@hi(, %s0)
+; CHECK-NEXT: ld %s1, (, %s0)
+; CHECK-NEXT: ld %s2, 8(, %s0)
+; CHECK-NEXT: ld %s3, 16(, %s0)
+; CHECK-NEXT: ld %s0, 24(, %s0)
+; CHECK-NEXT: lvm %vm1, 0, %s1
+; CHECK-NEXT: lvm %vm1, 1, %s2
+; CHECK-NEXT: lvm %vm1, 2, %s3
+; CHECK-NEXT: lvm %vm1, 3, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %m = load <256 x i1>, <256 x i1>* @v256i1, align 16
+ ret <256 x i1> %m
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <512 x i1> @loadv512i1(<512 x i1>* nocapture readonly %mp) {
+; CHECK-LABEL: loadv512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld %s1, (, %s0)
+; CHECK-NEXT: ld %s2, 8(, %s0)
+; CHECK-NEXT: ld %s3, 16(, %s0)
+; CHECK-NEXT: ld %s4, 24(, %s0)
+; CHECK-NEXT: lvm %vm3, 0, %s1
+; CHECK-NEXT: lvm %vm3, 1, %s2
+; CHECK-NEXT: lvm %vm3, 2, %s3
+; CHECK-NEXT: lvm %vm3, 3, %s4
+; CHECK-NEXT: ld %s1, 32(, %s0)
+; CHECK-NEXT: ld %s2, 40(, %s0)
+; CHECK-NEXT: ld %s3, 48(, %s0)
+; CHECK-NEXT: ld %s0, 56(, %s0)
+; CHECK-NEXT: lvm %vm2, 0, %s1
+; CHECK-NEXT: lvm %vm2, 1, %s2
+; CHECK-NEXT: lvm %vm2, 2, %s3
+; CHECK-NEXT: lvm %vm2, 3, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %m = load <512 x i1>, <512 x i1>* %mp, align 16
+ ret <512 x i1> %m
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <512 x i1> @loadv512i1com() {
+; CHECK-LABEL: loadv512i1com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, v512i1@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s0, v512i1@hi(, %s0)
+; CHECK-NEXT: ld %s1, (, %s0)
+; CHECK-NEXT: ld %s2, 8(, %s0)
+; CHECK-NEXT: ld %s3, 16(, %s0)
+; CHECK-NEXT: ld %s4, 24(, %s0)
+; CHECK-NEXT: lvm %vm3, 0, %s1
+; CHECK-NEXT: lvm %vm3, 1, %s2
+; CHECK-NEXT: lvm %vm3, 2, %s3
+; CHECK-NEXT: lvm %vm3, 3, %s4
+; CHECK-NEXT: ld %s1, 32(, %s0)
+; CHECK-NEXT: ld %s2, 40(, %s0)
+; CHECK-NEXT: ld %s3, 48(, %s0)
+; CHECK-NEXT: ld %s0, 56(, %s0)
+; CHECK-NEXT: lvm %vm2, 0, %s1
+; CHECK-NEXT: lvm %vm2, 1, %s2
+; CHECK-NEXT: lvm %vm2, 2, %s3
+; CHECK-NEXT: lvm %vm2, 3, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %m = load <512 x i1>, <512 x i1>* @v512i1, align 16
+ ret <512 x i1> %m
+}
+
diff --git a/llvm/test/CodeGen/VE/Vector/loadvr.ll b/llvm/test/CodeGen/VE/Vector/loadvr.ll
new file mode 100644
index 0000000000000..d26e014027edf
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/loadvr.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+@v256i64 = common dso_local local_unnamed_addr global <256 x i64> zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i64> @loadv256i64(<256 x i64>* nocapture readonly) {
+; CHECK-LABEL: loadv256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vld %v0, 8, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %2 = load <256 x i64>, <256 x i64>* %0, align 16
+ ret <256 x i64> %2
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x double> @loadv256f64(<256 x double>* nocapture readonly) {
+; CHECK-LABEL: loadv256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vld %v0, 8, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %2 = load <256 x double>, <256 x double>* %0, align 16
+ ret <256 x double> %2
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i32> @loadv256i32(<256 x i32>* nocapture readonly) {
+; CHECK-LABEL: loadv256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vldl.zx %v0, 4, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %2 = load <256 x i32>, <256 x i32>* %0, align 16
+ ret <256 x i32> %2
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x float> @loadv256f32(<256 x float>* nocapture readonly) {
+; CHECK-LABEL: loadv256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vldu %v0, 4, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %2 = load <256 x float>, <256 x float>* %0, align 16
+ ret <256 x float> %2
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i64> @loadv256i64stk() {
+; CHECK-LABEL: loadv256i64stk:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s11, -2048(, %s11)
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lea %s1, (, %s11)
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vld %v0, 8, %s1
+; CHECK-NEXT: lea %s11, 2048(, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %addr = alloca <256 x i64>, align 16
+ %1 = load <256 x i64>, <256 x i64>* %addr, align 16
+ ret <256 x i64> %1
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc <256 x i64> @loadv256i64com() {
+; CHECK-LABEL: loadv256i64com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, v256i64@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s0, v256i64@hi(, %s0)
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vld %v0, 8, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %1 = load <256 x i64>, <256 x i64>* @v256i64, align 16
+ ret <256 x i64> %1
+}
diff --git a/llvm/test/CodeGen/VE/Vector/storevm.ll b/llvm/test/CodeGen/VE/Vector/storevm.ll
new file mode 100644
index 0000000000000..448dc2d4bd254
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/storevm.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+@v256i1 = common dso_local local_unnamed_addr global <256 x i1> zeroinitializer, align 4
+@v512i1 = common dso_local local_unnamed_addr global <512 x i1> zeroinitializer, align 4
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev256i1(<256 x i1>* nocapture %mp, <256 x i1> %m) {
+; CHECK-LABEL: storev256i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: svm %s1, %vm1, 3
+; CHECK-NEXT: st %s1, 24(, %s0)
+; CHECK-NEXT: svm %s1, %vm1, 2
+; CHECK-NEXT: st %s1, 16(, %s0)
+; CHECK-NEXT: svm %s1, %vm1, 1
+; CHECK-NEXT: st %s1, 8(, %s0)
+; CHECK-NEXT: svm %s1, %vm1, 0
+; CHECK-NEXT: st %s1, (, %s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ store <256 x i1> %m, <256 x i1>* %mp, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev256i1com(<256 x i1> %m) {
+; CHECK-LABEL: storev256i1com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: svm %s0, %vm1, 3
+; CHECK-NEXT: lea %s1, v256i1@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s1, v256i1@hi(, %s1)
+; CHECK-NEXT: st %s0, 24(, %s1)
+; CHECK-NEXT: svm %s0, %vm1, 2
+; CHECK-NEXT: st %s0, 16(, %s1)
+; CHECK-NEXT: svm %s0, %vm1, 1
+; CHECK-NEXT: st %s0, 8(, %s1)
+; CHECK-NEXT: svm %s0, %vm1, 0
+; CHECK-NEXT: st %s0, (, %s1)
+; CHECK-NEXT: b.l.t (, %s10)
+ store <256 x i1> %m, <256 x i1>* @v256i1, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev512i1(<512 x i1>* nocapture %mp, <512 x i1> %m) {
+; CHECK-LABEL: storev512i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: svm %s1, %vm2, 3
+; CHECK-NEXT: st %s1, 56(, %s0)
+; CHECK-NEXT: svm %s1, %vm2, 2
+; CHECK-NEXT: st %s1, 48(, %s0)
+; CHECK-NEXT: svm %s1, %vm2, 1
+; CHECK-NEXT: st %s1, 40(, %s0)
+; CHECK-NEXT: svm %s1, %vm2, 0
+; CHECK-NEXT: st %s1, 32(, %s0)
+; CHECK-NEXT: svm %s1, %vm3, 3
+; CHECK-NEXT: st %s1, 24(, %s0)
+; CHECK-NEXT: svm %s1, %vm3, 2
+; CHECK-NEXT: st %s1, 16(, %s0)
+; CHECK-NEXT: svm %s1, %vm3, 1
+; CHECK-NEXT: st %s1, 8(, %s0)
+; CHECK-NEXT: svm %s1, %vm3, 0
+; CHECK-NEXT: st %s1, (, %s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ store <512 x i1> %m, <512 x i1>* %mp, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev512i1com(<512 x i1> %m) {
+; CHECK-LABEL: storev512i1com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: svm %s0, %vm2, 3
+; CHECK-NEXT: lea %s1, v512i1@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s1, v512i1@hi(, %s1)
+; CHECK-NEXT: st %s0, 56(, %s1)
+; CHECK-NEXT: svm %s0, %vm2, 2
+; CHECK-NEXT: st %s0, 48(, %s1)
+; CHECK-NEXT: svm %s0, %vm2, 1
+; CHECK-NEXT: st %s0, 40(, %s1)
+; CHECK-NEXT: svm %s0, %vm2, 0
+; CHECK-NEXT: st %s0, 32(, %s1)
+; CHECK-NEXT: svm %s0, %vm3, 3
+; CHECK-NEXT: st %s0, 24(, %s1)
+; CHECK-NEXT: svm %s0, %vm3, 2
+; CHECK-NEXT: st %s0, 16(, %s1)
+; CHECK-NEXT: svm %s0, %vm3, 1
+; CHECK-NEXT: st %s0, 8(, %s1)
+; CHECK-NEXT: svm %s0, %vm3, 0
+; CHECK-NEXT: st %s0, (, %s1)
+; CHECK-NEXT: b.l.t (, %s10)
+ store <512 x i1> %m, <512 x i1>* @v512i1, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/VE/Vector/storevr.ll b/llvm/test/CodeGen/VE/Vector/storevr.ll
new file mode 100644
index 0000000000000..5b8e3effa4321
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/storevr.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+@v256i64 = common dso_local local_unnamed_addr global <256 x i64> zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev256i64(<256 x i64>* nocapture, <256 x i64>) {
+; CHECK-LABEL: storev256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vst %v0, 8, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ store <256 x i64> %1, <256 x i64>* %0, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev256i64stk(<256 x i64>) {
+; CHECK-LABEL: storev256i64stk:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s11, -2048(, %s11)
+; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ld %s61, 24(, %s14)
+; CHECK-NEXT: or %s62, 0, %s0
+; CHECK-NEXT: lea %s63, 315
+; CHECK-NEXT: shm.l %s63, (%s61)
+; CHECK-NEXT: shm.l %s8, 8(%s61)
+; CHECK-NEXT: shm.l %s11, 16(%s61)
+; CHECK-NEXT: monc
+; CHECK-NEXT: or %s0, 0, %s62
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lea %s1, (, %s11)
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vst %v0, 8, %s1
+; CHECK-NEXT: lea %s11, 2048(, %s11)
+; CHECK-NEXT: b.l.t (, %s10)
+ %addr = alloca <256 x i64>, align 16
+ store <256 x i64> %0, <256 x i64>* %addr, align 16
+ ret void
+}
+
+; Function Attrs: norecurse nounwind readonly
+define fastcc void @storev256i64com(<256 x i64>) {
+; CHECK-LABEL: storev256i64com:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, v256i64@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s0, v256i64@hi(, %s0)
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vst %v0, 8, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ store <256 x i64> %0, <256 x i64>* @v256i64, align 16
+ ret void
+}