[llvm] [PowerPC] custom lower v1024i1 load/store (PR #126969)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 15:03:01 PST 2025
https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/126969
>From b3b84164de8bb9be64787f660e05c1d7768aa04b Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 12 Feb 2025 20:51:33 +0000
Subject: [PATCH 1/2] custom lower v1024i1 load/store
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 86 ++++++++++++--
.../test/CodeGen/PowerPC/mmaplus-acc-spill.ll | 8 +-
.../CodeGen/PowerPC/mmaplus-intrinsics.ll | 106 +++++++++---------
llvm/test/CodeGen/PowerPC/v1024ls.ll | 65 +++++++++++
4 files changed, 197 insertions(+), 68 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/v1024ls.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bdc1ac7c7da58..300fa716297bd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1363,6 +1363,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
}
+ if (Subtarget.isISAFuture()) {
+ setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+ addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+ }
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
@@ -11766,9 +11771,13 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
- if (VT != MVT::v256i1 && VT != MVT::v512i1)
+ if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
return Op;
+ // Used for dense math registers.
+ assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
+ "Type unsupported for this processor");
+
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
// 2 or 4 vsx registers.
@@ -11796,9 +11805,36 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
std::reverse(LoadChains.begin(), LoadChains.end());
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value =
- DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
- dl, VT, Loads);
+ SDValue Value;
+ if (VT == MVT::v1024i1) {
+ SmallVector<SDValue, 4> Pairs;
+ SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
+ SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
+ SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
+ NumVecs >>= 1;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
+ Loads[Idx * 2 + 1], Vsx1Idx};
+ Pairs.push_back(SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
+ }
+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
+ Pairs[0], Pairs[1]),
+ 0);
+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+ Pairs[2], Pairs[3]),
+ 0);
+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+ Value = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
+ } else {
+ Value =
+ DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
+ dl, VT, Loads);
+ }
SDValue RetOps[] = {Value, TF};
return DAG.getMergeValues(RetOps, dl);
}
@@ -11810,12 +11846,17 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
SDValue Value = SN->getValue();
- SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
+ SmallVector<SDValue, 4> ValueVec;
- if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
+ if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
+ StoreVT != MVT::v1024i1)
return Op;
+ // Used for dense math registers.
+ assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
+ "Type unsupported for this processor");
+
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 stores to store the pair or accumulator
// underlying registers individually.
@@ -11832,20 +11873,43 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineSDNode *ExtNode = DAG.getMachineNode(
PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
- Value = SDValue(ExtNode, 0);
- Value2 = SDValue(ExtNode, 1);
+ ValueVec.push_back(SDValue(ExtNode, 0));
+ ValueVec.push_back(SDValue(ExtNode, 1));
} else
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
NumVecs = 4;
+
+ } else if (StoreVT == MVT::v1024i1) {
+ SDValue Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+ ValueVec.push_back(SDValue(ExtNode, 0));
+ ValueVec.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+ ValueVec.push_back(SDValue(ExtNode, 0));
+ ValueVec.push_back(SDValue(ExtNode, 1));
+ NumVecs = 8;
}
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
SDValue Elt;
if (Subtarget.isISAFuture()) {
VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
- Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
- Idx > 1 ? Value2 : Value,
- DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+ unsigned Pairx =
+ Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
+ Elt = DAG.getNode(
+ PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
+ DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
} else
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
index 5ca8c7b02cab4..c8ead89f96d66 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
@@ -46,10 +46,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-NEXT: xvf16ger2pp wacc0, v28, v30
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r30)
-; CHECK-NEXT: stxv v5, 32(r30)
-; CHECK-NEXT: stxv v2, 16(r30)
-; CHECK-NEXT: stxv v3, 0(r30)
+; CHECK-NEXT: stxv v2, 48(r30)
+; CHECK-NEXT: stxv v3, 32(r30)
+; CHECK-NEXT: stxv v4, 16(r30)
+; CHECK-NEXT: stxv v5, 0(r30)
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
index 158ec7a3427c8..b3e4392b8d0e3 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -31,10 +31,10 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r3)
-; CHECK-NEXT: stxv v5, 32(r3)
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 48(r3)
+; CHECK-NEXT: stxv v3, 32(r3)
+; CHECK-NEXT: stxv v4, 16(r3)
+; CHECK-NEXT: stxv v5, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ass_acc:
@@ -55,7 +55,7 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-O0-NEXT: vmr v3, v4
; CHECK-O0-NEXT: vmr v2, v4
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -121,10 +121,10 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: lxv v4, 48(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r7)
-; CHECK-NEXT: stxv v5, 32(r7)
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxv v2, 48(r7)
+; CHECK-NEXT: stxv v3, 32(r7)
+; CHECK-NEXT: stxv v4, 16(r7)
+; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_st_xxmtacc:
@@ -154,7 +154,7 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: lxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -236,10 +236,10 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r7)
-; CHECK-NEXT: stxv v5, 32(r7)
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxv v2, 48(r7)
+; CHECK-NEXT: stxv v3, 32(r7)
+; CHECK-NEXT: stxv v4, 16(r7)
+; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_op_st_xxmtacc:
@@ -271,7 +271,7 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -356,14 +356,14 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: lxv v4, 48(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r3)
-; CHECK-NEXT: stxv v5, 32(r3)
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
-; CHECK-NEXT: stxv v4, 48(r7)
-; CHECK-NEXT: stxv v5, 32(r7)
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxv v2, 48(r3)
+; CHECK-NEXT: stxv v3, 32(r3)
+; CHECK-NEXT: stxv v4, 16(r3)
+; CHECK-NEXT: stxv v5, 0(r3)
+; CHECK-NEXT: stxv v2, 48(r7)
+; CHECK-NEXT: stxv v3, 32(r7)
+; CHECK-NEXT: stxv v4, 16(r7)
+; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_st_xxmfacc:
@@ -397,7 +397,7 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: lxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
; CHECK-O0-NEXT: stxv vs3, 48(r3)
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -496,10 +496,10 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r7)
-; CHECK-NEXT: stxv v5, 32(r7)
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxv v2, 48(r7)
+; CHECK-NEXT: stxv v3, 32(r7)
+; CHECK-NEXT: stxv v4, 16(r7)
+; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_op_st_xxmfacc:
@@ -531,7 +531,7 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -621,10 +621,10 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
; CHECK-NEXT: xvf64gerpp wacc0, vsp34, v5
; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v4
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r3)
-; CHECK-NEXT: stxv v5, 32(r3)
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 48(r3)
+; CHECK-NEXT: stxv v3, 32(r3)
+; CHECK-NEXT: stxv v4, 16(r3)
+; CHECK-NEXT: stxv v5, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: cmplx_xxmacc:
@@ -673,7 +673,7 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp32, vs0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp34, vs0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -783,10 +783,10 @@ define void @int_xxsetaccz(ptr %ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r3)
-; CHECK-NEXT: stxv v5, 32(r3)
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 48(r3)
+; CHECK-NEXT: stxv v3, 32(r3)
+; CHECK-NEXT: stxv v4, 16(r3)
+; CHECK-NEXT: stxv v5, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
@@ -802,7 +802,7 @@ define void @int_xxsetaccz(ptr %ptr) {
; CHECK-O0-LABEL: int_xxsetaccz:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xxsetaccz wacc0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -946,14 +946,14 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-NEXT: xxsetaccz wacc0
; CHECK-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r3)
-; CHECK-NEXT: stxv v5, 32(r3)
-; CHECK-NEXT: stxv v2, 16(r3)
-; CHECK-NEXT: stxv v3, 0(r3)
-; CHECK-NEXT: stxv v4, 112(r3)
-; CHECK-NEXT: stxv v5, 96(r3)
-; CHECK-NEXT: stxv v2, 80(r3)
-; CHECK-NEXT: stxv v3, 64(r3)
+; CHECK-NEXT: stxv v2, 48(r3)
+; CHECK-NEXT: stxv v3, 32(r3)
+; CHECK-NEXT: stxv v4, 16(r3)
+; CHECK-NEXT: stxv v5, 0(r3)
+; CHECK-NEXT: stxv v2, 112(r3)
+; CHECK-NEXT: stxv v3, 96(r3)
+; CHECK-NEXT: stxv v4, 80(r3)
+; CHECK-NEXT: stxv v5, 64(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse:
@@ -975,7 +975,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xxsetaccz wacc0
; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
; CHECK-O0-NEXT: stxv vs3, 48(r3)
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -1065,10 +1065,10 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-NEXT: plxvp vsp36, 8(r4), 0
; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v4, 48(r7)
-; CHECK-NEXT: stxv v5, 32(r7)
-; CHECK-NEXT: stxv v2, 16(r7)
-; CHECK-NEXT: stxv v3, 0(r7)
+; CHECK-NEXT: stxv v2, 48(r7)
+; CHECK-NEXT: stxv v3, 32(r7)
+; CHECK-NEXT: stxv v4, 16(r7)
+; CHECK-NEXT: stxv v5, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_1:
@@ -1104,7 +1104,7 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-O0-NEXT: plxvp vsp34, 8(r4), 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: pmxvf64gernn wacc0, vsp34, vs0, 0, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll
new file mode 100644
index 0000000000000..97668009cb0d7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -disable-auto-paired-vec-st=false \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -disable-auto-paired-vec-st=false \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) {
+; CHECK-LABEL: v1024ls:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: lxv v5, 32(r3)
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v4, 48(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxv v3, 64(r3)
+; CHECK-NEXT: lxv v5, 96(r3)
+; CHECK-NEXT: lxv v2, 80(r3)
+; CHECK-NEXT: lxv v4, 112(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: stxv v2, 112(r4)
+; CHECK-NEXT: stxv v3, 96(r4)
+; CHECK-NEXT: stxv v4, 80(r4)
+; CHECK-NEXT: stxv v5, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: stxv v2, 48(r4)
+; CHECK-NEXT: stxv v3, 32(r4)
+; CHECK-NEXT: stxv v4, 16(r4)
+; CHECK-NEXT: stxv v5, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: v1024ls:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxv v3, 112(r3)
+; CHECK-BE-NEXT: lxv v5, 80(r3)
+; CHECK-BE-NEXT: lxv v2, 96(r3)
+; CHECK-BE-NEXT: lxv v4, 64(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxv v3, 48(r3)
+; CHECK-BE-NEXT: lxv v5, 16(r3)
+; CHECK-BE-NEXT: lxv v2, 32(r3)
+; CHECK-BE-NEXT: lxv v4, 0(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: stxv v5, 112(r4)
+; CHECK-BE-NEXT: stxv v4, 96(r4)
+; CHECK-BE-NEXT: stxv v3, 80(r4)
+; CHECK-BE-NEXT: stxv v2, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: stxv v5, 48(r4)
+; CHECK-BE-NEXT: stxv v4, 32(r4)
+; CHECK-BE-NEXT: stxv v3, 16(r4)
+; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vqp, align 64
+ store <1024 x i1> %0, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
>From f4ca19ec2d33f763ca2eb31f654d32c105c06983 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Thu, 20 Feb 2025 23:19:31 +0000
Subject: [PATCH 2/2] separate 1024 code
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 208 ++++++++++++------
llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 +
.../test/CodeGen/PowerPC/mmaplus-acc-spill.ll | 8 +-
.../CodeGen/PowerPC/mmaplus-intrinsics.ll | 106 ++++-----
llvm/test/CodeGen/PowerPC/v1024ls.ll | 50 ++---
5 files changed, 213 insertions(+), 161 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 300fa716297bd..a3d35cdb1f97a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11763,6 +11763,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
}
+SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+ EVT VT = Op.getValueType();
+
+ // Only v1024i1, the type used for Dense Math dmr registers, is handled here.
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+ "Dense Math support required.");
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+ // Split the access into 256-bit pieces, each read by an lxvp intrinsic load.
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> LoadChains;
+ SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
+ SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr}; // Slot 2 is updated per load.
+ MachineMemOperand *MMO = LN->getMemOperand();
+ unsigned NumVecs = VT.getSizeInBits() / 256;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ MachineMemOperand *NewMMO =
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+ if (Idx > 0) { // Every piece after the first is 32 bytes past the previous one.
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
+ LoadOps[2] = BasePtr;
+ }
+ SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v256i1, MVT::Other),
+ LoadOps, MVT::v256i1, NewMMO);
+ LoadChains.push_back(Ld.getValue(1)); // Result 1 is the load's output chain.
+ Loads.push_back(Ld);
+ }
+ // Loads were pushed in increasing-address order; reverse that on little-endian.
+ if (Subtarget.isLittleEndian()) {
+ std::reverse(Loads.begin(), Loads.end());
+ std::reverse(LoadChains.begin(), LoadChains.end());
+ }
+ // Merge the load chains, then build v512i1 halves from Loads[0..1] and Loads[2..3].
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
+ Loads[1]),
+ 0);
+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+ Loads[2], Loads[3]),
+ 0);
+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub}; // Class id, then (value, subreg) pairs.
+ SDValue Value =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
+ // Return the assembled v1024i1 value together with the merged chain.
+ SDValue RetOps[] = {Value, TF};
+ return DAG.getMergeValues(RetOps, dl);
+}
+
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11771,12 +11829,11 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
- if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
- return Op;
+ if (VT == MVT::v1024i1)
+ return LowerDMFVectorLoad(Op, DAG);
- // Used for dense math registers.
- assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
- "Type unsupported for this processor");
+ if (VT != MVT::v256i1 && VT != MVT::v512i1)
+ return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
@@ -11805,40 +11862,76 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
std::reverse(LoadChains.begin(), LoadChains.end());
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value;
- if (VT == MVT::v1024i1) {
- SmallVector<SDValue, 4> Pairs;
- SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
- SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
- SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
- NumVecs >>= 1;
- for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
- const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
- Loads[Idx * 2 + 1], Vsx1Idx};
- Pairs.push_back(SDValue(
- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
- }
- SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
- Pairs[0], Pairs[1]),
- 0);
- SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
- SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
- Pairs[2], Pairs[3]),
- 0);
- SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
- SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
- const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
- Value = SDValue(
- DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
- } else {
- Value =
- DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
- dl, VT, Loads);
- }
+ SDValue Value =
+ DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
+ dl, VT, Loads);
SDValue RetOps[] = {Value, TF};
return DAG.getMergeValues(RetOps, dl);
}
+SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Lowers a v1024i1 store into one stxvp intrinsic store per 256-bit piece.
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SmallVector<SDValue, 4> Values;
+ SmallVector<SDValue, 4> Stores;
+ EVT VT = SN->getValue().getValueType();
+
+ // Only v1024i1, the type used for Dense Math dmr registers, is handled here.
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+ "Dense Math support required.");
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+ // Split operand 1 into its two 512-bit subregisters (sub_wacc_lo/sub_wacc_hi).
+ SDValue Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1), // NOTE(review): presumably the stored value - confirm.
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; // Each extract yields two v256i1 results.
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ // Values now holds lo result 0, lo result 1, hi result 0, hi result 1.
+ if (Subtarget.isLittleEndian())
+ std::reverse(Values.begin(), Values.end());
+ // Operand list reused for every stxvp; slots 2 (value) and 3 (address) change.
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
+ Values[0], BasePtr};
+ MachineMemOperand *MMO = SN->getMemOperand();
+ unsigned NumVecs = VT.getSizeInBits() / 256;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ MachineMemOperand *NewMMO =
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+ if (Idx > 0) { // Every piece after the first is 32 bytes past the previous one.
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
+ Ops[3] = BasePtr;
+ }
+ Ops[2] = Values[Idx];
+ SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+ MVT::v256i1, NewMMO);
+ Stores.push_back(St);
+ }
+ // Every store takes the incoming chain; a TokenFactor merges their outputs.
+ SDValue TF = DAG.getTokenFactor(dl, Stores);
+ return TF;
+}
+
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11846,16 +11939,14 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
SDValue Value = SN->getValue();
+ SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
- SmallVector<SDValue, 4> ValueVec;
- if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
- StoreVT != MVT::v1024i1)
- return Op;
+ if (StoreVT == MVT::v1024i1)
+ return LowerDMFVectorStore(Op, DAG);
- // Used for dense math registers.
- assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
- "Type unsupported for this processor");
+ if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
+ return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 stores to store the pair or accumulator
@@ -11873,43 +11964,20 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineSDNode *ExtNode = DAG.getMachineNode(
PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
- ValueVec.push_back(SDValue(ExtNode, 0));
- ValueVec.push_back(SDValue(ExtNode, 1));
+ Value = SDValue(ExtNode, 0);
+ Value2 = SDValue(ExtNode, 1);
} else
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
NumVecs = 4;
-
- } else if (StoreVT == MVT::v1024i1) {
- SDValue Lo(DAG.getMachineNode(
- TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
- 0);
- SDValue Hi(DAG.getMachineNode(
- TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
- 0);
- EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
- MachineSDNode *ExtNode =
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
- ValueVec.push_back(SDValue(ExtNode, 0));
- ValueVec.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
- ValueVec.push_back(SDValue(ExtNode, 0));
- ValueVec.push_back(SDValue(ExtNode, 1));
- NumVecs = 8;
}
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
SDValue Elt;
if (Subtarget.isISAFuture()) {
VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
- unsigned Pairx =
- Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
- Elt = DAG.getNode(
- PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
- DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+ Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
+ Idx > 1 ? Value2 : Value,
+ DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
} else
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 514329bbe92d7..1f22aa16a89be 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1344,6 +1344,8 @@ namespace llvm {
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
index c8ead89f96d66..5ca8c7b02cab4 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
@@ -46,10 +46,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-NEXT: xvf16ger2pp wacc0, v28, v30
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r30)
-; CHECK-NEXT: stxv v3, 32(r30)
-; CHECK-NEXT: stxv v4, 16(r30)
-; CHECK-NEXT: stxv v5, 0(r30)
+; CHECK-NEXT: stxv v4, 48(r30)
+; CHECK-NEXT: stxv v5, 32(r30)
+; CHECK-NEXT: stxv v2, 16(r30)
+; CHECK-NEXT: stxv v3, 0(r30)
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
index b3e4392b8d0e3..158ec7a3427c8 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -31,10 +31,10 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r3)
-; CHECK-NEXT: stxv v3, 32(r3)
-; CHECK-NEXT: stxv v4, 16(r3)
-; CHECK-NEXT: stxv v5, 0(r3)
+; CHECK-NEXT: stxv v4, 48(r3)
+; CHECK-NEXT: stxv v5, 32(r3)
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ass_acc:
@@ -55,7 +55,7 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-O0-NEXT: vmr v3, v4
; CHECK-O0-NEXT: vmr v2, v4
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -121,10 +121,10 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: lxv v4, 48(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r7)
-; CHECK-NEXT: stxv v3, 32(r7)
-; CHECK-NEXT: stxv v4, 16(r7)
-; CHECK-NEXT: stxv v5, 0(r7)
+; CHECK-NEXT: stxv v4, 48(r7)
+; CHECK-NEXT: stxv v5, 32(r7)
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_st_xxmtacc:
@@ -154,7 +154,7 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: lxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -236,10 +236,10 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r7)
-; CHECK-NEXT: stxv v3, 32(r7)
-; CHECK-NEXT: stxv v4, 16(r7)
-; CHECK-NEXT: stxv v5, 0(r7)
+; CHECK-NEXT: stxv v4, 48(r7)
+; CHECK-NEXT: stxv v5, 32(r7)
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_op_st_xxmtacc:
@@ -271,7 +271,7 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -356,14 +356,14 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: lxv v4, 48(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r3)
-; CHECK-NEXT: stxv v3, 32(r3)
-; CHECK-NEXT: stxv v4, 16(r3)
-; CHECK-NEXT: stxv v5, 0(r3)
-; CHECK-NEXT: stxv v2, 48(r7)
-; CHECK-NEXT: stxv v3, 32(r7)
-; CHECK-NEXT: stxv v4, 16(r7)
-; CHECK-NEXT: stxv v5, 0(r7)
+; CHECK-NEXT: stxv v4, 48(r3)
+; CHECK-NEXT: stxv v5, 32(r3)
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxv v4, 48(r7)
+; CHECK-NEXT: stxv v5, 32(r7)
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_st_xxmfacc:
@@ -397,7 +397,7 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: lxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
; CHECK-O0-NEXT: stxv vs3, 48(r3)
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -496,10 +496,10 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r7)
-; CHECK-NEXT: stxv v3, 32(r7)
-; CHECK-NEXT: stxv v4, 16(r7)
-; CHECK-NEXT: stxv v5, 0(r7)
+; CHECK-NEXT: stxv v4, 48(r7)
+; CHECK-NEXT: stxv v5, 32(r7)
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: ld_op_st_xxmfacc:
@@ -531,7 +531,7 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -621,10 +621,10 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
; CHECK-NEXT: xvf64gerpp wacc0, vsp34, v5
; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v4
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r3)
-; CHECK-NEXT: stxv v3, 32(r3)
-; CHECK-NEXT: stxv v4, 16(r3)
-; CHECK-NEXT: stxv v5, 0(r3)
+; CHECK-NEXT: stxv v4, 48(r3)
+; CHECK-NEXT: stxv v5, 32(r3)
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: cmplx_xxmacc:
@@ -673,7 +673,7 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp32, vs0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp34, vs0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -783,10 +783,10 @@ define void @int_xxsetaccz(ptr %ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r3)
-; CHECK-NEXT: stxv v3, 32(r3)
-; CHECK-NEXT: stxv v4, 16(r3)
-; CHECK-NEXT: stxv v5, 0(r3)
+; CHECK-NEXT: stxv v4, 48(r3)
+; CHECK-NEXT: stxv v5, 32(r3)
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
@@ -802,7 +802,7 @@ define void @int_xxsetaccz(ptr %ptr) {
; CHECK-O0-LABEL: int_xxsetaccz:
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xxsetaccz wacc0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -946,14 +946,14 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-NEXT: xxsetaccz wacc0
; CHECK-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r3)
-; CHECK-NEXT: stxv v3, 32(r3)
-; CHECK-NEXT: stxv v4, 16(r3)
-; CHECK-NEXT: stxv v5, 0(r3)
-; CHECK-NEXT: stxv v2, 112(r3)
-; CHECK-NEXT: stxv v3, 96(r3)
-; CHECK-NEXT: stxv v4, 80(r3)
-; CHECK-NEXT: stxv v5, 64(r3)
+; CHECK-NEXT: stxv v4, 48(r3)
+; CHECK-NEXT: stxv v5, 32(r3)
+; CHECK-NEXT: stxv v2, 16(r3)
+; CHECK-NEXT: stxv v3, 0(r3)
+; CHECK-NEXT: stxv v4, 112(r3)
+; CHECK-NEXT: stxv v5, 96(r3)
+; CHECK-NEXT: stxv v2, 80(r3)
+; CHECK-NEXT: stxv v3, 64(r3)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testcse:
@@ -975,7 +975,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-O0: # %bb.0: # %entry
; CHECK-O0-NEXT: xxsetaccz wacc0
; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
; CHECK-O0-NEXT: stxv vs3, 48(r3)
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -1065,10 +1065,10 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-NEXT: plxvp vsp36, 8(r4), 0
; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 48(r7)
-; CHECK-NEXT: stxv v3, 32(r7)
-; CHECK-NEXT: stxv v4, 16(r7)
-; CHECK-NEXT: stxv v5, 0(r7)
+; CHECK-NEXT: stxv v4, 48(r7)
+; CHECK-NEXT: stxv v5, 32(r7)
+; CHECK-NEXT: stxv v2, 16(r7)
+; CHECK-NEXT: stxv v3, 0(r7)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: test_ldst_1:
@@ -1104,7 +1104,7 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
; CHECK-O0-NEXT: plxvp vsp34, 8(r4), 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: pmxvf64gernn wacc0, vsp34, vs0, 0, 0
-; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r7)
; CHECK-O0-NEXT: xxlor vs0, v5, v5
diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll
index 97668009cb0d7..c7f6911f9ddbc 100644
--- a/llvm/test/CodeGen/PowerPC/v1024ls.ll
+++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll
@@ -1,60 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -disable-auto-paired-vec-st=false \
; RUN: -mcpu=future -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
-; RUN: -disable-auto-paired-vec-st=false \
; RUN: -mcpu=future -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) {
; CHECK-LABEL: v1024ls:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lxv v3, 0(r3)
-; CHECK-NEXT: lxv v5, 32(r3)
-; CHECK-NEXT: lxv v2, 16(r3)
-; CHECK-NEXT: lxv v4, 48(r3)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
-; CHECK-NEXT: lxv v3, 64(r3)
-; CHECK-NEXT: lxv v5, 96(r3)
-; CHECK-NEXT: lxv v2, 80(r3)
-; CHECK-NEXT: lxv v4, 112(r3)
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT: stxv v2, 112(r4)
-; CHECK-NEXT: stxv v3, 96(r4)
-; CHECK-NEXT: stxv v4, 80(r4)
-; CHECK-NEXT: stxv v5, 64(r4)
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
-; CHECK-NEXT: stxv v2, 48(r4)
-; CHECK-NEXT: stxv v3, 32(r4)
-; CHECK-NEXT: stxv v4, 16(r4)
-; CHECK-NEXT: stxv v5, 0(r4)
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: v1024ls:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lxv v3, 112(r3)
-; CHECK-BE-NEXT: lxv v5, 80(r3)
-; CHECK-BE-NEXT: lxv v2, 96(r3)
-; CHECK-BE-NEXT: lxv v4, 64(r3)
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
-; CHECK-BE-NEXT: lxv v3, 48(r3)
-; CHECK-BE-NEXT: lxv v5, 16(r3)
-; CHECK-BE-NEXT: lxv v2, 32(r3)
-; CHECK-BE-NEXT: lxv v4, 0(r3)
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
-; CHECK-BE-NEXT: stxv v5, 112(r4)
-; CHECK-BE-NEXT: stxv v4, 96(r4)
-; CHECK-BE-NEXT: stxv v3, 80(r4)
-; CHECK-BE-NEXT: stxv v2, 64(r4)
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-BE-NEXT: stxv v5, 48(r4)
-; CHECK-BE-NEXT: stxv v4, 32(r4)
-; CHECK-BE-NEXT: stxv v3, 16(r4)
-; CHECK-BE-NEXT: stxv v2, 0(r4)
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
; CHECK-BE-NEXT: blr
entry:
%0 = load <1024 x i1>, ptr %vqp, align 64
More information about the llvm-commits mailing list