[llvm] a73e591 - [PowerPC] custom lower v1024i1 load/store (#126969)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 07:25:11 PST 2025
Author: RolandF77
Date: 2025-02-28T10:25:07-05:00
New Revision: a73e591f33159d177dbd123d1bc9d9352e3e531e
URL: https://github.com/llvm/llvm-project/commit/a73e591f33159d177dbd123d1bc9d9352e3e531e
DIFF: https://github.com/llvm/llvm-project/commit/a73e591f33159d177dbd123d1bc9d9352e3e531e.diff
LOG: [PowerPC] custom lower v1024i1 load/store (#126969)
Support moving PPC dense math register values to and from storage with
LLVM IR load/store.
Added:
llvm/test/CodeGen/PowerPC/v1024ls.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d6c8e8d506799..91df5f467e59c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1355,10 +1355,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v256i1, Custom);
}
if (Subtarget.hasMMA()) {
- if (Subtarget.isISAFuture())
+ if (Subtarget.isISAFuture()) {
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
- else
+ addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+ setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+ } else {
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
+ }
setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
@@ -11758,6 +11762,64 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
}
+SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ SDValue LoadChain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+ EVT VT = Op.getValueType();
+ // Lowers a v1024i1 load to four v256i1 lxvp loads assembled into one value.
+ // Type v1024i1 is used for Dense Math dmr registers.
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+ "Dense Math support required.");
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+ // Every lxvp uses the original chain; the out-chains are merged in TF below.
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> LoadChains;
+ SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
+ SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
+ MachineMemOperand *MMO = LN->getMemOperand();
+ unsigned NumVecs = VT.getSizeInBits() / 256; // 4 pieces for v1024i1
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ MachineMemOperand *NewMMO =
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+ if (Idx > 0) { // advance the address by 32 bytes for each later piece
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
+ LoadOps[2] = BasePtr;
+ }
+ SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v256i1, MVT::Other),
+ LoadOps, MVT::v256i1, NewMMO);
+ LoadChains.push_back(Ld.getValue(1));
+ Loads.push_back(Ld);
+ }
+ // On little-endian subtargets the pieces are used in reverse memory order.
+ if (Subtarget.isLittleEndian()) {
+ std::reverse(Loads.begin(), Loads.end());
+ std::reverse(LoadChains.begin(), LoadChains.end());
+ }
+ // Pieces 0-1 form the sub_wacc_lo half, pieces 2-3 the sub_wacc_hi half.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1, Loads[0],
+ Loads[1]),
+ 0);
+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+ Loads[2], Loads[3]),
+ 0);
+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+ SDValue Value =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
+ // Result 0 is the assembled v1024i1 value, result 1 the merged load chain.
+ SDValue RetOps[] = {Value, TF};
+ return DAG.getMergeValues(RetOps, dl);
+}
+
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11766,6 +11828,9 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
+ if (VT == MVT::v1024i1)
+ return LowerDMFVectorLoad(Op, DAG);
+
if (VT != MVT::v256i1 && VT != MVT::v512i1)
return Op;
@@ -11803,6 +11868,69 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
return DAG.getMergeValues(RetOps, dl);
}
+SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Lowers a v1024i1 store to four v256i1 stxvp stores joined by a TokenFactor.
+ SDLoc dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue StoreChain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ SmallVector<SDValue, 4> Values;
+ SmallVector<SDValue, 4> Stores;
+ EVT VT = SN->getValue().getValueType();
+
+ // Type v1024i1 is used for Dense Math dmr registers.
+ assert(VT == MVT::v1024i1 && "Unsupported type.");
+ assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
+ "Dense Math support required.");
+ assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
+ // Split operand 1 of the store into its sub_wacc_lo / sub_wacc_hi halves.
+ SDValue Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+ EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; // two results per extract
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ // On little-endian subtargets the pieces are stored in reverse order.
+ if (Subtarget.isLittleEndian())
+ std::reverse(Values.begin(), Values.end());
+ // Ops[2] (value) and Ops[3] (address) are rewritten per piece in the loop.
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
+ Values[0], BasePtr};
+ MachineMemOperand *MMO = SN->getMemOperand();
+ unsigned NumVecs = VT.getSizeInBits() / 256; // 4 pieces for v1024i1
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ MachineMemOperand *NewMMO =
+ DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
+ if (Idx > 0) { // advance the address by 32 bytes for each later piece
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(32, dl, BasePtr.getValueType()));
+ Ops[3] = BasePtr;
+ }
+ Ops[2] = Values[Idx];
+ SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+ MVT::v256i1, NewMMO);
+ Stores.push_back(St);
+ }
+ // Each stxvp uses the original chain; merge them into a single chain result.
+ SDValue TF = DAG.getTokenFactor(dl, Stores);
+ return TF;
+}
+
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11813,6 +11941,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
+ if (StoreVT == MVT::v1024i1)
+ return LowerDMFVectorStore(Op, DAG);
+
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
return Op;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 514329bbe92d7..1f22aa16a89be 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1344,6 +1344,8 @@ namespace llvm {
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll
new file mode 100644
index 0000000000000..c7f6911f9ddbc
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp) {
+; CHECK-LABEL: v1024ls:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: stxvp vsp34, 96(r4)
+; CHECK-NEXT: stxvp vsp36, 64(r4)
+; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: stxvp vsp34, 32(r4)
+; CHECK-NEXT: stxvp vsp36, 0(r4)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: v1024ls:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r4)
+; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r4)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r4)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vqp, align 64
+ store <1024 x i1> %0, ptr %resp, align 64
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
More information about the llvm-commits
mailing list