[llvm] [PowerPC] custom lower v1024i1 load/store (PR #126969)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 12:07:13 PST 2025
================
@@ -11796,9 +11805,36 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
std::reverse(LoadChains.begin(), LoadChains.end());
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value =
- DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
- dl, VT, Loads);
+ SDValue Value;
+ if (VT == MVT::v1024i1) {
+ SmallVector<SDValue, 4> Pairs;
+ SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
+ SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
+ SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
+ NumVecs >>= 1;
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
+ Loads[Idx * 2 + 1], Vsx1Idx};
+ Pairs.push_back(SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
+ }
+ SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
+ Pairs[0], Pairs[1]),
+ 0);
+ SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+ SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+ Pairs[2], Pairs[3]),
+ 0);
+ SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+ SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+ const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+ Value = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
----------------
RolandF77 wrote:
For one, it was suggested to us that ACC_BUILD and PAIR_BUILD should be removed. Also, the opcodes only have a single use, so they add complexity without providing much value. And yes, the sequence is complicated, but 512-bit dense math is already done in lowering, so there is precedent.
https://github.com/llvm/llvm-project/pull/126969
More information about the llvm-commits
mailing list