[llvm] [PowerPC] custom lower v1024i1 load/store (PR #126969)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 12 12:46:50 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: None (RolandF77)

<details>
<summary>Changes</summary>

Support moving PPC dense math register values to and from storage with LLVM IR load/store.

---
Full diff: https://github.com/llvm/llvm-project/pull/126969.diff


4 Files Affected:

- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+75-11) 
- (modified) llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll (+4-4) 
- (modified) llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll (+53-53) 
- (added) llvm/test/CodeGen/PowerPC/v1024ls.ll (+65) 


``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bdc1ac7c7da58..300fa716297bd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1363,6 +1363,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
   }
+  if (Subtarget.isISAFuture()) {
+    setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
+    setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+    addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+  }
 
   if (Subtarget.has64BitSupport())
     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
@@ -11766,9 +11771,13 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
   SDValue BasePtr = LN->getBasePtr();
   EVT VT = Op.getValueType();
 
-  if (VT != MVT::v256i1 && VT != MVT::v512i1)
+  if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
     return Op;
 
+  // Used for dense math registers.
+  assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
+         "Type unsupported for this processor");
+
   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
   // 2 or 4 vsx registers.
@@ -11796,9 +11805,36 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
     std::reverse(LoadChains.begin(), LoadChains.end());
   }
   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
-  SDValue Value =
-      DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
-                  dl, VT, Loads);
+  SDValue Value;
+  if (VT == MVT::v1024i1) {
+    SmallVector<SDValue, 4> Pairs;
+    SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
+    SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
+    SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
+    NumVecs >>= 1;
+    for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+      const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
+                             Loads[Idx * 2 + 1], Vsx1Idx};
+      Pairs.push_back(SDValue(
+          DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
+    }
+    SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
+                                  Pairs[0], Pairs[1]),
+               0);
+    SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+    SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
+                                  Pairs[2], Pairs[3]),
+               0);
+    SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+    SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+    const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+    Value = SDValue(
+        DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
+  } else {
+    Value =
+        DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
+                    dl, VT, Loads);
+  }
   SDValue RetOps[] = {Value, TF};
   return DAG.getMergeValues(RetOps, dl);
 }
@@ -11810,12 +11846,17 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
   SDValue StoreChain = SN->getChain();
   SDValue BasePtr = SN->getBasePtr();
   SDValue Value = SN->getValue();
-  SDValue Value2 = SN->getValue();
   EVT StoreVT = Value.getValueType();
+  SmallVector<SDValue, 4> ValueVec;
 
-  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
+  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
+      StoreVT != MVT::v1024i1)
     return Op;
 
+  // Used for dense math registers.
+  assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
+         "Type unsupported for this processor");
+
   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
   // underlying registers individually.
@@ -11832,20 +11873,43 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
       MachineSDNode *ExtNode = DAG.getMachineNode(
           PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
 
-      Value = SDValue(ExtNode, 0);
-      Value2 = SDValue(ExtNode, 1);
+      ValueVec.push_back(SDValue(ExtNode, 0));
+      ValueVec.push_back(SDValue(ExtNode, 1));
     } else
       Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
     NumVecs = 4;
+
+  } else if (StoreVT == MVT::v1024i1) {
+    SDValue Lo(DAG.getMachineNode(
+                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+                   Op.getOperand(1),
+                   DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+               0);
+    SDValue Hi(DAG.getMachineNode(
+                   TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+                   Op.getOperand(1),
+                   DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+               0);
+    EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
+    MachineSDNode *ExtNode =
+        DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+    ValueVec.push_back(SDValue(ExtNode, 0));
+    ValueVec.push_back(SDValue(ExtNode, 1));
+    ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+    ValueVec.push_back(SDValue(ExtNode, 0));
+    ValueVec.push_back(SDValue(ExtNode, 1));
+    NumVecs = 8;
   }
   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
     SDValue Elt;
     if (Subtarget.isISAFuture()) {
       VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
-      Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
-                        Idx > 1 ? Value2 : Value,
-                        DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
+      unsigned Pairx =
+          Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
+      Elt = DAG.getNode(
+          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
+          DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
     } else
       Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
                         DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
index 5ca8c7b02cab4..c8ead89f96d66 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
@@ -46,10 +46,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
 ; CHECK-NEXT:    xvf16ger2pp wacc0, v28, v30
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r30)
-; CHECK-NEXT:    stxv v5, 32(r30)
-; CHECK-NEXT:    stxv v2, 16(r30)
-; CHECK-NEXT:    stxv v3, 0(r30)
+; CHECK-NEXT:    stxv v2, 48(r30)
+; CHECK-NEXT:    stxv v3, 32(r30)
+; CHECK-NEXT:    stxv v4, 16(r30)
+; CHECK-NEXT:    stxv v5, 0(r30)
 ; CHECK-NEXT:    lxv v31, 144(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv v30, 128(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv v29, 112(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
index 158ec7a3427c8..b3e4392b8d0e3 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -31,10 +31,10 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
 ; CHECK-NEXT:    vmr v3, v2
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r3)
-; CHECK-NEXT:    stxv v5, 32(r3)
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxv v2, 48(r3)
+; CHECK-NEXT:    stxv v3, 32(r3)
+; CHECK-NEXT:    stxv v4, 16(r3)
+; CHECK-NEXT:    stxv v5, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ass_acc:
@@ -55,7 +55,7 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
 ; CHECK-O0-NEXT:    vmr v3, v4
 ; CHECK-O0-NEXT:    vmr v2, v4
 ; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r3)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -121,10 +121,10 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-NEXT:    lxv v4, 48(r3)
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r7)
-; CHECK-NEXT:    stxv v5, 32(r7)
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxv v2, 48(r7)
+; CHECK-NEXT:    stxv v3, 32(r7)
+; CHECK-NEXT:    stxv v4, 16(r7)
+; CHECK-NEXT:    stxv v5, 0(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ld_st_xxmtacc:
@@ -154,7 +154,7 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-O0-NEXT:    lxv vs0, 48(r3)
 ; CHECK-O0-NEXT:    xxlor v2, vs0, vs0
 ; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r7)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -236,10 +236,10 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-NEXT:    xvi4ger8pp wacc0, v2, v2
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r7)
-; CHECK-NEXT:    stxv v5, 32(r7)
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxv v2, 48(r7)
+; CHECK-NEXT:    stxv v3, 32(r7)
+; CHECK-NEXT:    stxv v4, 16(r7)
+; CHECK-NEXT:    stxv v5, 0(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ld_op_st_xxmtacc:
@@ -271,7 +271,7 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-O0-NEXT:    xxlor v4, vs0, vs0
 ; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
 ; CHECK-O0-NEXT:    xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r7)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -356,14 +356,14 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-NEXT:    lxv v4, 48(r3)
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r3)
-; CHECK-NEXT:    stxv v5, 32(r3)
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
-; CHECK-NEXT:    stxv v4, 48(r7)
-; CHECK-NEXT:    stxv v5, 32(r7)
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxv v2, 48(r3)
+; CHECK-NEXT:    stxv v3, 32(r3)
+; CHECK-NEXT:    stxv v4, 16(r3)
+; CHECK-NEXT:    stxv v5, 0(r3)
+; CHECK-NEXT:    stxv v2, 48(r7)
+; CHECK-NEXT:    stxv v3, 32(r7)
+; CHECK-NEXT:    stxv v4, 16(r7)
+; CHECK-NEXT:    stxv v5, 0(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ld_st_xxmfacc:
@@ -397,7 +397,7 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-O0-NEXT:    lxv vs0, 48(r3)
 ; CHECK-O0-NEXT:    xxlor v2, vs0, vs0
 ; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs3, v4, v4
 ; CHECK-O0-NEXT:    stxv vs3, 48(r3)
 ; CHECK-O0-NEXT:    xxlor vs2, v5, v5
@@ -496,10 +496,10 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
 ; CHECK-NEXT:    xvi4ger8pp wacc0, v2, v2
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r7)
-; CHECK-NEXT:    stxv v5, 32(r7)
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxv v2, 48(r7)
+; CHECK-NEXT:    stxv v3, 32(r7)
+; CHECK-NEXT:    stxv v4, 16(r7)
+; CHECK-NEXT:    stxv v5, 0(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ld_op_st_xxmfacc:
@@ -531,7 +531,7 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
 ; CHECK-O0-NEXT:    xxlor v4, vs0, vs0
 ; CHECK-O0-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
 ; CHECK-O0-NEXT:    xvi4ger8pp wacc0, v2, v2
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r7)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -621,10 +621,10 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
 ; CHECK-NEXT:    xvf64gerpp wacc0, vsp34, v5
 ; CHECK-NEXT:    xvf64gerpp wacc0, vsp36, v4
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r3)
-; CHECK-NEXT:    stxv v5, 32(r3)
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxv v2, 48(r3)
+; CHECK-NEXT:    stxv v3, 32(r3)
+; CHECK-NEXT:    stxv v4, 16(r3)
+; CHECK-NEXT:    stxv v5, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: cmplx_xxmacc:
@@ -673,7 +673,7 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
 ; CHECK-O0-NEXT:    xvf64gerpp wacc0, vsp32, vs0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    xvf64gerpp wacc0, vsp34, vs0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r3)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -783,10 +783,10 @@ define void @int_xxsetaccz(ptr %ptr) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsetaccz wacc0
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r3)
-; CHECK-NEXT:    stxv v5, 32(r3)
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    stxv v2, 48(r3)
+; CHECK-NEXT:    stxv v3, 32(r3)
+; CHECK-NEXT:    stxv v4, 16(r3)
+; CHECK-NEXT:    stxv v5, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: int_xxsetaccz:
@@ -802,7 +802,7 @@ define void @int_xxsetaccz(ptr %ptr) {
 ; CHECK-O0-LABEL: int_xxsetaccz:
 ; CHECK-O0:       # %bb.0: # %entry
 ; CHECK-O0-NEXT:    xxsetaccz wacc0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r3)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
@@ -946,14 +946,14 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
 ; CHECK-NEXT:    xxsetaccz wacc0
 ; CHECK-NEXT:    xvf32gerpp wacc0, v2, v2
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r3)
-; CHECK-NEXT:    stxv v5, 32(r3)
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
-; CHECK-NEXT:    stxv v4, 112(r3)
-; CHECK-NEXT:    stxv v5, 96(r3)
-; CHECK-NEXT:    stxv v2, 80(r3)
-; CHECK-NEXT:    stxv v3, 64(r3)
+; CHECK-NEXT:    stxv v2, 48(r3)
+; CHECK-NEXT:    stxv v3, 32(r3)
+; CHECK-NEXT:    stxv v4, 16(r3)
+; CHECK-NEXT:    stxv v5, 0(r3)
+; CHECK-NEXT:    stxv v2, 112(r3)
+; CHECK-NEXT:    stxv v3, 96(r3)
+; CHECK-NEXT:    stxv v4, 80(r3)
+; CHECK-NEXT:    stxv v5, 64(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testcse:
@@ -975,7 +975,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
 ; CHECK-O0:       # %bb.0: # %entry
 ; CHECK-O0-NEXT:    xxsetaccz wacc0
 ; CHECK-O0-NEXT:    xvf32gerpp wacc0, v2, v2
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs3, v4, v4
 ; CHECK-O0-NEXT:    stxv vs3, 48(r3)
 ; CHECK-O0-NEXT:    xxlor vs2, v5, v5
@@ -1065,10 +1065,10 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
 ; CHECK-NEXT:    plxvp vsp36, 8(r4), 0
 ; CHECK-NEXT:    pmxvf64gernn wacc0, vsp36, v2, 0, 0
 ; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
-; CHECK-NEXT:    stxv v4, 48(r7)
-; CHECK-NEXT:    stxv v5, 32(r7)
-; CHECK-NEXT:    stxv v2, 16(r7)
-; CHECK-NEXT:    stxv v3, 0(r7)
+; CHECK-NEXT:    stxv v2, 48(r7)
+; CHECK-NEXT:    stxv v3, 32(r7)
+; CHECK-NEXT:    stxv v4, 16(r7)
+; CHECK-NEXT:    stxv v5, 0(r7)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_ldst_1:
@@ -1104,7 +1104,7 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
 ; CHECK-O0-NEXT:    plxvp vsp34, 8(r4), 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    pmxvf64gernn wacc0, vsp34, vs0, 0, 0
-; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-O0-NEXT:    dmxxextfdmr512 wacc0, vsp36, vsp34, 0
 ; CHECK-O0-NEXT:    xxlor vs0, v4, v4
 ; CHECK-O0-NEXT:    stxv vs0, 48(r7)
 ; CHECK-O0-NEXT:    xxlor vs0, v5, v5
diff --git a/llvm/test/CodeGen/PowerPC/v1024ls.ll b/llvm/test/CodeGen/PowerPC/v1024ls.ll
new file mode 100644
index 0000000000000..97668009cb0d7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v1024ls.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -disable-auto-paired-vec-st=false \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -disable-auto-paired-vec-st=false \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @v1024ls(ptr nocapture readonly %vqp, ptr nocapture %resp)  {
+; CHECK-LABEL: v1024ls:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    lxv v5, 32(r3)
+; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxv v4, 48(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxv v3, 64(r3)
+; CHECK-NEXT:    lxv v5, 96(r3)
+; CHECK-NEXT:    lxv v2, 80(r3)
+; CHECK-NEXT:    lxv v4, 112(r3)
+; CHECK-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    stxv v2, 112(r4)
+; CHECK-NEXT:    stxv v3, 96(r4)
+; CHECK-NEXT:    stxv v4, 80(r4)
+; CHECK-NEXT:    stxv v5, 64(r4)
+; CHECK-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    stxv v2, 48(r4)
+; CHECK-NEXT:    stxv v3, 32(r4)
+; CHECK-NEXT:    stxv v4, 16(r4)
+; CHECK-NEXT:    stxv v5, 0(r4)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: v1024ls:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v3, 112(r3)
+; CHECK-BE-NEXT:    lxv v5, 80(r3)
+; CHECK-BE-NEXT:    lxv v2, 96(r3)
+; CHECK-BE-NEXT:    lxv v4, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxv v3, 48(r3)
+; CHECK-BE-NEXT:    lxv v5, 16(r3)
+; CHECK-BE-NEXT:    lxv v2, 32(r3)
+; CHECK-BE-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-BE-NEXT:    stxv v5, 112(r4)
+; CHECK-BE-NEXT:    stxv v4, 96(r4)
+; CHECK-BE-NEXT:    stxv v3, 80(r4)
+; CHECK-BE-NEXT:    stxv v2, 64(r4)
+; CHECK-BE-NEXT:    dmxxextfdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-BE-NEXT:    stxv v5, 48(r4)
+; CHECK-BE-NEXT:    stxv v4, 32(r4)
+; CHECK-BE-NEXT:    stxv v3, 16(r4)
+; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <1024 x i1>, ptr %vqp, align 64
+  store <1024 x i1> %0, ptr %resp, align 64
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()

``````````

</details>


https://github.com/llvm/llvm-project/pull/126969


More information about the llvm-commits mailing list