[llvm] [PowerPC] Add code to spill and restore DMRp registers (PR #142443)

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 2 12:21:42 PDT 2025


https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/142443

>From bbf720f7d57b34fc2ba763866262a78092981090 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 29 May 2025 08:59:22 -0500
Subject: [PATCH 1/8] reorder insert/extract to utilize fewer registers

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 20 ++++++------
 llvm/test/CodeGen/PowerPC/dmr-spill.ll      | 36 ++++++++++-----------
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 45183af0b7984..a396164b997f8 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1530,10 +1530,6 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
   Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
 
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
-      .addDef(VSRpReg3)
-      .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
-
   BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
       .addDef(VSRpReg1)
       .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
@@ -1544,6 +1540,11 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
                         .addReg(VSRpReg1, RegState::Kill),
                     FrameIndex, IsLittleEndian ? 64 : 32);
+
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
+      .addDef(VSRpReg3)
+      .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
+
   addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
                         .addReg(VSRpReg2, RegState::Kill),
                     FrameIndex, IsLittleEndian ? 32 : 64);
@@ -1578,6 +1579,12 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
                     FrameIndex, IsLittleEndian ? 96 : 0);
   addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
                     FrameIndex, IsLittleEndian ? 64 : 32);
+
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
+          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
+      .addReg(VSRpReg0, RegState::Kill)
+      .addReg(VSRpReg1, RegState::Kill);
+
   addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
                     FrameIndex, IsLittleEndian ? 32 : 64);
   addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
@@ -1589,11 +1596,6 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
       .addReg(VSRpReg2, RegState::Kill)
       .addReg(VSRpReg3, RegState::Kill);
 
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
-          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
-      .addReg(VSRpReg0, RegState::Kill)
-      .addReg(VSRpReg1, RegState::Kill);
-
   // Discard the pseudo instruction.
   MBB.erase(II);
 }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
index b224643a6dd9f..afd3ab0dee1b1 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -30,19 +30,19 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; CHECK-NEXT:    lxv v3, 0(r4)
 ; CHECK-NEXT:    lxv vs0, 0(r5)
 ; CHECK-NEXT:    dmxvbf16gerx2pp dmr0, vsp34, vs0
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-NEXT:    stxvp vsp36, 128(r1)
+; CHECK-NEXT:    stxvp vsp34, 96(r1)
 ; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; CHECK-NEXT:    dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; CHECK-NEXT:    stxvp vsp38, 128(r1)
-; CHECK-NEXT:    stxvp vsp32, 96(r1)
 ; CHECK-NEXT:    stxvp vsp36, 64(r1)
 ; CHECK-NEXT:    stxvp vsp34, 32(r1)
 ; CHECK-NEXT:    bl dummy_func at notoc
 ; CHECK-NEXT:    lxvp vsp34, 128(r1)
 ; CHECK-NEXT:    lxvp vsp36, 96(r1)
-; CHECK-NEXT:    lxvp vsp32, 64(r1)
-; CHECK-NEXT:    lxvp vsp38, 32(r1)
-; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
 ; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    lxvp vsp34, 64(r1)
+; CHECK-NEXT:    lxvp vsp36, 32(r1)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
 ; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
 ; CHECK-NEXT:    stxvp vsp34, 96(r30)
 ; CHECK-NEXT:    stxvp vsp36, 64(r30)
@@ -72,20 +72,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; AIX-NEXT:    lxv v3, 16(r4)
 ; AIX-NEXT:    lxv vs0, 0(r5)
 ; AIX-NEXT:    dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX-NEXT:    stxvp vsp36, 112(r1)
+; AIX-NEXT:    stxvp vsp34, 144(r1)
 ; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; AIX-NEXT:    dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; AIX-NEXT:    stxvp vsp38, 112(r1)
-; AIX-NEXT:    stxvp vsp32, 144(r1)
 ; AIX-NEXT:    stxvp vsp36, 176(r1)
 ; AIX-NEXT:    stxvp vsp34, 208(r1)
 ; AIX-NEXT:    bl .dummy_func[PR]
 ; AIX-NEXT:    nop
 ; AIX-NEXT:    lxvp vsp34, 112(r1)
 ; AIX-NEXT:    lxvp vsp36, 144(r1)
-; AIX-NEXT:    lxvp vsp32, 176(r1)
-; AIX-NEXT:    lxvp vsp38, 208(r1)
-; AIX-NEXT:    dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
 ; AIX-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX-NEXT:    lxvp vsp34, 176(r1)
+; AIX-NEXT:    lxvp vsp36, 208(r1)
+; AIX-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
 ; AIX-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; AIX-NEXT:    stxvp vsp36, 96(r31)
 ; AIX-NEXT:    stxvp vsp34, 64(r31)
@@ -115,20 +115,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
 ; AIX32-NEXT:    lxv v3, 16(r4)
 ; AIX32-NEXT:    lxv vs0, 0(r5)
 ; AIX32-NEXT:    dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX32-NEXT:    stxvp vsp36, 64(r1)
+; AIX32-NEXT:    stxvp vsp34, 96(r1)
 ; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; AIX32-NEXT:    dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; AIX32-NEXT:    stxvp vsp38, 64(r1)
-; AIX32-NEXT:    stxvp vsp32, 96(r1)
 ; AIX32-NEXT:    stxvp vsp36, 128(r1)
 ; AIX32-NEXT:    stxvp vsp34, 160(r1)
 ; AIX32-NEXT:    bl .dummy_func[PR]
 ; AIX32-NEXT:    nop
 ; AIX32-NEXT:    lxvp vsp34, 64(r1)
 ; AIX32-NEXT:    lxvp vsp36, 96(r1)
-; AIX32-NEXT:    lxvp vsp32, 128(r1)
-; AIX32-NEXT:    lxvp vsp38, 160(r1)
-; AIX32-NEXT:    dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
 ; AIX32-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX32-NEXT:    lxvp vsp34, 128(r1)
+; AIX32-NEXT:    lxvp vsp36, 160(r1)
+; AIX32-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
 ; AIX32-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
 ; AIX32-NEXT:    stxvp vsp36, 96(r31)
 ; AIX32-NEXT:    stxvp vsp34, 64(r31)

>From 0c3c65945e31e24680353f3e7829198bd98b5bd6 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 28 May 2025 14:39:00 -0500
Subject: [PATCH 2/8] initial framework

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp    |  2 +-
 llvm/lib/Target/PowerPC/PPCInstrInfo.h      |  9 ++++
 llvm/lib/Target/PowerPC/PPCInstrMMA.td      |  4 ++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 51 +++++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.h   |  4 ++
 5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 57c86d9e5de64..7c1550e99bae1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1926,7 +1926,7 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
   } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
     llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
   } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
-    llvm_unreachable("TODO: Implement spill DMRp regclass!");
+    OpcodeIndex = SOK_DMRpSpill;
   } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
     OpcodeIndex = SOK_DMRSpill;
   } else {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index a27b5718ec89d..7931a9e3ae131 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -81,6 +81,7 @@ enum SpillOpcodeKey {
   SOK_AccumulatorSpill,
   SOK_UAccumulatorSpill,
   SOK_WAccumulatorSpill,
+  SOK_DMRpSpill,
   SOK_DMRSpill,
   SOK_SPESpill,
   SOK_PairedG8Spill,
@@ -119,6 +120,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::EVLDD,                                                                 \
    PPC::RESTORE_QUADWORD}
 
@@ -140,6 +142,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::RESTORE_QUADWORD}
 
 #define Pwr10LoadOpcodes                                                       \
@@ -160,6 +163,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::RESTORE_QUADWORD}
 
 #define FutureLoadOpcodes                                                      \
@@ -178,6 +182,7 @@ enum PPCMachineCombinerPattern : unsigned {
    PPC::RESTORE_ACC,                                                           \
    PPC::RESTORE_UACC,                                                          \
    PPC::RESTORE_WACC,                                                          \
+   PPC::RESTORE_DMRP,                                                          \
    PPC::RESTORE_DMR,                                                           \
    NoInstr,                                                                    \
    PPC::RESTORE_QUADWORD}
@@ -199,6 +204,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::EVSTDD,                                                                \
    PPC::SPILL_QUADWORD}
 
@@ -220,6 +226,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::SPILL_QUADWORD}
 
 #define Pwr10StoreOpcodes                                                      \
@@ -240,6 +247,7 @@ enum PPCMachineCombinerPattern : unsigned {
    NoInstr,                                                                    \
    NoInstr,                                                                    \
    NoInstr,                                                                    \
+   NoInstr,                                                                    \
    PPC::SPILL_QUADWORD}
 
 #define FutureStoreOpcodes                                                     \
@@ -258,6 +266,7 @@ enum PPCMachineCombinerPattern : unsigned {
    PPC::SPILL_ACC,                                                             \
    PPC::SPILL_UACC,                                                            \
    PPC::SPILL_WACC,                                                            \
+   PPC::SPILL_DMRP,                                                            \
    PPC::SPILL_DMR,                                                             \
    NoInstr,                                                                    \
    PPC::SPILL_QUADWORD}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index 82e4a60e0a728..d5e367be4ba3f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -565,12 +565,16 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
   let mayStore = 1 in {
     def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
                                       "#SPILL_WACC", []>;
+    def SPILL_DMRP: PPCEmitTimePseudo<(outs), (ins dmrprc:$AT, memrix16:$dst),
+                                      "#SPILL_DMRP", []>;
     def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst),
                                       "#SPILL_DMR", []>;
   }
   let mayLoad = 1, hasSideEffects = 0 in {
     def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
                                         "#RESTORE_WACC", []>;
+    def RESTORE_DMRP: PPCEmitTimePseudo<(outs dmrprc:$AT), (ins memrix16:$src),
+                                        "#RESTORE_DMRP", []>;
     def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src),
                                         "#RESTORE_DMR", []>;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index a396164b997f8..0f5d0fcfdc5b7 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1556,6 +1556,49 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   MBB.erase(II);
 }
 
+/// lowerDMRPRestore - Generate the code to restore the DMR register.
+void PPCRegisterInfo::lowerDMRPRestore(MachineBasicBlock::iterator II,
+                                      unsigned FrameIndex) const {
+  MachineInstr &MI = *II; // <DestReg> = RESTORE_WACC <offset>
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+  bool IsLittleEndian = Subtarget.isLittleEndian();
+
+  const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
+  Register DestReg = MI.getOperand(0).getReg();
+
+  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+  Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
+  Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+                    FrameIndex, IsLittleEndian ? 96 : 0);
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+                    FrameIndex, IsLittleEndian ? 64 : 32);
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
+                    FrameIndex, IsLittleEndian ? 32 : 64);
+  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
+                    FrameIndex, IsLittleEndian ? 0 : 96);
+
+  // Kill virtual registers (killedRegState::Killed).
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
+          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
+      .addReg(VSRpReg2, RegState::Kill)
+      .addReg(VSRpReg3, RegState::Kill);
+
+  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
+          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
+      .addReg(VSRpReg0, RegState::Kill)
+      .addReg(VSRpReg1, RegState::Kill);
+
+  // Discard the pseudo instruction.
+  MBB.erase(II);
+}
+
 /// lowerDMRRestore - Generate the code to restore the DMR register.
 void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
                                       unsigned FrameIndex) const {
@@ -1762,6 +1805,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   case PPC::RESTORE_WACC:
     lowerWACCRestore(II, FrameIndex);
     return true;
+  case PPC::SPILL_DMRP:
+    break;
+    lowerDMRPSpilling(II, FrameIndex);
+    return true;
+  case PPC::RESTORE_DMRP:
+    break;
+    lowerDMRPRestore(II, FrameIndex);
+    return true;
   case PPC::SPILL_DMR:
     lowerDMRSpilling(II, FrameIndex);
     return true;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 4b66ece534112..aaa15cdc3456f 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -151,6 +151,10 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   void lowerQuadwordRestore(MachineBasicBlock::iterator II,
                             unsigned FrameIndex) const;
 
+  void lowerDMRPSpilling(MachineBasicBlock::iterator II,
+                         unsigned FrameIndex) const;
+  void lowerDMRPRestore(MachineBasicBlock::iterator II,
+                        unsigned FrameIndex) const;
   void lowerDMRSpilling(MachineBasicBlock::iterator II,
                         unsigned FrameIndex) const;
   void lowerDMRRestore(MachineBasicBlock::iterator II,

>From f4ad39846580ba534b4a2465d64a350e2af28643 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 29 May 2025 14:14:05 -0500
Subject: [PATCH 3/8] add code to spill and restore dmrp

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 173 +++++++---------
 llvm/test/CodeGen/PowerPC/dmrp-spill.ll     | 213 ++++++++++++++++++++
 2 files changed, 286 insertions(+), 100 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/dmrp-spill.ll

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 0f5d0fcfdc5b7..dbf3c8153d261 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1523,77 +1523,45 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   // DMR is made up of WACC and WACC_HI, so DMXXEXTFDMR512 to spill
   // the corresponding 512 bits.
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
-  Register SrcReg = MI.getOperand(0).getReg();
 
-  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+  auto spillDMR = [&](Register SrcReg, int BEIdx, int LEIdx) {
+    Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
 
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
-      .addDef(VSRpReg1)
-      .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
+    BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
+        .addDef(VSRpReg1)
+        .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
 
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                        .addReg(VSRpReg0, RegState::Kill),
-                    FrameIndex, IsLittleEndian ? 96 : 0);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                        .addReg(VSRpReg1, RegState::Kill),
-                    FrameIndex, IsLittleEndian ? 64 : 32);
-
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
-      .addDef(VSRpReg3)
-      .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
-
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                        .addReg(VSRpReg2, RegState::Kill),
-                    FrameIndex, IsLittleEndian ? 32 : 64);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                        .addReg(VSRpReg3, RegState::Kill),
-                    FrameIndex, IsLittleEndian ? 0 : 96);
-
-  // Discard the pseudo instruction.
-  MBB.erase(II);
-}
-
-/// lowerDMRPRestore - Generate the code to restore the DMR register.
-void PPCRegisterInfo::lowerDMRPRestore(MachineBasicBlock::iterator II,
-                                      unsigned FrameIndex) const {
-  MachineInstr &MI = *II; // <DestReg> = RESTORE_WACC <offset>
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
-  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-  bool IsLittleEndian = Subtarget.isLittleEndian();
-
-  const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
-  Register DestReg = MI.getOperand(0).getReg();
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                          .addReg(VSRpReg0, RegState::Kill),
+                      FrameIndex, IsLittleEndian ? LEIdx : BEIdx);
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                          .addReg(VSRpReg1, RegState::Kill),
+                      FrameIndex, IsLittleEndian ? LEIdx - 32 : BEIdx + 32);
 
-  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+    BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
+        .addDef(VSRpReg3)
+        .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
 
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
-                    FrameIndex, IsLittleEndian ? 96 : 0);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
-                    FrameIndex, IsLittleEndian ? 64 : 32);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
-                    FrameIndex, IsLittleEndian ? 32 : 64);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
-                    FrameIndex, IsLittleEndian ? 0 : 96);
-
-  // Kill virtual registers (killedRegState::Killed).
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
-          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
-      .addReg(VSRpReg2, RegState::Kill)
-      .addReg(VSRpReg3, RegState::Kill);
-
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
-          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
-      .addReg(VSRpReg0, RegState::Kill)
-      .addReg(VSRpReg1, RegState::Kill);
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                          .addReg(VSRpReg2, RegState::Kill),
+                      FrameIndex, IsLittleEndian ? LEIdx - 64 : BEIdx + 64);
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                          .addReg(VSRpReg3, RegState::Kill),
+                      FrameIndex, IsLittleEndian ? BEIdx : LEIdx);
+  };
+
+  if (MI.getOpcode() == PPC::SPILL_DMRP) {
+    spillDMR(
+        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr1),
+        0, 96);
+    spillDMR(
+        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr0),
+        128, 224);
+  } else
+    spillDMR(MI.getOperand(0).getReg(), 0, 96);
 
   // Discard the pseudo instruction.
   MBB.erase(II);
@@ -1602,7 +1570,7 @@ void PPCRegisterInfo::lowerDMRPRestore(MachineBasicBlock::iterator II,
 /// lowerDMRRestore - Generate the code to restore the DMR register.
 void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
                                       unsigned FrameIndex) const {
-  MachineInstr &MI = *II; // <DestReg> = RESTORE_WACC <offset>
+  MachineInstr &MI = *II; // <DestReg> = RESTORE_DMR[P] <offset>
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
@@ -1611,33 +1579,44 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
   bool IsLittleEndian = Subtarget.isLittleEndian();
 
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
-  Register DestReg = MI.getOperand(0).getReg();
-
-  Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
-  Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
-
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
-                    FrameIndex, IsLittleEndian ? 96 : 0);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
-                    FrameIndex, IsLittleEndian ? 64 : 32);
-
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
-          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
-      .addReg(VSRpReg0, RegState::Kill)
-      .addReg(VSRpReg1, RegState::Kill);
 
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
-                    FrameIndex, IsLittleEndian ? 32 : 64);
-  addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
-                    FrameIndex, IsLittleEndian ? 0 : 96);
-
-  // Kill virtual registers (killedRegState::Killed).
-  BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
-          TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
-      .addReg(VSRpReg2, RegState::Kill)
-      .addReg(VSRpReg3, RegState::Kill);
+  auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) {
+    Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
+    Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+                      FrameIndex, IsLittleEndian ? LEIdx : BEIdx);
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+                      FrameIndex, IsLittleEndian ? LEIdx - 32 : BEIdx + 32);
+
+    BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
+            TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
+        .addReg(VSRpReg0, RegState::Kill)
+        .addReg(VSRpReg1, RegState::Kill);
+
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
+                      FrameIndex, IsLittleEndian ? LEIdx - 64 : BEIdx + 64);
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
+                      FrameIndex, IsLittleEndian ? BEIdx : LEIdx);
+
+    // Kill virtual registers (killedRegState::Killed).
+    BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
+            TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
+        .addReg(VSRpReg2, RegState::Kill)
+        .addReg(VSRpReg3, RegState::Kill);
+  };
+
+  if (MI.getOpcode() == PPC::RESTORE_DMRP) {
+    restoreDMR(
+        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr1),
+        0, 96);
+    restoreDMR(
+        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr0),
+        128, 224);
+  } else
+    restoreDMR(MI.getOperand(0).getReg(), 0, 96);
 
   // Discard the pseudo instruction.
   MBB.erase(II);
@@ -1806,16 +1785,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     lowerWACCRestore(II, FrameIndex);
     return true;
   case PPC::SPILL_DMRP:
-    break;
-    lowerDMRPSpilling(II, FrameIndex);
-    return true;
-  case PPC::RESTORE_DMRP:
-    break;
-    lowerDMRPRestore(II, FrameIndex);
-    return true;
   case PPC::SPILL_DMR:
     lowerDMRSpilling(II, FrameIndex);
     return true;
+  case PPC::RESTORE_DMRP:
   case PPC::RESTORE_DMR:
     lowerDMRRestore(II, FrameIndex);
     return true;
diff --git a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
new file mode 100644
index 0000000000000..62d42d4a26d52
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
+
+declare void @dummy_func()
+declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+
+define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind {
+; CHECK-LABEL: test_dmsha3hash:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -304(r1)
+; CHECK-NEXT:    std r30, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT:    lxvp vsp34, 128(r3)
+; CHECK-NEXT:    lxvp vsp36, 160(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 192(r3)
+; CHECK-NEXT:    lxvp vsp36, 224(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmsha3hash dmrp0, 5
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; CHECK-NEXT:    stxvp vsp36, 128(r1)
+; CHECK-NEXT:    stxvp vsp34, 96(r1)
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; CHECK-NEXT:    stxvp vsp36, 64(r1)
+; CHECK-NEXT:    stxvp vsp34, 32(r1)
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-NEXT:    stxvp vsp36, 256(r1)
+; CHECK-NEXT:    stxvp vsp34, 224(r1)
+; CHECK-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp36, 192(r1)
+; CHECK-NEXT:    stxvp vsp34, 160(r1)
+; CHECK-NEXT:    bl dummy_func@notoc
+; CHECK-NEXT:    lxvp vsp34, 128(r1)
+; CHECK-NEXT:    lxvp vsp36, 96(r1)
+; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; CHECK-NEXT:    lxvp vsp34, 64(r1)
+; CHECK-NEXT:    lxvp vsp36, 32(r1)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; CHECK-NEXT:    lxvp vsp34, 256(r1)
+; CHECK-NEXT:    lxvp vsp36, 224(r1)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT:    lxvp vsp34, 192(r1)
+; CHECK-NEXT:    lxvp vsp36, 160(r1)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 224(r30)
+; CHECK-NEXT:    stxvp vsp36, 192(r30)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 160(r30)
+; CHECK-NEXT:    stxvp vsp36, 128(r30)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r30)
+; CHECK-NEXT:    stxvp vsp36, 64(r30)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r30)
+; CHECK-NEXT:    stxvp vsp36, 0(r30)
+; CHECK-NEXT:    ld r30, 288(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 304
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; AIX-LABEL: test_dmsha3hash:
+; AIX:       # %bb.0: # %entry
+; AIX-NEXT:    mflr r0
+; AIX-NEXT:    std r0, 16(r1)
+; AIX-NEXT:    stdu r1, -384(r1)
+; AIX-NEXT:    std r31, 376(r1) # 8-byte Folded Spill
+; AIX-NEXT:    lxvp vsp34, 224(r3)
+; AIX-NEXT:    lxvp vsp36, 192(r3)
+; AIX-NEXT:    mr r31, r4
+; AIX-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; AIX-NEXT:    lxvp vsp34, 160(r3)
+; AIX-NEXT:    lxvp vsp36, 128(r3)
+; AIX-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; AIX-NEXT:    lxvp vsp34, 96(r3)
+; AIX-NEXT:    lxvp vsp36, 64(r3)
+; AIX-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX-NEXT:    lxvp vsp34, 32(r3)
+; AIX-NEXT:    lxvp vsp36, 0(r3)
+; AIX-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX-NEXT:    dmsha3hash dmrp0, 5
+; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; AIX-NEXT:    stxvp vsp36, 112(r1)
+; AIX-NEXT:    stxvp vsp34, 144(r1)
+; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; AIX-NEXT:    stxvp vsp36, 176(r1)
+; AIX-NEXT:    stxvp vsp34, 208(r1)
+; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX-NEXT:    stxvp vsp36, 240(r1)
+; AIX-NEXT:    stxvp vsp34, 272(r1)
+; AIX-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX-NEXT:    stxvp vsp36, 304(r1)
+; AIX-NEXT:    stxvp vsp34, 336(r1)
+; AIX-NEXT:    bl .dummy_func[PR]
+; AIX-NEXT:    nop
+; AIX-NEXT:    lxvp vsp34, 112(r1)
+; AIX-NEXT:    lxvp vsp36, 144(r1)
+; AIX-NEXT:    dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; AIX-NEXT:    lxvp vsp34, 176(r1)
+; AIX-NEXT:    lxvp vsp36, 208(r1)
+; AIX-NEXT:    dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; AIX-NEXT:    lxvp vsp34, 240(r1)
+; AIX-NEXT:    lxvp vsp36, 272(r1)
+; AIX-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX-NEXT:    lxvp vsp34, 304(r1)
+; AIX-NEXT:    lxvp vsp36, 336(r1)
+; AIX-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; AIX-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; AIX-NEXT:    stxvp vsp36, 224(r31)
+; AIX-NEXT:    stxvp vsp34, 192(r31)
+; AIX-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; AIX-NEXT:    stxvp vsp36, 160(r31)
+; AIX-NEXT:    stxvp vsp34, 128(r31)
+; AIX-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX-NEXT:    stxvp vsp36, 96(r31)
+; AIX-NEXT:    stxvp vsp34, 64(r31)
+; AIX-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX-NEXT:    stxvp vsp36, 32(r31)
+; AIX-NEXT:    stxvp vsp34, 0(r31)
+; AIX-NEXT:    ld r31, 376(r1) # 8-byte Folded Reload
+; AIX-NEXT:    addi r1, r1, 384
+; AIX-NEXT:    ld r0, 16(r1)
+; AIX-NEXT:    mtlr r0
+; AIX-NEXT:    blr
+;
+; AIX32-LABEL: test_dmsha3hash:
+; AIX32:       # %bb.0: # %entry
+; AIX32-NEXT:    mflr r0
+; AIX32-NEXT:    stw r0, 8(r1)
+; AIX32-NEXT:    stwu r1, -336(r1)
+; AIX32-NEXT:    stw r31, 332(r1) # 4-byte Folded Spill
+; AIX32-NEXT:    lxvp vsp34, 224(r3)
+; AIX32-NEXT:    lxvp vsp36, 192(r3)
+; AIX32-NEXT:    mr r31, r4
+; AIX32-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; AIX32-NEXT:    lxvp vsp34, 160(r3)
+; AIX32-NEXT:    lxvp vsp36, 128(r3)
+; AIX32-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; AIX32-NEXT:    lxvp vsp34, 96(r3)
+; AIX32-NEXT:    lxvp vsp36, 64(r3)
+; AIX32-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX32-NEXT:    lxvp vsp34, 32(r3)
+; AIX32-NEXT:    lxvp vsp36, 0(r3)
+; AIX32-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX32-NEXT:    dmsha3hash dmrp0, 5
+; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; AIX32-NEXT:    stxvp vsp36, 64(r1)
+; AIX32-NEXT:    stxvp vsp34, 96(r1)
+; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; AIX32-NEXT:    stxvp vsp36, 128(r1)
+; AIX32-NEXT:    stxvp vsp34, 160(r1)
+; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX32-NEXT:    stxvp vsp36, 192(r1)
+; AIX32-NEXT:    stxvp vsp34, 224(r1)
+; AIX32-NEXT:    dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX32-NEXT:    stxvp vsp36, 256(r1)
+; AIX32-NEXT:    stxvp vsp34, 288(r1)
+; AIX32-NEXT:    bl .dummy_func[PR]
+; AIX32-NEXT:    nop
+; AIX32-NEXT:    lxvp vsp34, 64(r1)
+; AIX32-NEXT:    lxvp vsp36, 96(r1)
+; AIX32-NEXT:    dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; AIX32-NEXT:    lxvp vsp34, 128(r1)
+; AIX32-NEXT:    lxvp vsp36, 160(r1)
+; AIX32-NEXT:    dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; AIX32-NEXT:    lxvp vsp34, 192(r1)
+; AIX32-NEXT:    lxvp vsp36, 224(r1)
+; AIX32-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX32-NEXT:    lxvp vsp34, 256(r1)
+; AIX32-NEXT:    lxvp vsp36, 288(r1)
+; AIX32-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; AIX32-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; AIX32-NEXT:    stxvp vsp36, 224(r31)
+; AIX32-NEXT:    stxvp vsp34, 192(r31)
+; AIX32-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; AIX32-NEXT:    stxvp vsp36, 160(r31)
+; AIX32-NEXT:    stxvp vsp34, 128(r31)
+; AIX32-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX32-NEXT:    stxvp vsp36, 96(r31)
+; AIX32-NEXT:    stxvp vsp34, 64(r31)
+; AIX32-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX32-NEXT:    stxvp vsp36, 32(r31)
+; AIX32-NEXT:    stxvp vsp34, 0(r31)
+; AIX32-NEXT:    lwz r31, 332(r1) # 4-byte Folded Reload
+; AIX32-NEXT:    addi r1, r1, 336
+; AIX32-NEXT:    lwz r0, 8(r1)
+; AIX32-NEXT:    mtlr r0
+; AIX32-NEXT:    blr
+  entry:
+    %0 = load <2048 x i1>, ptr %vopp, align 64
+    %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+    tail call void @dummy_func()
+    %3 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+    store <2048 x i1> %2, ptr %resp, align 64
+    ret void
+}

>From 7aa8a197a35ea74d48a7e5e381937f13ddd7e03a Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 29 May 2025 14:38:16 -0500
Subject: [PATCH 4/8] clean up dmrstore

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 42 +++++++++------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index dbf3c8153d261..6e2d19b1c2774 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1581,31 +1581,23 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
 
   auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) {
-    Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
-
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
-                      FrameIndex, IsLittleEndian ? LEIdx : BEIdx);
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
-                      FrameIndex, IsLittleEndian ? LEIdx - 32 : BEIdx + 32);
-
-    BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
-            TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
-        .addReg(VSRpReg0, RegState::Kill)
-        .addReg(VSRpReg1, RegState::Kill);
-
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
-                      FrameIndex, IsLittleEndian ? LEIdx - 64 : BEIdx + 64);
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
-                      FrameIndex, IsLittleEndian ? BEIdx : LEIdx);
-
-    // Kill virtual registers (killedRegState::Killed).
-    BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
-            TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
-        .addReg(VSRpReg2, RegState::Kill)
-        .addReg(VSRpReg3, RegState::Kill);
+    auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) {
+      Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+      Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+
+      addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+                        FrameIndex, IsLittleEndian ? IdxLE: IdxBE);
+      addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+                        FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32);
+
+      // Kill virtual registers (killedRegState::Killed).
+      BuildMI(MBB, II, DL, TII.get(Opc),
+              TargetRegisterInfo::getSubReg(DestReg, RegIdx))
+          .addReg(VSRpReg0, RegState::Kill)
+          .addReg(VSRpReg1, RegState::Kill);
+    };
+    restoreWACC(PPC::DMXXINSTDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx);
+    restoreWACC(PPC::DMXXINSTDMR512_HI, PPC::sub_wacc_hi, BEIdx+64, LEIdx-64);
   };
 
   if (MI.getOpcode() == PPC::RESTORE_DMRP) {

>From 59a447eedf6d0300ac89fbe77ed4da38feacc021 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 29 May 2025 14:55:12 -0500
Subject: [PATCH 5/8] cleanup dmr spill code

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 69 +++++++++------------
 1 file changed, 28 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6e2d19b1c2774..60bbbfcfe1cbf 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1525,43 +1525,31 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
 
   auto spillDMR = [&](Register SrcReg, int BEIdx, int LEIdx) {
-    Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
-    Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
-
-    BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
-        .addDef(VSRpReg1)
-        .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
-
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                          .addReg(VSRpReg0, RegState::Kill),
-                      FrameIndex, IsLittleEndian ? LEIdx : BEIdx);
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                          .addReg(VSRpReg1, RegState::Kill),
-                      FrameIndex, IsLittleEndian ? LEIdx - 32 : BEIdx + 32);
+    auto spillWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) {
+      Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+      Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
 
-    BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
-        .addDef(VSRpReg3)
-        .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
+      BuildMI(MBB, II, DL, TII.get(Opc), VSRpReg0)
+          .addDef(VSRpReg1)
+          .addReg(TargetRegisterInfo::getSubReg(SrcReg, RegIdx));
 
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                          .addReg(VSRpReg2, RegState::Kill),
-                      FrameIndex, IsLittleEndian ? LEIdx - 64 : BEIdx + 64);
-    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
-                          .addReg(VSRpReg3, RegState::Kill),
-                      FrameIndex, IsLittleEndian ? BEIdx : LEIdx);
+      addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                            .addReg(VSRpReg0, RegState::Kill),
+                        FrameIndex, IsLittleEndian ? IdxLE : IdxBE);
+      addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+                            .addReg(VSRpReg1, RegState::Kill),
+                        FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32);
+    };
+    spillWACC(PPC::DMXXEXTFDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx);
+    spillWACC(PPC::DMXXEXTFDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64, LEIdx - 64);
   };
 
+  Register SrcReg = MI.getOperand(0).getReg();
   if (MI.getOpcode() == PPC::SPILL_DMRP) {
-    spillDMR(
-        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr1),
-        0, 96);
-    spillDMR(
-        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr0),
-        128, 224);
+    spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr1), 0, 96);
+    spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr0), 128, 224);
   } else
-    spillDMR(MI.getOperand(0).getReg(), 0, 96);
+    spillDMR(SrcReg, 0, 96);
 
   // Discard the pseudo instruction.
   MBB.erase(II);
@@ -1581,12 +1569,13 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
 
   auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) {
-    auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) {
+    auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE,
+                           int IdxLE) {
       Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
       Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
 
       addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
-                        FrameIndex, IsLittleEndian ? IdxLE: IdxBE);
+                        FrameIndex, IsLittleEndian ? IdxLE : IdxBE);
       addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
                         FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32);
 
@@ -1597,18 +1586,16 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
           .addReg(VSRpReg1, RegState::Kill);
     };
     restoreWACC(PPC::DMXXINSTDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx);
-    restoreWACC(PPC::DMXXINSTDMR512_HI, PPC::sub_wacc_hi, BEIdx+64, LEIdx-64);
+    restoreWACC(PPC::DMXXINSTDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64,
+                LEIdx - 64);
   };
 
+  Register DestReg = MI.getOperand(0).getReg();
   if (MI.getOpcode() == PPC::RESTORE_DMRP) {
-    restoreDMR(
-        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr1),
-        0, 96);
-    restoreDMR(
-        TargetRegisterInfo::getSubReg(MI.getOperand(0).getReg(), PPC::sub_dmr0),
-        128, 224);
+    restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr1), 0, 96);
+    restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr0), 128, 224);
   } else
-    restoreDMR(MI.getOperand(0).getReg(), 0, 96);
+    restoreDMR(DestReg, 0, 96);
 
   // Discard the pseudo instruction.
   MBB.erase(II);

>From 1f52b64dfc8e0173e612a8b0b2b23e80304a5a64 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 29 May 2025 14:57:24 -0500
Subject: [PATCH 6/8] remove header for functions no longer needed

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index aaa15cdc3456f..4b66ece534112 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -151,10 +151,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   void lowerQuadwordRestore(MachineBasicBlock::iterator II,
                             unsigned FrameIndex) const;
 
-  void lowerDMRPSpilling(MachineBasicBlock::iterator II,
-                         unsigned FrameIndex) const;
-  void lowerDMRPRestore(MachineBasicBlock::iterator II,
-                        unsigned FrameIndex) const;
   void lowerDMRSpilling(MachineBasicBlock::iterator II,
                         unsigned FrameIndex) const;
   void lowerDMRRestore(MachineBasicBlock::iterator II,

>From 9ef357f26dd0af0c14c60c647600f7cf0905edc3 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Mon, 2 Jun 2025 12:32:43 -0500
Subject: [PATCH 7/8] fix spacing

---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 60bbbfcfe1cbf..e2c73e74c0ead 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1523,7 +1523,6 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
   // DMR is made up of WACC and WACC_HI, so DMXXEXTFDMR512 to spill
   // the corresponding 512 bits.
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
-
   auto spillDMR = [&](Register SrcReg, int BEIdx, int LEIdx) {
     auto spillWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) {
       Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
@@ -1567,7 +1566,6 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
   bool IsLittleEndian = Subtarget.isLittleEndian();
 
   const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
-
   auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) {
     auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE,
                            int IdxLE) {

>From e62e44bddec9cee4459053c8be2759bd084c4337 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Mon, 2 Jun 2025 14:21:28 -0500
Subject: [PATCH 8/8] update name class change

---
 llvm/lib/Target/PowerPC/PPCInstrMMA.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index d5e367be4ba3f..436715a0e4ab1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -565,7 +565,7 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
   let mayStore = 1 in {
     def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
                                       "#SPILL_WACC", []>;
-    def SPILL_DMRP: PPCEmitTimePseudo<(outs), (ins dmrprc:$AT, memrix16:$dst),
+    def SPILL_DMRP: PPCEmitTimePseudo<(outs), (ins dmrp:$AT, memrix16:$dst),
                                       "#SPILL_DMRP", []>;
     def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst),
                                       "#SPILL_DMR", []>;
@@ -573,7 +573,7 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
   let mayLoad = 1, hasSideEffects = 0 in {
     def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
                                         "#RESTORE_WACC", []>;
-    def RESTORE_DMRP: PPCEmitTimePseudo<(outs dmrprc:$AT), (ins memrix16:$src),
+    def RESTORE_DMRP: PPCEmitTimePseudo<(outs dmrp:$AT), (ins memrix16:$src),
                                         "#RESTORE_DMRP", []>;
     def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src),
                                         "#RESTORE_DMR", []>;



More information about the llvm-commits mailing list