[llvm] [PowerPC] Spill and restore DMR register (PR #141530)
Lei Huang via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 13:47:17 PDT 2025
https://github.com/lei137 created https://github.com/llvm/llvm-project/pull/141530
Add spilling and restoring of DMR registers.
>From d9fe53412c07b7fb7e78de9d21a043b1d8b9a487 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Mon, 26 May 2025 15:31:56 -0500
Subject: [PATCH] PowerPC: Spill DMR register
---
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 ++
llvm/lib/Target/PowerPC/PPCInstrInfo.h | 22 +--
llvm/lib/Target/PowerPC/PPCInstrMMA.td | 4 +
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 97 +++++++++++++
llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 5 +
llvm/test/CodeGen/PowerPC/dmr-spill.ll | 151 ++++++++++++++++++++
6 files changed, 277 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/dmr-spill.ll
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4d4a3efd1098e..2b71606f92eea 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1921,6 +1921,14 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
OpcodeIndex = SOK_PairedVecSpill;
} else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_PairedG8Spill;
+ } else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
+ llvm_unreachable("TODO: Implement spill DMRROW regclass!");
+ } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
+ llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
+ } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
+ llvm_unreachable("TODO: Implement spill DMRp regclass!");
+ } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
+ OpcodeIndex = SOK_DMRSpill;
} else {
llvm_unreachable("Unknown regclass!");
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 83bc39e0c06bd..1c7d0f2e91e77 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -81,6 +81,7 @@ enum SpillOpcodeKey {
SOK_AccumulatorSpill,
SOK_UAccumulatorSpill,
SOK_WAccumulatorSpill,
+ SOK_DMRSpill,
SOK_SPESpill,
SOK_PairedG8Spill,
SOK_LastOpcodeSpill // This must be last on the enum.
@@ -105,8 +106,8 @@ enum PPCMachineCombinerPattern : unsigned {
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
- PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVLDD, \
- PPC::RESTORE_QUADWORD \
+ PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr, NoInstr, \
+ PPC::EVLDD, PPC::RESTORE_QUADWORD \
}
#define Pwr9LoadOpcodes \
@@ -114,7 +115,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, \
- NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
+ NoInstr, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
}
#define Pwr10LoadOpcodes \
@@ -122,7 +123,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
- PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
+ PPC::RESTORE_UACC, NoInstr, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
}
#define FutureLoadOpcodes \
@@ -130,15 +131,16 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
- PPC::RESTORE_UACC, PPC::RESTORE_WACC, NoInstr, PPC::RESTORE_QUADWORD \
+ PPC::RESTORE_UACC, PPC::RESTORE_WACC, PPC::RESTORE_DMR, NoInstr, \
+ PPC::RESTORE_QUADWORD \
}
#define Pwr8StoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \
- PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, PPC::EVSTDD, \
- PPC::SPILL_QUADWORD \
+ PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, NoInstr, \
+ PPC::EVSTDD, PPC::SPILL_QUADWORD \
}
#define Pwr9StoreOpcodes \
@@ -146,7 +148,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, NoInstr, \
- PPC::SPILL_QUADWORD \
+ NoInstr, PPC::SPILL_QUADWORD \
}
#define Pwr10StoreOpcodes \
@@ -154,7 +156,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
- NoInstr, NoInstr, PPC::SPILL_QUADWORD \
+ NoInstr, NoInstr, NoInstr, PPC::SPILL_QUADWORD \
}
#define FutureStoreOpcodes \
@@ -162,7 +164,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
- PPC::SPILL_WACC, NoInstr, PPC::SPILL_QUADWORD \
+ PPC::SPILL_WACC, PPC::SPILL_DMR, NoInstr, PPC::SPILL_QUADWORD \
}
// Initialize arrays for load and store spill opcodes on supported subtargets.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index 23b951871d5f4..a424ac0352e19 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -565,10 +565,14 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
let mayStore = 1 in {
def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
"#SPILL_WACC", []>;
+ def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst),
+ "#SPILL_DMR", []>;
}
let mayLoad = 1, hasSideEffects = 0 in {
def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
"#RESTORE_WACC", []>;
+ def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src),
+ "#RESTORE_DMR", []>;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 51902ad218d1c..ed760b5d56a09 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1509,6 +1509,97 @@ void PPCRegisterInfo::lowerQuadwordRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+/// lowerDMRSpilling - Lower SPILL_DMR into DMR extracts and four STXVP stores.
+void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // SPILL_DMR <SrcReg>, <offset>
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ bool IsLittleEndian = Subtarget.isLittleEndian();
+
+ // A DMR is made up of WACC and WACC_HI, so extract each 512-bit half with
+ // DMXXEXTFDMR512[_HI] into two VSR pairs, then store all four pairs.
+ const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
+ Register SrcReg = MI.getOperand(0).getReg();
+
+ Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+
+ BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
+ .addDef(VSRpReg3)
+ .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
+
+ BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
+ .addDef(VSRpReg1)
+ .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
+
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg0, RegState::Kill),
+ FrameIndex, IsLittleEndian ? 96 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg1, RegState::Kill),
+ FrameIndex, IsLittleEndian ? 64 : 32);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg2, RegState::Kill),
+ FrameIndex, IsLittleEndian ? 32 : 64);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg3, RegState::Kill),
+ FrameIndex, IsLittleEndian ? 0 : 96);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+/// lowerDMRRestore - Lower RESTORE_DMR into four LXVP loads and DMR inserts.
+void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ MachineInstr &MI = *II; // <DestReg> = RESTORE_DMR <offset>
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ bool IsLittleEndian = Subtarget.isLittleEndian();
+
+ const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
+ Register DestReg = MI.getOperand(0).getReg();
+
+ Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+ FrameIndex, IsLittleEndian ? 96 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+ FrameIndex, IsLittleEndian ? 64 : 32);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
+ FrameIndex, IsLittleEndian ? 32 : 64);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
+ FrameIndex, IsLittleEndian ? 0 : 96);
+
+ // Insert the loaded pairs into both DMR halves, killing the temporaries.
+ BuildMI(MBB, II, DL,
+ TII.get(PPC::DMXXINSTDMR512_HI),
+ TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
+ .addReg(VSRpReg2, RegState::Kill)
+ .addReg(VSRpReg3, RegState::Kill);
+
+ BuildMI(MBB, II, DL,
+ TII.get(PPC::DMXXINSTDMR512),
+ TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
+ .addReg(VSRpReg0, RegState::Kill)
+ .addReg(VSRpReg1, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
Register Reg, int &FrameIdx) const {
// For the nonvolatile condition registers (CR2, CR3, CR4) return true to
@@ -1671,6 +1762,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::RESTORE_WACC:
lowerWACCRestore(II, FrameIndex);
return true;
+ case PPC::SPILL_DMR:
+ lowerDMRSpilling(II, FrameIndex);
+ return true;
+ case PPC::RESTORE_DMR:
+ lowerDMRRestore(II, FrameIndex);
+ return true;
case PPC::SPILL_QUADWORD:
lowerQuadwordSpilling(II, FrameIndex);
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 005d890c57c93..7b6cab72e5170 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -151,6 +151,11 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
void lowerQuadwordRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
+ void lowerDMRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerDMRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg,
MCRegister SrcReg);
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
new file mode 100644
index 0000000000000..b224643a6dd9f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
+
+declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>)
+declare void @dummy_func()
+
+define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
+; CHECK-LABEL: spillDMRreg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -176(r1)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: mr r30, r6
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxv v2, 16(r4)
+; CHECK-NEXT: lxv v3, 0(r4)
+; CHECK-NEXT: lxv vs0, 0(r5)
+; CHECK-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
+; CHECK-NEXT: stxvp vsp38, 128(r1)
+; CHECK-NEXT: stxvp vsp32, 96(r1)
+; CHECK-NEXT: stxvp vsp36, 64(r1)
+; CHECK-NEXT: stxvp vsp34, 32(r1)
+; CHECK-NEXT: bl dummy_func@notoc
+; CHECK-NEXT: lxvp vsp34, 128(r1)
+; CHECK-NEXT: lxvp vsp36, 96(r1)
+; CHECK-NEXT: lxvp vsp32, 64(r1)
+; CHECK-NEXT: lxvp vsp38, 32(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r30)
+; CHECK-NEXT: stxvp vsp36, 64(r30)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r30)
+; CHECK-NEXT: stxvp vsp36, 0(r30)
+; CHECK-NEXT: addi r1, r1, 176
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; AIX-LABEL: spillDMRreg:
+; AIX: # %bb.0:
+; AIX-NEXT: mflr r0
+; AIX-NEXT: std r0, 16(r1)
+; AIX-NEXT: stdu r1, -256(r1)
+; AIX-NEXT: std r31, 248(r1) # 8-byte Folded Spill
+; AIX-NEXT: lxvp vsp34, 96(r3)
+; AIX-NEXT: lxvp vsp36, 64(r3)
+; AIX-NEXT: mr r31, r6
+; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX-NEXT: lxvp vsp34, 32(r3)
+; AIX-NEXT: lxvp vsp36, 0(r3)
+; AIX-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX-NEXT: lxv v2, 0(r4)
+; AIX-NEXT: lxv v3, 16(r4)
+; AIX-NEXT: lxv vs0, 0(r5)
+; AIX-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
+; AIX-NEXT: stxvp vsp38, 112(r1)
+; AIX-NEXT: stxvp vsp32, 144(r1)
+; AIX-NEXT: stxvp vsp36, 176(r1)
+; AIX-NEXT: stxvp vsp34, 208(r1)
+; AIX-NEXT: bl .dummy_func[PR]
+; AIX-NEXT: nop
+; AIX-NEXT: lxvp vsp34, 112(r1)
+; AIX-NEXT: lxvp vsp36, 144(r1)
+; AIX-NEXT: lxvp vsp32, 176(r1)
+; AIX-NEXT: lxvp vsp38, 208(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
+; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX-NEXT: stxvp vsp36, 96(r31)
+; AIX-NEXT: stxvp vsp34, 64(r31)
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX-NEXT: stxvp vsp36, 32(r31)
+; AIX-NEXT: stxvp vsp34, 0(r31)
+; AIX-NEXT: ld r31, 248(r1) # 8-byte Folded Reload
+; AIX-NEXT: addi r1, r1, 256
+; AIX-NEXT: ld r0, 16(r1)
+; AIX-NEXT: mtlr r0
+; AIX-NEXT: blr
+;
+; AIX32-LABEL: spillDMRreg:
+; AIX32: # %bb.0:
+; AIX32-NEXT: mflr r0
+; AIX32-NEXT: stw r0, 8(r1)
+; AIX32-NEXT: stwu r1, -208(r1)
+; AIX32-NEXT: stw r31, 204(r1) # 4-byte Folded Spill
+; AIX32-NEXT: lxvp vsp34, 96(r3)
+; AIX32-NEXT: lxvp vsp36, 64(r3)
+; AIX32-NEXT: mr r31, r6
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX32-NEXT: lxvp vsp34, 32(r3)
+; AIX32-NEXT: lxvp vsp36, 0(r3)
+; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX32-NEXT: lxv v2, 0(r4)
+; AIX32-NEXT: lxv v3, 16(r4)
+; AIX32-NEXT: lxv vs0, 0(r5)
+; AIX32-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX32-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
+; AIX32-NEXT: stxvp vsp38, 64(r1)
+; AIX32-NEXT: stxvp vsp32, 96(r1)
+; AIX32-NEXT: stxvp vsp36, 128(r1)
+; AIX32-NEXT: stxvp vsp34, 160(r1)
+; AIX32-NEXT: bl .dummy_func[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: lxvp vsp34, 64(r1)
+; AIX32-NEXT: lxvp vsp36, 96(r1)
+; AIX32-NEXT: lxvp vsp32, 128(r1)
+; AIX32-NEXT: lxvp vsp38, 160(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
+; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX32-NEXT: stxvp vsp36, 96(r31)
+; AIX32-NEXT: stxvp vsp34, 64(r31)
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX32-NEXT: stxvp vsp36, 32(r31)
+; AIX32-NEXT: stxvp vsp34, 0(r31)
+; AIX32-NEXT: lwz r31, 204(r1) # 4-byte Folded Reload
+; AIX32-NEXT: addi r1, r1, 208
+; AIX32-NEXT: lwz r0, 8(r1)
+; AIX32-NEXT: mtlr r0
+; AIX32-NEXT: blr
+ %v.dmr = load <1024 x i1>, ptr %vop, align 64
+ %v1 = load <256 x i1>, ptr %vpp, align 32
+ %v2 = load <16 x i8>, ptr %vcp, align 32
+ %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ tail call void @dummy_func()
+ %call2 = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
+ store <1024 x i1> %call, ptr %resp, align 64
+ ret void
+}
More information about the llvm-commits
mailing list