[llvm] 82acd8c - [PowerPC] Add code to spill and restore DMRp registers (#142443)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 10:51:00 PDT 2025
Author: Lei Huang
Date: 2025-06-18T13:50:57-04:00
New Revision: 82acd8c377e9ed267195afdbde16eedebabc648c
URL: https://github.com/llvm/llvm-project/commit/82acd8c377e9ed267195afdbde16eedebabc648c
DIFF: https://github.com/llvm/llvm-project/commit/82acd8c377e9ed267195afdbde16eedebabc648c.diff
LOG: [PowerPC] Add code to spill and restore DMRp registers (#142443)
Added:
llvm/test/CodeGen/PowerPC/dmrp-spill.ll
Modified:
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrMMA.td
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
llvm/test/CodeGen/PowerPC/dmr-spill.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 57c86d9e5de64..7c1550e99bae1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1926,7 +1926,7 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
} else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
} else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
- llvm_unreachable("TODO: Implement spill DMRp regclass!");
+ OpcodeIndex = SOK_DMRpSpill;
} else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
OpcodeIndex = SOK_DMRSpill;
} else {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index a27b5718ec89d..7931a9e3ae131 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -81,6 +81,7 @@ enum SpillOpcodeKey {
SOK_AccumulatorSpill,
SOK_UAccumulatorSpill,
SOK_WAccumulatorSpill,
+ SOK_DMRpSpill,
SOK_DMRSpill,
SOK_SPESpill,
SOK_PairedG8Spill,
@@ -119,6 +120,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::EVLDD, \
PPC::RESTORE_QUADWORD}
@@ -140,6 +142,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::RESTORE_QUADWORD}
#define Pwr10LoadOpcodes \
@@ -160,6 +163,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::RESTORE_QUADWORD}
#define FutureLoadOpcodes \
@@ -178,6 +182,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::RESTORE_ACC, \
PPC::RESTORE_UACC, \
PPC::RESTORE_WACC, \
+ PPC::RESTORE_DMRP, \
PPC::RESTORE_DMR, \
NoInstr, \
PPC::RESTORE_QUADWORD}
@@ -199,6 +204,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::EVSTDD, \
PPC::SPILL_QUADWORD}
@@ -220,6 +226,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::SPILL_QUADWORD}
#define Pwr10StoreOpcodes \
@@ -240,6 +247,7 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, \
NoInstr, \
NoInstr, \
+ NoInstr, \
PPC::SPILL_QUADWORD}
#define FutureStoreOpcodes \
@@ -258,6 +266,7 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::SPILL_ACC, \
PPC::SPILL_UACC, \
PPC::SPILL_WACC, \
+ PPC::SPILL_DMRP, \
PPC::SPILL_DMR, \
NoInstr, \
PPC::SPILL_QUADWORD}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index 82e4a60e0a728..436715a0e4ab1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -565,12 +565,16 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
let mayStore = 1 in {
def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
"#SPILL_WACC", []>;
+ def SPILL_DMRP: PPCEmitTimePseudo<(outs), (ins dmrp:$AT, memrix16:$dst),
+ "#SPILL_DMRP", []>;
def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst),
"#SPILL_DMR", []>;
}
let mayLoad = 1, hasSideEffects = 0 in {
def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
"#RESTORE_WACC", []>;
+ def RESTORE_DMRP: PPCEmitTimePseudo<(outs dmrp:$AT), (ins memrix16:$src),
+ "#RESTORE_DMRP", []>;
def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src),
"#RESTORE_DMR", []>;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ea34c1aba82e3..76dca4794e050 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1519,33 +1519,32 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
// DMR is made up of WACC and WACC_HI, so DMXXEXTFDMR512 to spill
// the corresponding 512 bits.
const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
- Register SrcReg = MI.getOperand(0).getReg();
-
- Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+ auto spillDMR = [&](Register SrcReg, int BEIdx, int LEIdx) {
+ auto spillWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) {
+ Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+
+ BuildMI(MBB, II, DL, TII.get(Opc), VSRpReg0)
+ .addDef(VSRpReg1)
+ .addReg(TargetRegisterInfo::getSubReg(SrcReg, RegIdx));
+
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg0, RegState::Kill),
+ FrameIndex, IsLittleEndian ? IdxLE : IdxBE);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(VSRpReg1, RegState::Kill),
+ FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32);
+ };
+ spillWACC(PPC::DMXXEXTFDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx);
+ spillWACC(PPC::DMXXEXTFDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64, LEIdx - 64);
+ };
- BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
- .addDef(VSRpReg3)
- .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
-
- BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
- .addDef(VSRpReg1)
- .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
-
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(VSRpReg0, RegState::Kill),
- FrameIndex, IsLittleEndian ? 96 : 0);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(VSRpReg1, RegState::Kill),
- FrameIndex, IsLittleEndian ? 64 : 32);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(VSRpReg2, RegState::Kill),
- FrameIndex, IsLittleEndian ? 32 : 64);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(VSRpReg3, RegState::Kill),
- FrameIndex, IsLittleEndian ? 0 : 96);
+ Register SrcReg = MI.getOperand(0).getReg();
+ if (MI.getOpcode() == PPC::SPILL_DMRP) {
+ spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr1), 0, 96);
+ spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr0), 128, 224);
+ } else
+ spillDMR(SrcReg, 0, 96);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -1554,7 +1553,7 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
/// lowerDMRRestore - Generate the code to restore the DMR register.
void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
- MachineInstr &MI = *II; // <DestReg> = RESTORE_WACC <offset>
+ MachineInstr &MI = *II; // <DestReg> = RESTORE_DMR[P] <offset>
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
@@ -1563,32 +1562,34 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
bool IsLittleEndian = Subtarget.isLittleEndian();
const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
- Register DestReg = MI.getOperand(0).getReg();
-
- Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
- Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
+ auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) {
+ auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE,
+ int IdxLE) {
+ Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
+ Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
+
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
+ FrameIndex, IsLittleEndian ? IdxLE : IdxBE);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
+ FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32);
+
+ // Kill virtual registers (killedRegState::Killed).
+ BuildMI(MBB, II, DL, TII.get(Opc),
+ TargetRegisterInfo::getSubReg(DestReg, RegIdx))
+ .addReg(VSRpReg0, RegState::Kill)
+ .addReg(VSRpReg1, RegState::Kill);
+ };
+ restoreWACC(PPC::DMXXINSTDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx);
+ restoreWACC(PPC::DMXXINSTDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64,
+ LEIdx - 64);
+ };
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
- FrameIndex, IsLittleEndian ? 96 : 0);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
- FrameIndex, IsLittleEndian ? 64 : 32);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
- FrameIndex, IsLittleEndian ? 32 : 64);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
- FrameIndex, IsLittleEndian ? 0 : 96);
-
- // Kill virtual registers (killedRegState::Killed).
- BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
- TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
- .addReg(VSRpReg2, RegState::Kill)
- .addReg(VSRpReg3, RegState::Kill);
-
- BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
- TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
- .addReg(VSRpReg0, RegState::Kill)
- .addReg(VSRpReg1, RegState::Kill);
+ Register DestReg = MI.getOperand(0).getReg();
+ if (MI.getOpcode() == PPC::RESTORE_DMRP) {
+ restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr1), 0, 96);
+ restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr0), 128, 224);
+ } else
+ restoreDMR(DestReg, 0, 96);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -1756,9 +1757,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::RESTORE_WACC:
lowerWACCRestore(II, FrameIndex);
return true;
+ case PPC::SPILL_DMRP:
case PPC::SPILL_DMR:
lowerDMRSpilling(II, FrameIndex);
return true;
+ case PPC::RESTORE_DMRP:
case PPC::RESTORE_DMR:
lowerDMRRestore(II, FrameIndex);
return true;
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
index c1b01cd2d3fd5..983fce4127af4 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -30,19 +30,19 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv vs0, 0(r5)
; CHECK-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-NEXT: stxvp vsp36, 128(r1)
+; CHECK-NEXT: stxvp vsp34, 96(r1)
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; CHECK-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; CHECK-NEXT: stxvp vsp38, 128(r1)
-; CHECK-NEXT: stxvp vsp32, 96(r1)
; CHECK-NEXT: stxvp vsp36, 64(r1)
; CHECK-NEXT: stxvp vsp34, 32(r1)
; CHECK-NEXT: bl dummy_func@notoc
; CHECK-NEXT: lxvp vsp34, 128(r1)
; CHECK-NEXT: lxvp vsp36, 96(r1)
-; CHECK-NEXT: lxvp vsp32, 64(r1)
-; CHECK-NEXT: lxvp vsp38, 32(r1)
-; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: lxvp vsp34, 64(r1)
+; CHECK-NEXT: lxvp vsp36, 32(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxvp vsp34, 96(r30)
; CHECK-NEXT: stxvp vsp36, 64(r30)
@@ -72,20 +72,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; AIX-NEXT: lxv v3, 16(r4)
; AIX-NEXT: lxv vs0, 0(r5)
; AIX-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX-NEXT: stxvp vsp36, 112(r1)
+; AIX-NEXT: stxvp vsp34, 144(r1)
; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; AIX-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; AIX-NEXT: stxvp vsp38, 112(r1)
-; AIX-NEXT: stxvp vsp32, 144(r1)
; AIX-NEXT: stxvp vsp36, 176(r1)
; AIX-NEXT: stxvp vsp34, 208(r1)
; AIX-NEXT: bl .dummy_func[PR]
; AIX-NEXT: nop
; AIX-NEXT: lxvp vsp34, 112(r1)
; AIX-NEXT: lxvp vsp36, 144(r1)
-; AIX-NEXT: lxvp vsp32, 176(r1)
-; AIX-NEXT: lxvp vsp38, 208(r1)
-; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX-NEXT: lxvp vsp34, 176(r1)
+; AIX-NEXT: lxvp vsp36, 208(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; AIX-NEXT: stxvp vsp36, 96(r31)
; AIX-NEXT: stxvp vsp34, 64(r31)
@@ -115,20 +115,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
; AIX32-NEXT: lxv v3, 16(r4)
; AIX32-NEXT: lxv vs0, 0(r5)
; AIX32-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX32-NEXT: stxvp vsp36, 64(r1)
+; AIX32-NEXT: stxvp vsp34, 96(r1)
; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
-; AIX32-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
-; AIX32-NEXT: stxvp vsp38, 64(r1)
-; AIX32-NEXT: stxvp vsp32, 96(r1)
; AIX32-NEXT: stxvp vsp36, 128(r1)
; AIX32-NEXT: stxvp vsp34, 160(r1)
; AIX32-NEXT: bl .dummy_func[PR]
; AIX32-NEXT: nop
; AIX32-NEXT: lxvp vsp34, 64(r1)
; AIX32-NEXT: lxvp vsp36, 96(r1)
-; AIX32-NEXT: lxvp vsp32, 128(r1)
-; AIX32-NEXT: lxvp vsp38, 160(r1)
-; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX32-NEXT: lxvp vsp34, 128(r1)
+; AIX32-NEXT: lxvp vsp36, 160(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
; AIX32-NEXT: stxvp vsp36, 96(r31)
; AIX32-NEXT: stxvp vsp34, 64(r31)
diff --git a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
new file mode 100644
index 0000000000000..62d42d4a26d52
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
+
+declare void @dummy_func()
+declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+
+define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind {
+; CHECK-LABEL: test_dmsha3hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -304(r1)
+; CHECK-NEXT: std r30, 288(r1) # 8-byte Folded Spill
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: mr r30, r4
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: lxvp vsp34, 128(r3)
+; CHECK-NEXT: lxvp vsp36, 160(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r3)
+; CHECK-NEXT: lxvp vsp36, 224(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmsha3hash dmrp0, 5
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; CHECK-NEXT: stxvp vsp36, 128(r1)
+; CHECK-NEXT: stxvp vsp34, 96(r1)
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp36, 64(r1)
+; CHECK-NEXT: stxvp vsp34, 32(r1)
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; CHECK-NEXT: stxvp vsp36, 256(r1)
+; CHECK-NEXT: stxvp vsp34, 224(r1)
+; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp36, 192(r1)
+; CHECK-NEXT: stxvp vsp34, 160(r1)
+; CHECK-NEXT: bl dummy_func@notoc
+; CHECK-NEXT: lxvp vsp34, 128(r1)
+; CHECK-NEXT: lxvp vsp36, 96(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; CHECK-NEXT: lxvp vsp34, 64(r1)
+; CHECK-NEXT: lxvp vsp36, 32(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; CHECK-NEXT: lxvp vsp34, 256(r1)
+; CHECK-NEXT: lxvp vsp36, 224(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; CHECK-NEXT: lxvp vsp34, 192(r1)
+; CHECK-NEXT: lxvp vsp36, 160(r1)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r30)
+; CHECK-NEXT: stxvp vsp36, 192(r30)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r30)
+; CHECK-NEXT: stxvp vsp36, 128(r30)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r30)
+; CHECK-NEXT: stxvp vsp36, 64(r30)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r30)
+; CHECK-NEXT: stxvp vsp36, 0(r30)
+; CHECK-NEXT: ld r30, 288(r1) # 8-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 304
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; AIX-LABEL: test_dmsha3hash:
+; AIX: # %bb.0: # %entry
+; AIX-NEXT: mflr r0
+; AIX-NEXT: std r0, 16(r1)
+; AIX-NEXT: stdu r1, -384(r1)
+; AIX-NEXT: std r31, 376(r1) # 8-byte Folded Spill
+; AIX-NEXT: lxvp vsp34, 224(r3)
+; AIX-NEXT: lxvp vsp36, 192(r3)
+; AIX-NEXT: mr r31, r4
+; AIX-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; AIX-NEXT: lxvp vsp34, 160(r3)
+; AIX-NEXT: lxvp vsp36, 128(r3)
+; AIX-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; AIX-NEXT: lxvp vsp34, 96(r3)
+; AIX-NEXT: lxvp vsp36, 64(r3)
+; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX-NEXT: lxvp vsp34, 32(r3)
+; AIX-NEXT: lxvp vsp36, 0(r3)
+; AIX-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX-NEXT: dmsha3hash dmrp0, 5
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; AIX-NEXT: stxvp vsp36, 112(r1)
+; AIX-NEXT: stxvp vsp34, 144(r1)
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; AIX-NEXT: stxvp vsp36, 176(r1)
+; AIX-NEXT: stxvp vsp34, 208(r1)
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX-NEXT: stxvp vsp36, 240(r1)
+; AIX-NEXT: stxvp vsp34, 272(r1)
+; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX-NEXT: stxvp vsp36, 304(r1)
+; AIX-NEXT: stxvp vsp34, 336(r1)
+; AIX-NEXT: bl .dummy_func[PR]
+; AIX-NEXT: nop
+; AIX-NEXT: lxvp vsp34, 112(r1)
+; AIX-NEXT: lxvp vsp36, 144(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; AIX-NEXT: lxvp vsp34, 176(r1)
+; AIX-NEXT: lxvp vsp36, 208(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; AIX-NEXT: lxvp vsp34, 240(r1)
+; AIX-NEXT: lxvp vsp36, 272(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX-NEXT: lxvp vsp34, 304(r1)
+; AIX-NEXT: lxvp vsp36, 336(r1)
+; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; AIX-NEXT: stxvp vsp36, 224(r31)
+; AIX-NEXT: stxvp vsp34, 192(r31)
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; AIX-NEXT: stxvp vsp36, 160(r31)
+; AIX-NEXT: stxvp vsp34, 128(r31)
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX-NEXT: stxvp vsp36, 96(r31)
+; AIX-NEXT: stxvp vsp34, 64(r31)
+; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX-NEXT: stxvp vsp36, 32(r31)
+; AIX-NEXT: stxvp vsp34, 0(r31)
+; AIX-NEXT: ld r31, 376(r1) # 8-byte Folded Reload
+; AIX-NEXT: addi r1, r1, 384
+; AIX-NEXT: ld r0, 16(r1)
+; AIX-NEXT: mtlr r0
+; AIX-NEXT: blr
+;
+; AIX32-LABEL: test_dmsha3hash:
+; AIX32: # %bb.0: # %entry
+; AIX32-NEXT: mflr r0
+; AIX32-NEXT: stw r0, 8(r1)
+; AIX32-NEXT: stwu r1, -336(r1)
+; AIX32-NEXT: stw r31, 332(r1) # 4-byte Folded Spill
+; AIX32-NEXT: lxvp vsp34, 224(r3)
+; AIX32-NEXT: lxvp vsp36, 192(r3)
+; AIX32-NEXT: mr r31, r4
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; AIX32-NEXT: lxvp vsp34, 160(r3)
+; AIX32-NEXT: lxvp vsp36, 128(r3)
+; AIX32-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; AIX32-NEXT: lxvp vsp34, 96(r3)
+; AIX32-NEXT: lxvp vsp36, 64(r3)
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; AIX32-NEXT: lxvp vsp34, 32(r3)
+; AIX32-NEXT: lxvp vsp36, 0(r3)
+; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; AIX32-NEXT: dmsha3hash dmrp0, 5
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0
+; AIX32-NEXT: stxvp vsp36, 64(r1)
+; AIX32-NEXT: stxvp vsp34, 96(r1)
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1
+; AIX32-NEXT: stxvp vsp36, 128(r1)
+; AIX32-NEXT: stxvp vsp34, 160(r1)
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0
+; AIX32-NEXT: stxvp vsp36, 192(r1)
+; AIX32-NEXT: stxvp vsp34, 224(r1)
+; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
+; AIX32-NEXT: stxvp vsp36, 256(r1)
+; AIX32-NEXT: stxvp vsp34, 288(r1)
+; AIX32-NEXT: bl .dummy_func[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: lxvp vsp34, 64(r1)
+; AIX32-NEXT: lxvp vsp36, 96(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0
+; AIX32-NEXT: lxvp vsp34, 128(r1)
+; AIX32-NEXT: lxvp vsp36, 160(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1
+; AIX32-NEXT: lxvp vsp34, 192(r1)
+; AIX32-NEXT: lxvp vsp36, 224(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
+; AIX32-NEXT: lxvp vsp34, 256(r1)
+; AIX32-NEXT: lxvp vsp36, 288(r1)
+; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; AIX32-NEXT: stxvp vsp36, 224(r31)
+; AIX32-NEXT: stxvp vsp34, 192(r31)
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; AIX32-NEXT: stxvp vsp36, 160(r31)
+; AIX32-NEXT: stxvp vsp34, 128(r31)
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; AIX32-NEXT: stxvp vsp36, 96(r31)
+; AIX32-NEXT: stxvp vsp34, 64(r31)
+; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; AIX32-NEXT: stxvp vsp36, 32(r31)
+; AIX32-NEXT: stxvp vsp34, 0(r31)
+; AIX32-NEXT: lwz r31, 332(r1) # 4-byte Folded Reload
+; AIX32-NEXT: addi r1, r1, 336
+; AIX32-NEXT: lwz r0, 8(r1)
+; AIX32-NEXT: mtlr r0
+; AIX32-NEXT: blr
+ entry:
+ %0 = load <2048 x i1>, ptr %vopp, align 64
+ %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ tail call void @dummy_func()
+ %3 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5)
+ store <2048 x i1> %2, ptr %resp, align 64
+ ret void
+}
More information about the llvm-commits
mailing list