[llvm] [PowerPC] Add DMR and WACC COPY support (PR #149129)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 09:02:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Maryam Moghadas (maryammo)
<details>
<summary>Changes</summary>
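This patch updates PPCInstrInfo::copyPhysReg to support physical register copies for the DMR and WACC/WACC_HI register classes. It also renames the PPCVSXCopy pass to PPCVSXWACCCopy and extends it to handle a specific WACC copy pattern: a WACC_HI copy whose source is a WACC copy of a DMR's sub_wacc_hi subregister is rewritten to read that subregister directly.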
---
Full diff: https://github.com/llvm/llvm-project/pull/149129.diff
7 Files Affected:
- (modified) llvm/lib/Target/PowerPC/CMakeLists.txt (+1-1)
- (modified) llvm/lib/Target/PowerPC/PPC.h (+2-2)
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+43)
- (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+2-2)
- (renamed) llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp (+35-8)
- (added) llvm/test/CodeGen/PowerPC/dmr-copy.ll (+245)
- (modified) llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn (+1-1)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..a5e1522753c8b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -50,7 +50,7 @@ add_llvm_target(PowerPCCodeGen
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
PPCTLSDynamicCall.cpp
- PPCVSXCopy.cpp
+ PPCVSXWACCCopy.cpp
PPCReduceCRLogicals.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 124dac4584312..a8f0f215ebee5 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -39,7 +39,7 @@ class ModulePass;
FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
- FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXWACCCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCReduceCRLogicalsPass();
@@ -64,7 +64,7 @@ class ModulePass;
void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
- void initializePPCVSXCopyPass(PassRegistry&);
+ void initializePPCVSXWACCCopyPass(PassRegistry&);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCVSXSwapRemovalPass(PassRegistry&);
void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7c1550e99bae1..7cb7e05b55ca0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcRegSub1)
.addReg(SrcRegSub1, getKillRegState(KillSrc));
return;
+ } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
+ PPC::WACC_HIRCRegClass.contains(DestReg)) &&
+ (PPC::WACCRCRegClass.contains(SrcReg) ||
+ PPC::WACC_HIRCRegClass.contains(SrcReg))) {
+
+ Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
+ : PPC::DMXXEXTFDMR512_HI;
+
+ RegScavenger RS;
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(std::next(I));
+
+ Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ RS.setRegUsed(TmpReg1);
+ Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ BuildMI(MBB, I, DL, get(Opc))
+ .addReg(TmpReg1, RegState::Define)
+ .addReg(TmpReg2, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
+ : PPC::DMXXINSTDMR512_HI;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(TmpReg1, RegState::Kill)
+ .addReg(TmpReg2, RegState::Kill);
+
+ return;
+ } else if (PPC::DMRRCRegClass.contains(DestReg) &&
+ PPC::DMRRCRegClass.contains(SrcReg)) {
+
+ BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ return;
+
} else
llvm_unreachable("Impossible reg-to-reg copy");
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b5c6ac111dff0..ae92d5eab20cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() {
initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
- initializePPCVSXCopyPass(PR);
+ initializePPCVSXWACCCopyPass(PR);
initializePPCVSXFMAMutatePass(PR);
initializePPCVSXSwapRemovalPass(PR);
initializePPCReduceCRLogicalsPass(PR);
@@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
- addPass(createPPCVSXCopyPass());
+ addPass(createPPCVSXWACCCopyPass());
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
similarity index 76%
rename from llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
rename to llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
index 794095cd43769..044c945fc2049 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -1,4 +1,4 @@
-//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//===-------------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
// A pass which deals with the complexity of generating legal VSX register
// copies to/from register classes which partially overlap with the VSX
-// register file.
+// register file and combines the wacc/wacc_hi copies when needed.
//
//===----------------------------------------------------------------------===//
@@ -29,12 +29,12 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
// copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
+ struct PPCVSXWACCCopy : public MachineFunctionPass {
static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {}
+ PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
@@ -122,6 +122,33 @@ namespace {
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
SrcMO.setSubReg(PPC::sub_64);
+ } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+ IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+ // Matches the pattern:
+ // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+ // %c:wacc_hirc = COPY %a:waccrc
+ // And replaces it with:
+ // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+ if (!DefMI || !DefMI->isCopy())
+ continue;
+
+ MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+ if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
+ continue;
+
+ if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+ continue;
+
+ // Rewrite the second copy to use the original register's subreg
+ SrcMO.setReg(OrigSrc.getReg());
+ SrcMO.setSubReg(PPC::sub_wacc_hi);
+ Changed = true;
+
+ // Remove the intermediate copy if safe
+ if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+ DefMI->eraseFromParent();
}
}
@@ -151,9 +178,9 @@ namespace {
};
} // end anonymous namespace
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
-char PPCVSXCopy::ID = 0;
+char PPCVSXWACCCopy::ID = 0;
FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
new file mode 100644
index 0000000000000..d5a24309f94d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
+; CHECK-LABEL: test_wacc_copy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std r31, -8(r1)
+; CHECK-NEXT: std r30, -16(r1)
+; CHECK-NEXT: mr r30, r1
+; CHECK-NEXT: clrldi r0, r1, 57
+; CHECK-NEXT: subfic r0, r0, -384
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_register r30
+; CHECK-NEXT: .cfi_offset r31, -8
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: mr r31, r1
+; CHECK-NEXT: std r3, 360(r31)
+; CHECK-NEXT: std r4, 352(r31)
+; CHECK-NEXT: stxv v2, 336(r31)
+; CHECK-NEXT: std r7, 328(r31)
+; CHECK-NEXT: ld r3, 360(r31)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: ld r3, 352(r31)
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 112(r31)
+; CHECK-NEXT: stxv v3, 96(r31)
+; CHECK-NEXT: lxv v2, 112(r31)
+; CHECK-NEXT: lxv v3, 96(r31)
+; CHECK-NEXT: lxv vs0, 336(r31)
+; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: lxvp vsp34, 128(r31)
+; CHECK-NEXT: lxvp vsp36, 160(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r31)
+; CHECK-NEXT: lxvp vsp36, 224(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: ld r3, 328(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r3)
+; CHECK-NEXT: stxvp vsp36, 64(r3)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r3)
+; CHECK-NEXT: stxvp vsp36, 0(r3)
+; CHECK-NEXT: mr r1, r30
+; CHECK-NEXT: ld r31, -8(r1)
+; CHECK-NEXT: ld r30, -16(r1)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_wacc_copy:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: std r31, -8(r1)
+; CHECK-BE-NEXT: std r30, -16(r1)
+; CHECK-BE-NEXT: mr r30, r1
+; CHECK-BE-NEXT: clrldi r0, r1, 57
+; CHECK-BE-NEXT: subfic r0, r0, -384
+; CHECK-BE-NEXT: stdux r1, r1, r0
+; CHECK-BE-NEXT: mr r31, r1
+; CHECK-BE-NEXT: std r3, 360(r31)
+; CHECK-BE-NEXT: std r4, 352(r31)
+; CHECK-BE-NEXT: stxv v2, 336(r31)
+; CHECK-BE-NEXT: std r5, 328(r31)
+; CHECK-BE-NEXT: ld r3, 360(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: ld r3, 352(r31)
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: stxv v3, 112(r31)
+; CHECK-BE-NEXT: stxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v3, 112(r31)
+; CHECK-BE-NEXT: lxv vs0, 336(r31)
+; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 224(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 160(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 128(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: ld r3, 328(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r3)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r3)
+; CHECK-BE-NEXT: mr r1, r30
+; CHECK-BE-NEXT: ld r31, -8(r1)
+; CHECK-BE-NEXT: ld r30, -16(r1)
+; CHECK-BE-NEXT: blr
+entry:
+ %vdmrp.addr = alloca ptr, align 8
+ %vpp.addr = alloca ptr, align 8
+ %vc.addr = alloca <16 x i8>, align 16
+ %resp.addr = alloca ptr, align 8
+ %vdmr = alloca <1024 x i1>, align 128
+ %vp = alloca <256 x i1>, align 32
+ store ptr %vdmrp, ptr %vdmrp.addr, align 8
+ store ptr %vpp, ptr %vpp.addr, align 8
+ store <16 x i8> %vc, ptr %vc.addr, align 16
+ store ptr %resp, ptr %resp.addr, align 8
+ %0 = load ptr, ptr %vdmrp.addr, align 8
+ %1 = load <1024 x i1>, ptr %0, align 128
+ store <1024 x i1> %1, ptr %vdmr, align 128
+ %2 = load ptr, ptr %vpp.addr, align 8
+ %3 = load <256 x i1>, ptr %2, align 32
+ store <256 x i1> %3, ptr %vp, align 32
+ %4 = load <256 x i1>, ptr %vp, align 32
+ %5 = load <16 x i8>, ptr %vc.addr, align 16
+ %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
+ store <1024 x i1> %6, ptr %vdmr, align 128
+ %7 = load <1024 x i1>, ptr %vdmr, align 128
+ %8 = load ptr, ptr %resp.addr, align 8
+ store <1024 x i1> %7, ptr %8, align 128
+ ret void
+}
+
+define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmmr dmr2, dmr0
+; CHECK-NEXT: dmxor dmr2, dmr1
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmxor dmr0, dmr1
+; CHECK-NEXT: dmmr dmr1, dmr2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: dmmr dmr0, dmr0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmmr dmr2, dmr0
+; CHECK-BE-NEXT: dmxor dmr2, dmr1
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxor dmr0, dmr1
+; CHECK-BE-NEXT: dmmr dmr1, dmr2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: dmmr dmr0, dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %1 = load <1024 x i1>, ptr %p1, align 128
+ %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
+ %3 = load <1024 x i1>, ptr %p2, align 128
+ %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
+ %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
+ store <1024 x i1> %5, ptr %res1, align 128
+ %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
+ store <1024 x i1> %6, ptr %res2, align 128
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }
+
+
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
index ea3615cee392a..8ab54156a8af2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
@@ -93,7 +93,7 @@ static_library("LLVMPowerPCCodeGen") {
"PPCTargetMachine.cpp",
"PPCTargetObjectFile.cpp",
"PPCTargetTransformInfo.cpp",
- "PPCVSXCopy.cpp",
+ "PPCVSXWACCCopy.cpp",
"PPCVSXFMAMutate.cpp",
"PPCVSXSwapRemoval.cpp",
]
``````````
</details>
https://github.com/llvm/llvm-project/pull/149129
More information about the llvm-commits mailing list