[llvm] 242d51a - [PowerPC] Add DMR and WACC COPY support (#149129)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 08:07:27 PDT 2025
Author: Maryam Moghadas
Date: 2025-08-27T11:07:24-04:00
New Revision: 242d51afe59ec87c31c35eb07c34075866269bd8
URL: https://github.com/llvm/llvm-project/commit/242d51afe59ec87c31c35eb07c34075866269bd8
DIFF: https://github.com/llvm/llvm-project/commit/242d51afe59ec87c31c35eb07c34075866269bd8.diff
LOG: [PowerPC] Add DMR and WACC COPY support (#149129)
This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC
register classes and extends the PPCVSXCopy pass to handle specific WACC
copy patterns.
Added:
llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
llvm/test/CodeGen/PowerPC/dmr-copy.ll
Modified:
llvm/lib/Target/PowerPC/CMakeLists.txt
llvm/lib/Target/PowerPC/PPC.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
Removed:
llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
################################################################################
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 1e39f01fd7aa5..2182039e0eef8 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -49,7 +49,7 @@ add_llvm_target(PowerPCCodeGen
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
PPCTLSDynamicCall.cpp
- PPCVSXCopy.cpp
+ PPCVSXWACCCopy.cpp
PPCReduceCRLogicals.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 124dac4584312..a7cd5cde16b4f 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -39,7 +39,7 @@ class ModulePass;
FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
- FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXWACCCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCReduceCRLogicalsPass();
@@ -64,7 +64,7 @@ class ModulePass;
void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
- void initializePPCVSXCopyPass(PassRegistry&);
+ void initializePPCVSXWACCCopyPass(PassRegistry &);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCVSXSwapRemovalPass(PassRegistry&);
void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7c1550e99bae1..7cb7e05b55ca0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcRegSub1)
.addReg(SrcRegSub1, getKillRegState(KillSrc));
return;
+ } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
+ PPC::WACC_HIRCRegClass.contains(DestReg)) &&
+ (PPC::WACCRCRegClass.contains(SrcReg) ||
+ PPC::WACC_HIRCRegClass.contains(SrcReg))) {
+
+ Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
+ : PPC::DMXXEXTFDMR512_HI;
+
+ RegScavenger RS;
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(std::next(I));
+
+ Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ RS.setRegUsed(TmpReg1);
+ Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ BuildMI(MBB, I, DL, get(Opc))
+ .addReg(TmpReg1, RegState::Define)
+ .addReg(TmpReg2, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
+ : PPC::DMXXINSTDMR512_HI;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(TmpReg1, RegState::Kill)
+ .addReg(TmpReg2, RegState::Kill);
+
+ return;
+ } else if (PPC::DMRRCRegClass.contains(DestReg) &&
+ PPC::DMRRCRegClass.contains(SrcReg)) {
+
+ BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ return;
+
} else
llvm_unreachable("Impossible reg-to-reg copy");
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b5c6ac111dff0..ae92d5eab20cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() {
initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
- initializePPCVSXCopyPass(PR);
+ initializePPCVSXWACCCopyPass(PR);
initializePPCVSXFMAMutatePass(PR);
initializePPCVSXSwapRemovalPass(PR);
initializePPCReduceCRLogicalsPass(PR);
@@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
- addPass(createPPCVSXCopyPass());
+ addPass(createPPCVSXWACCCopyPass());
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
deleted file mode 100644
index 794095cd43769..0000000000000
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// A pass which deals with the complexity of generating legal VSX register
-// copies to/from register classes which partially overlap with the VSX
-// register file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCInstrInfo.h"
-#include "PPCTargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "ppc-vsx-copy"
-
-namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
- // (Altivec and scalar floating-point registers), we need to transform the
- // copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
- static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {}
-
- const TargetInstrInfo *TII;
-
- bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
- MachineRegisterInfo &MRI) {
- if (Register::isVirtualRegister(Reg)) {
- return RC->hasSubClassEq(MRI.getRegClass(Reg));
- } else if (RC->contains(Reg)) {
- return true;
- }
-
- return false;
- }
-
- bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
- }
-
- bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
- }
-
- bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
- }
-
- bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
- }
-
- bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
- return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
- }
-
-protected:
- bool processBlock(MachineBasicBlock &MBB) {
- bool Changed = false;
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (MachineInstr &MI : MBB) {
- if (!MI.isFullCopy())
- continue;
-
- MachineOperand &DstMO = MI.getOperand(0);
- MachineOperand &SrcMO = MI.getOperand(1);
-
- if ( IsVSReg(DstMO.getReg(), MRI) &&
- !IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *to* a VSX register from a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
- assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVSSReg(SrcMO.getReg(), MRI) ||
- IsVSFReg(SrcMO.getReg(), MRI)) &&
- "Unknown source for a VSX copy");
-
- Register NewVReg = MRI.createVirtualRegister(SrcRC);
- BuildMI(MBB, MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
- .addImm(1) // add 1, not 0, because there is no implicit clearing
- // of the high bits.
- .add(SrcMO)
- .addImm(PPC::sub_64);
-
- // The source of the original copy is now the new virtual register.
- SrcMO.setReg(NewVReg);
- } else if (!IsVSReg(DstMO.getReg(), MRI) &&
- IsVSReg(SrcMO.getReg(), MRI)) {
- // This is a copy *from* a VSX register to a non-VSX register.
- Changed = true;
-
- const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
- assert((IsF8Reg(DstMO.getReg(), MRI) ||
- IsVSFReg(DstMO.getReg(), MRI) ||
- IsVSSReg(DstMO.getReg(), MRI)) &&
- "Unknown destination for a VSX copy");
-
- // Copy the VSX value into a new VSX register of the correct subclass.
- Register NewVReg = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
- NewVReg)
- .add(SrcMO);
-
- // Transform the original copy into a subregister extraction copy.
- SrcMO.setReg(NewVReg);
- SrcMO.setSubReg(PPC::sub_64);
- }
- }
-
- return Changed;
- }
-
-public:
- bool runOnMachineFunction(MachineFunction &MF) override {
- // If we don't have VSX on the subtarget, don't do anything.
- const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
- if (!STI.hasVSX())
- return false;
- TII = STI.getInstrInfo();
-
- bool Changed = false;
-
- for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
- if (processBlock(B))
- Changed = true;
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
- } // end anonymous namespace
-
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
- "PowerPC VSX Copy Legalization", false, false)
-
-char PPCVSXCopy::ID = 0;
-FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
diff --git a/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
new file mode 100644
index 0000000000000..2ec566ddb0b8e
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -0,0 +1,182 @@
+//===--------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass which deals with the complexity of generating legal VSX register
+// copies to/from register classes which partially overlap with the VSX
+// register file and combines the wacc/wacc_hi copies when needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace {
+// PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
+// (Altivec and scalar floating-point registers), we need to transform the
+// copies into subregister copies with other restrictions.
+struct PPCVSXWACCCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
+
+ const TargetInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (Register::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+ }
+
+ bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI.getOperand(0);
+ MachineOperand &SrcMO = MI.getOperand(1);
+
+ if (IsVSReg(DstMO.getReg(), MRI) && !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) || IsVSSReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ Register NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .add(SrcMO)
+ .addImm(PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) || IsVSFReg(DstMO.getReg(), MRI) ||
+ IsVSSReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ Register NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg)
+ .add(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(PPC::sub_64);
+ } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+ IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+ // Matches the pattern:
+ // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+ // %c:wacc_hirc = COPY %a:waccrc
+ // And replaces it with:
+ // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+ if (!DefMI || !DefMI->isCopy())
+ continue;
+
+ MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+ if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
+ continue;
+
+ if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+ continue;
+
+ // Rewrite the second copy to use the original register's subreg
+ SrcMO.setReg(OrigSrc.getReg());
+ SrcMO.setSubReg(PPC::sub_wacc_hi);
+ Changed = true;
+
+ // Remove the intermediate copy if safe
+ if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+ DefMI->eraseFromParent();
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // If we don't have VSX on the subtarget, don't do anything.
+ const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
+ if (!STI.hasVSX())
+ return false;
+ TII = STI.getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
+ if (processBlock(B))
+ Changed = true;
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // end anonymous namespace
+
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE, "PowerPC VSX Copy Legalization",
+ false, false)
+
+char PPCVSXWACCCopy::ID = 0;
+FunctionPass *llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
new file mode 100644
index 0000000000000..d5a24309f94d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
+; CHECK-LABEL: test_wacc_copy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std r31, -8(r1)
+; CHECK-NEXT: std r30, -16(r1)
+; CHECK-NEXT: mr r30, r1
+; CHECK-NEXT: clrldi r0, r1, 57
+; CHECK-NEXT: subfic r0, r0, -384
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_register r30
+; CHECK-NEXT: .cfi_offset r31, -8
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: mr r31, r1
+; CHECK-NEXT: std r3, 360(r31)
+; CHECK-NEXT: std r4, 352(r31)
+; CHECK-NEXT: stxv v2, 336(r31)
+; CHECK-NEXT: std r7, 328(r31)
+; CHECK-NEXT: ld r3, 360(r31)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: ld r3, 352(r31)
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 112(r31)
+; CHECK-NEXT: stxv v3, 96(r31)
+; CHECK-NEXT: lxv v2, 112(r31)
+; CHECK-NEXT: lxv v3, 96(r31)
+; CHECK-NEXT: lxv vs0, 336(r31)
+; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: lxvp vsp34, 128(r31)
+; CHECK-NEXT: lxvp vsp36, 160(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r31)
+; CHECK-NEXT: lxvp vsp36, 224(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: ld r3, 328(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r3)
+; CHECK-NEXT: stxvp vsp36, 64(r3)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r3)
+; CHECK-NEXT: stxvp vsp36, 0(r3)
+; CHECK-NEXT: mr r1, r30
+; CHECK-NEXT: ld r31, -8(r1)
+; CHECK-NEXT: ld r30, -16(r1)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_wacc_copy:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: std r31, -8(r1)
+; CHECK-BE-NEXT: std r30, -16(r1)
+; CHECK-BE-NEXT: mr r30, r1
+; CHECK-BE-NEXT: clrldi r0, r1, 57
+; CHECK-BE-NEXT: subfic r0, r0, -384
+; CHECK-BE-NEXT: stdux r1, r1, r0
+; CHECK-BE-NEXT: mr r31, r1
+; CHECK-BE-NEXT: std r3, 360(r31)
+; CHECK-BE-NEXT: std r4, 352(r31)
+; CHECK-BE-NEXT: stxv v2, 336(r31)
+; CHECK-BE-NEXT: std r5, 328(r31)
+; CHECK-BE-NEXT: ld r3, 360(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: ld r3, 352(r31)
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: stxv v3, 112(r31)
+; CHECK-BE-NEXT: stxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v3, 112(r31)
+; CHECK-BE-NEXT: lxv vs0, 336(r31)
+; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 224(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 160(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 128(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: ld r3, 328(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r3)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r3)
+; CHECK-BE-NEXT: mr r1, r30
+; CHECK-BE-NEXT: ld r31, -8(r1)
+; CHECK-BE-NEXT: ld r30, -16(r1)
+; CHECK-BE-NEXT: blr
+entry:
+ %vdmrp.addr = alloca ptr, align 8
+ %vpp.addr = alloca ptr, align 8
+ %vc.addr = alloca <16 x i8>, align 16
+ %resp.addr = alloca ptr, align 8
+ %vdmr = alloca <1024 x i1>, align 128
+ %vp = alloca <256 x i1>, align 32
+ store ptr %vdmrp, ptr %vdmrp.addr, align 8
+ store ptr %vpp, ptr %vpp.addr, align 8
+ store <16 x i8> %vc, ptr %vc.addr, align 16
+ store ptr %resp, ptr %resp.addr, align 8
+ %0 = load ptr, ptr %vdmrp.addr, align 8
+ %1 = load <1024 x i1>, ptr %0, align 128
+ store <1024 x i1> %1, ptr %vdmr, align 128
+ %2 = load ptr, ptr %vpp.addr, align 8
+ %3 = load <256 x i1>, ptr %2, align 32
+ store <256 x i1> %3, ptr %vp, align 32
+ %4 = load <256 x i1>, ptr %vp, align 32
+ %5 = load <16 x i8>, ptr %vc.addr, align 16
+ %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
+ store <1024 x i1> %6, ptr %vdmr, align 128
+ %7 = load <1024 x i1>, ptr %vdmr, align 128
+ %8 = load ptr, ptr %resp.addr, align 8
+ store <1024 x i1> %7, ptr %8, align 128
+ ret void
+}
+
+define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmmr dmr2, dmr0
+; CHECK-NEXT: dmxor dmr2, dmr1
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmxor dmr0, dmr1
+; CHECK-NEXT: dmmr dmr1, dmr2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: dmmr dmr0, dmr0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmmr dmr2, dmr0
+; CHECK-BE-NEXT: dmxor dmr2, dmr1
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxor dmr0, dmr1
+; CHECK-BE-NEXT: dmmr dmr1, dmr2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: dmmr dmr0, dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %1 = load <1024 x i1>, ptr %p1, align 128
+ %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
+ %3 = load <1024 x i1>, ptr %p2, align 128
+ %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
+ %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
+ store <1024 x i1> %5, ptr %res1, align 128
+ %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
+ store <1024 x i1> %6, ptr %res2, align 128
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }
+
+
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
index c304e9382de35..16685ad26cf11 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
@@ -92,7 +92,7 @@ static_library("LLVMPowerPCCodeGen") {
"PPCTargetMachine.cpp",
"PPCTargetObjectFile.cpp",
"PPCTargetTransformInfo.cpp",
- "PPCVSXCopy.cpp",
+ "PPCVSXWACCCopy.cpp",
"PPCVSXFMAMutate.cpp",
"PPCVSXSwapRemoval.cpp",
]
More information about the llvm-commits mailing list