[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)
Akshat Oke via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 11 23:03:34 PDT 2025
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130059
>From ee0ed7e6fdce69d98a05e42327a305228797a9de Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 10:52:00 +0000
Subject: [PATCH 1/4] [AMDGPU][NPM] Port GCNCreateVOPD to NPM
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 7 ++-
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 +
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +-
llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 53 ++++++++++++-------
4 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 00355d8fb5e5f..95340f1287d8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -358,6 +358,11 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
};
+class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -443,7 +448,7 @@ extern char &SIFormMemoryClausesID;
void initializeSIPostRABundlerLegacyPass(PassRegistry &);
extern char &SIPostRABundlerLegacyID;
-void initializeGCNCreateVOPDPass(PassRegistry &);
+void initializeGCNCreateVOPDLegacyPass(PassRegistry &);
extern char &GCNCreateVOPDID;
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 16ae23133a549..98b0bc7358e9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
+MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 856b5eb359c49..b06e87baa4ea9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -547,7 +547,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesLegacyPass(*PR);
initializeSIPostRABundlerLegacyPass(*PR);
- initializeGCNCreateVOPDPass(*PR);
+ initializeGCNCreateVOPDLegacyPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -2150,7 +2150,7 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
- // TODO: addPass(GCNCreateVOPDPass());
+ addPass(GCNCreateVOPDPass());
}
// TODO: addPass(SIMemoryLegalizerPass());
// TODO: addPass(SIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 798279b279da3..32a26469d616b 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "gcn-create-vopd"
@@ -36,7 +37,7 @@ using namespace llvm;
namespace {
-class GCNCreateVOPD : public MachineFunctionPass {
+class GCNCreateVOPD {
private:
class VOPDCombineInfo {
public:
@@ -49,20 +50,8 @@ class GCNCreateVOPD : public MachineFunctionPass {
};
public:
- static char ID;
const GCNSubtarget *ST = nullptr;
- GCNCreateVOPD() : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override {
- return "GCN Create VOPD Instructions";
- }
-
bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
auto *FirstMI = CI.FirstMI;
auto *SecondMI = CI.SecondMI;
@@ -112,9 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass {
return true;
}
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()))
- return false;
+ bool run(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
return false;
@@ -163,11 +150,39 @@ class GCNCreateVOPD : public MachineFunctionPass {
}
};
+class GCNCreateVOPDLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "GCN Create VOPD Instructions";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ return GCNCreateVOPD().run(MF);
+ }
+};
+
} // namespace
-char GCNCreateVOPD::ID = 0;
+PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM) {
+ if (!GCNCreateVOPD().run(MF))
+ return PreservedAnalyses::all();
+ return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
+}
+
+char GCNCreateVOPDLegacy::ID = 0;
-char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
+char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
-INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
+INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
false, false)
>From 94ec994f55a8b02e8b070e82d6253eb9de67ca97 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Mon, 10 Mar 2025 04:27:24 +0000
Subject: [PATCH 2/4] clang format
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +-
llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 186 ++++++++++++-----------
2 files changed, 97 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 95340f1287d8d..96f23432685de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -360,7 +360,8 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
public:
- PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM);
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM);
};
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 32a26469d616b..22123f738c948 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -49,105 +49,108 @@ class GCNCreateVOPD {
MachineInstr *SecondMI;
};
-public:
- const GCNSubtarget *ST = nullptr;
-
- bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
- auto *FirstMI = CI.FirstMI;
- auto *SecondMI = CI.SecondMI;
- unsigned Opc1 = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- unsigned EncodingFamily =
- AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
- int NewOpcode =
- AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
- assert(NewOpcode != -1 &&
- "Should have previously determined this as a possible VOPD\n");
-
- auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
- FirstMI->getDebugLoc(), SII->get(NewOpcode))
- .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
- namespace VOPD = AMDGPU::VOPD;
- MachineInstr *MI[] = {FirstMI, SecondMI};
- auto InstInfo =
- AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
- VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
- }
-
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
- for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ public:
+ const GCNSubtarget *ST = nullptr;
+
+ bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+ auto *FirstMI = CI.FirstMI;
+ auto *SecondMI = CI.SecondMI;
+ unsigned Opc1 = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ unsigned EncodingFamily =
+ AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+ int NewOpcode =
+ AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+ AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+ assert(NewOpcode != -1 &&
+ "Should have previously determined this as a possible VOPD\n");
+
+ auto VOPDInst =
+ BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
+ SII->get(NewOpcode))
+ .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+ namespace VOPD = AMDGPU::VOPD;
+ MachineInstr *MI[] = {FirstMI, SecondMI};
+ auto InstInfo =
+ AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
- }
- SII->fixImplicitOperands(*VOPDInst);
- for (auto CompIdx : VOPD::COMPONENTS)
- VOPDInst.copyImplicitOps(*MI[CompIdx]);
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+ for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
+ ++CompSrcIdx) {
+ auto MCOprIdx =
+ InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+ }
+ }
- LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
- << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+ SII->fixImplicitOperands(*VOPDInst);
+ for (auto CompIdx : VOPD::COMPONENTS)
+ VOPDInst.copyImplicitOps(*MI[CompIdx]);
- for (auto CompIdx : VOPD::COMPONENTS)
- MI[CompIdx]->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+ << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
- ++NumVOPDCreated;
- return true;
- }
+ for (auto CompIdx : VOPD::COMPONENTS)
+ MI[CompIdx]->eraseFromParent();
- bool run(MachineFunction &MF) {
- ST = &MF.getSubtarget<GCNSubtarget>();
- if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
- return false;
- LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
- const SIInstrInfo *SII = ST->getInstrInfo();
- bool Changed = false;
-
- SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
- for (auto &MBB : MF) {
- auto MII = MBB.begin(), E = MBB.end();
- while (MII != E) {
- auto *FirstMI = &*MII;
- MII = next_nodbg(MII, MBB.end());
- if (MII == MBB.end())
- break;
- if (FirstMI->isDebugInstr())
- continue;
- auto *SecondMI = &*MII;
- unsigned Opc = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
- llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
- VOPDCombineInfo CI;
-
- if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- CI = VOPDCombineInfo(FirstMI, SecondMI);
- else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- CI = VOPDCombineInfo(SecondMI, FirstMI);
- else
- continue;
- // checkVOPDRegConstraints cares about program order, but doReplace
- // cares about X-Y order in the constituted VOPD
- if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
- ReplaceCandidates.push_back(CI);
- ++MII;
+ ++NumVOPDCreated;
+ return true;
+ }
+
+ bool run(MachineFunction &MF) {
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+ return false;
+ LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+ const SIInstrInfo *SII = ST->getInstrInfo();
+ bool Changed = false;
+
+ SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+ for (auto &MBB : MF) {
+ auto MII = MBB.begin(), E = MBB.end();
+ while (MII != E) {
+ auto *FirstMI = &*MII;
+ MII = next_nodbg(MII, MBB.end());
+ if (MII == MBB.end())
+ break;
+ if (FirstMI->isDebugInstr())
+ continue;
+ auto *SecondMI = &*MII;
+ unsigned Opc = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+ VOPDCombineInfo CI;
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ CI = VOPDCombineInfo(FirstMI, SecondMI);
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ CI = VOPDCombineInfo(SecondMI, FirstMI);
+ else
+ continue;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about X-Y order in the constituted VOPD
+ if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ ReplaceCandidates.push_back(CI);
+ ++MII;
+ }
}
}
- }
- for (auto &CI : ReplaceCandidates) {
- Changed |= doReplace(SII, CI);
- }
+ for (auto &CI : ReplaceCandidates) {
+ Changed |= doReplace(SII, CI);
+ }
- return Changed;
- }
+ return Changed;
+ }
};
class GCNCreateVOPDLegacy : public MachineFunctionPass {
@@ -173,8 +176,9 @@ class GCNCreateVOPDLegacy : public MachineFunctionPass {
} // namespace
-PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
- MachineFunctionAnalysisManager &AM) {
+PreservedAnalyses
+llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM) {
if (!GCNCreateVOPD().run(MF))
return PreservedAnalyses::all();
return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
>From 612e0ab5752d887dae8104c9ed5a208e3a0be940 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 09:03:31 +0000
Subject: [PATCH 3/4] add test
---
llvm/test/CodeGen/AMDGPU/vopd-combine.mir | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 8d5060177c63d..5a13401c1631c 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -4,6 +4,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+
--- |
@lds = external addrspace(3) global [8 x i8]
define void @vopd_schedule() { ret void }
>From 1aeebc20c7edad2e6b6e2b1d410fb778016fc304 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 12 Mar 2025 06:03:07 +0000
Subject: [PATCH 4/4] format and sort registry
---
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +-
llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 181 +++++++++---------
2 files changed, 90 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 98b0bc7358e9d..b1dba132e5bf6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -103,8 +103,8 @@ MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
-MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
+MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 22123f738c948..ccc711a0bcc4e 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -49,108 +49,105 @@ class GCNCreateVOPD {
MachineInstr *SecondMI;
};
- public:
- const GCNSubtarget *ST = nullptr;
-
- bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
- auto *FirstMI = CI.FirstMI;
- auto *SecondMI = CI.SecondMI;
- unsigned Opc1 = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- unsigned EncodingFamily =
- AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
- int NewOpcode =
- AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
- assert(NewOpcode != -1 &&
- "Should have previously determined this as a possible VOPD\n");
-
- auto VOPDInst =
- BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
- SII->get(NewOpcode))
- .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
- namespace VOPD = AMDGPU::VOPD;
- MachineInstr *MI[] = {FirstMI, SecondMI};
- auto InstInfo =
- AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
- VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
- }
+public:
+ const GCNSubtarget *ST = nullptr;
+
+ bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+ auto *FirstMI = CI.FirstMI;
+ auto *SecondMI = CI.SecondMI;
+ unsigned Opc1 = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ unsigned EncodingFamily =
+ AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+ int NewOpcode =
+ AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+ AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+ assert(NewOpcode != -1 &&
+ "Should have previously determined this as a possible VOPD\n");
+
+ auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
+ FirstMI->getDebugLoc(), SII->get(NewOpcode))
+ .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+ namespace VOPD = AMDGPU::VOPD;
+ MachineInstr *MI[] = {FirstMI, SecondMI};
+ auto InstInfo =
+ AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
+ VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+ }
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
- for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
- ++CompSrcIdx) {
- auto MCOprIdx =
- InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
- VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
- }
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+ for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
+ auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
}
+ }
- SII->fixImplicitOperands(*VOPDInst);
- for (auto CompIdx : VOPD::COMPONENTS)
- VOPDInst.copyImplicitOps(*MI[CompIdx]);
+ SII->fixImplicitOperands(*VOPDInst);
+ for (auto CompIdx : VOPD::COMPONENTS)
+ VOPDInst.copyImplicitOps(*MI[CompIdx]);
- LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
- << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+ LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+ << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
- for (auto CompIdx : VOPD::COMPONENTS)
- MI[CompIdx]->eraseFromParent();
+ for (auto CompIdx : VOPD::COMPONENTS)
+ MI[CompIdx]->eraseFromParent();
- ++NumVOPDCreated;
- return true;
- }
+ ++NumVOPDCreated;
+ return true;
+ }
- bool run(MachineFunction &MF) {
- ST = &MF.getSubtarget<GCNSubtarget>();
- if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
- return false;
- LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
- const SIInstrInfo *SII = ST->getInstrInfo();
- bool Changed = false;
-
- SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
- for (auto &MBB : MF) {
- auto MII = MBB.begin(), E = MBB.end();
- while (MII != E) {
- auto *FirstMI = &*MII;
- MII = next_nodbg(MII, MBB.end());
- if (MII == MBB.end())
- break;
- if (FirstMI->isDebugInstr())
- continue;
- auto *SecondMI = &*MII;
- unsigned Opc = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
- llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
- VOPDCombineInfo CI;
-
- if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- CI = VOPDCombineInfo(FirstMI, SecondMI);
- else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- CI = VOPDCombineInfo(SecondMI, FirstMI);
- else
- continue;
- // checkVOPDRegConstraints cares about program order, but doReplace
- // cares about X-Y order in the constituted VOPD
- if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
- ReplaceCandidates.push_back(CI);
- ++MII;
- }
+ bool run(MachineFunction &MF) {
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+ return false;
+ LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+ const SIInstrInfo *SII = ST->getInstrInfo();
+ bool Changed = false;
+
+ SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+ for (auto &MBB : MF) {
+ auto MII = MBB.begin(), E = MBB.end();
+ while (MII != E) {
+ auto *FirstMI = &*MII;
+ MII = next_nodbg(MII, MBB.end());
+ if (MII == MBB.end())
+ break;
+ if (FirstMI->isDebugInstr())
+ continue;
+ auto *SecondMI = &*MII;
+ unsigned Opc = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+ VOPDCombineInfo CI;
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ CI = VOPDCombineInfo(FirstMI, SecondMI);
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ CI = VOPDCombineInfo(SecondMI, FirstMI);
+ else
+ continue;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about X-Y order in the constituted VOPD
+ if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ ReplaceCandidates.push_back(CI);
+ ++MII;
}
}
- for (auto &CI : ReplaceCandidates) {
- Changed |= doReplace(SII, CI);
- }
-
- return Changed;
}
+ for (auto &CI : ReplaceCandidates) {
+ Changed |= doReplace(SII, CI);
+ }
+
+ return Changed;
+ }
};
class GCNCreateVOPDLegacy : public MachineFunctionPass {
More information about the llvm-branch-commits
mailing list