[llvm-branch-commits] [llvm] 7ecbe0c - [NewPM][AMDGPU] Port amdgpu-lower-kernel-attributes
Arthur Eubanks via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Dec 29 10:31:17 PST 2020
Author: Arthur Eubanks
Date: 2020-12-29T10:26:06-08:00
New Revision: 7ecbe0c7a01848fce88dcf3b6977cec866e9938b
URL: https://github.com/llvm/llvm-project/commit/7ecbe0c7a01848fce88dcf3b6977cec866e9938b
DIFF: https://github.com/llvm/llvm-project/commit/7ecbe0c7a01848fce88dcf3b6977cec866e9938b.diff
LOG: [NewPM][AMDGPU] Port amdgpu-lower-kernel-attributes
And add it to the AMDGPU opt pipeline.
This is a function pass instead of a module pass (like the legacy pass)
because it's getting added to a CGSCCPassManager, and you can't put a
module pass in a CGSCCPassManager.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D93885
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
llvm/tools/opt/opt.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6a0ba20e8026..623bbb2db325 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -115,6 +115,11 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
+struct AMDGPULowerKernelAttributesPass // New pass manager (PassInfoMixin) form of amdgpu-lower-kernel-attributes.
+ : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Per-function entry point; defined in AMDGPULowerKernelAttributes.cpp.
+};
+
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
extern char &AMDGPUPropagateAttributesEarlyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 00e12f808783..3406734d7c7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -19,7 +19,9 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
@@ -41,16 +43,11 @@ enum DispatchPackedOffsets {
};
class AMDGPULowerKernelAttributes : public ModulePass {
- Module *Mod = nullptr;
-
public:
static char ID;
AMDGPULowerKernelAttributes() : ModulePass(ID) {}
- bool processUse(CallInst *CI);
-
- bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
@@ -64,12 +61,7 @@ class AMDGPULowerKernelAttributes : public ModulePass {
} // end anonymous namespace
-bool AMDGPULowerKernelAttributes::doInitialization(Module &M) {
- Mod = &M;
- return false;
-}
-
-bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
+static bool processUse(CallInst *CI) {
Function *F = CI->getParent()->getParent();
auto MD = F->getMetadata("reqd_work_group_size");
@@ -89,7 +81,7 @@ bool AMDGPULowerKernelAttributes::processUse(CallInst *CI) {
Value *GridSizeY = nullptr;
Value *GridSizeZ = nullptr;
- const DataLayout &DL = Mod->getDataLayout();
+ const DataLayout &DL = F->getParent()->getDataLayout();
// We expect to see several GEP users, casted to the appropriate type and
// loaded.
@@ -239,7 +231,7 @@ bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
StringRef DispatchPtrName
= Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
- Function *DispatchPtr = Mod->getFunction(DispatchPtrName);
+ Function *DispatchPtr = M.getFunction(DispatchPtrName);
if (!DispatchPtr) // Dispatch ptr not used.
return false;
@@ -267,3 +259,22 @@ char AMDGPULowerKernelAttributes::ID = 0;
ModulePass *llvm::createAMDGPULowerKernelAttributesPass() { // Factory for the legacy ModulePass implementation.
return new AMDGPULowerKernelAttributes();
}
+
+/// New pass manager entry point: runs processUse() on every call to the
+/// llvm.amdgcn.dispatch.ptr intrinsic found in \p F.
+///
+/// \param F  Function whose instructions are scanned.
+/// \param AM Function analysis manager (not queried by this pass).
+/// \returns PreservedAnalyses::all() if no call site was rewritten, otherwise
+///          PreservedAnalyses::none() so that cached analyses are invalidated.
+PreservedAnalyses
+AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
+  StringRef DispatchPtrName =
+      Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
+
+  Function *DispatchPtr = F.getParent()->getFunction(DispatchPtrName);
+  if (!DispatchPtr) // Dispatch ptr not used.
+    return PreservedAnalyses::all();
+
+  // processUse() reports whether it changed the IR; that result must be
+  // propagated, otherwise the pass manager keeps stale analyses alive.
+  bool MadeChange = false;
+  for (Instruction &I : instructions(F)) {
+    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+      if (CI->getCalledFunction() == DispatchPtr)
+        MadeChange |= processUse(CI);
+    }
+  }
+
+  return MadeChange ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 26d76dd7fede..7a09c91e62d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -508,6 +508,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
return true;
}
+ if (PassName == "amdgpu-lower-kernel-attributes") {
+ PM.addPass(AMDGPULowerKernelAttributesPass());
+ return true;
+ }
return false;
});
@@ -530,6 +534,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB,
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass());
+ // This should run after inlining to have any chance of doing
+ // anything, and before other cleanup optimizations.
+ FPM.addPass(AMDGPULowerKernelAttributesPass());
+
if (Level != PassBuilder::OptimizationLevel::O0) {
// Promote alloca to vector before SROA and loop unroll. If we
// manage to eliminate allocas before unroll we may choose to unroll
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 40b1db00dcdb..49caf2ec755b 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -1,4 +1,5 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-lower-kernel-attributes -instcombine %s | FileCheck -enable-var-scope %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine %s | FileCheck -enable-var-scope %s
target datalayout = "n32"
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index cd2bb7fd9833..094d79d742dd 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -464,7 +464,7 @@ struct TimeTracerRAII {
static bool shouldPinPassToLegacyPM(StringRef Pass) {
std::vector<StringRef> PassNameExactToIgnore = {
"amdgpu-simplifylib", "amdgpu-usenative", "amdgpu-promote-alloca",
- "amdgpu-promote-alloca-to-vector"};
+ "amdgpu-promote-alloca-to-vector", "amdgpu-lower-kernel-attributes"};
for (const auto &P : PassNameExactToIgnore)
if (Pass == P)
return false;
More information about the llvm-branch-commits mailing list