[llvm] 660cae8 - Revert "[AMDGPU] [IndirectCalls] Don't propagate attributes to address taken functions and their callees"
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 23 18:34:56 PDT 2021
Author: Jon Chesterfield
Date: 2021-06-24T02:33:50+01:00
New Revision: 660cae84c3144a42272daa16415fc9a2532773c4
URL: https://github.com/llvm/llvm-project/commit/660cae84c3144a42272daa16415fc9a2532773c4
DIFF: https://github.com/llvm/llvm-project/commit/660cae84c3144a42272daa16415fc9a2532773c4.diff
LOG: Revert "[AMDGPU] [IndirectCalls] Don't propagate attributes to address taken functions and their callees"
This reverts commit 6a3beb1f68d6791a4cd0190f68b48510f754a00a.
Test case that triggers an infinite loop before the revert is at
the review for D103138.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll
Removed:
llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index fbad7d9fdcf20..0e4c26170a8fa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -30,13 +30,11 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include <queue>
#define DEBUG_TYPE "amdgpu-propagate-attributes"
@@ -115,17 +113,12 @@ class AMDGPUPropagateAttributes {
// Clone functions as needed or just set attributes.
bool AllowClone;
- CallGraph *ModuleCG = nullptr;
-
// Option propagation roots.
SmallSet<Function *, 32> Roots;
// Clones of functions with their attributes.
SmallVector<Clone, 32> Clones;
- // To memoize address taken functions.
- SmallSet<Function *, 32> AddressTakenFunctions;
-
// Find a clone with required features.
Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);
@@ -146,23 +139,14 @@ class AMDGPUPropagateAttributes {
bool process();
public:
- AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone)
- : TM(TM), AllowClone(AllowClone) {}
+ AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
+ TM(TM), AllowClone(AllowClone) {}
// Use F as a root and propagate its attributes.
bool process(Function &F);
// Propagate attributes starting from kernel functions.
- bool process(Module &M, CallGraph *CG);
-
- // Remove attributes from F.
- // This is used in presence of address taken functions.
- bool removeAttributes(Function *F);
-
- // Handle call graph rooted at address taken functions.
- // This function will erase all attributes present
- // on all functions called from address taken functions transitively.
- bool handleAddressTakenFunctions(CallGraph *CG);
+ bool process(Module &M);
};
// Allows to propagate attributes early, but no clonning is allowed as it must
@@ -198,7 +182,6 @@ class AMDGPUPropagateAttributesLate : public ModulePass {
*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnModule(Module &M) override;
};
@@ -216,49 +199,6 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
"Late propagate attributes from kernels to functions",
false, false)
-bool AMDGPUPropagateAttributes::removeAttributes(Function *F) {
- bool Changed = false;
- if (!F)
- return Changed;
- LLVM_DEBUG(dbgs() << "Removing attributes from " << F->getName() << '\n');
- for (unsigned I = 0; I < NumAttr; ++I) {
- if (F->hasFnAttribute(AttributeNames[I])) {
- F->removeFnAttr(AttributeNames[I]);
- Changed = true;
- }
- }
- return Changed;
-}
-
-bool AMDGPUPropagateAttributes::handleAddressTakenFunctions(CallGraph *CG) {
- assert(ModuleCG && "Call graph not present");
-
- bool Changed = false;
- SmallSet<CallGraphNode *, 32> Visited;
-
- for (Function *F : AddressTakenFunctions) {
- CallGraphNode *CGN = (*CG)[F];
- if (!Visited.count(CGN)) {
- Changed |= removeAttributes(F);
- Visited.insert(CGN);
- }
-
- std::queue<CallGraphNode *> SubGraph;
- SubGraph.push(CGN);
- while (!SubGraph.empty()) {
- CallGraphNode *CGN = SubGraph.front();
- SubGraph.pop();
- if (!Visited.count(CGN)) {
- Changed |= removeAttributes(CGN->getFunction());
- Visited.insert(CGN);
- }
- for (auto N : *CGN)
- SubGraph.push(N.second);
- }
- }
- return Changed;
-}
-
Function *
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
Function *OrigF) {
@@ -270,12 +210,11 @@ AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
return nullptr;
}
-bool AMDGPUPropagateAttributes::process(Module &M, CallGraph *CG) {
+bool AMDGPUPropagateAttributes::process(Module &M) {
for (auto &F : M.functions())
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
Roots.insert(&F);
- ModuleCG = CG;
return process();
}
@@ -301,9 +240,6 @@ bool AMDGPUPropagateAttributes::process() {
if (F.isDeclaration())
continue;
- if (F.hasAddressTaken(nullptr, true, true, true))
- AddressTakenFunctions.insert(&F);
-
const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;
@@ -374,12 +310,6 @@ bool AMDGPUPropagateAttributes::process() {
Roots.clear();
Clones.clear();
- // Keep the post processing related to indirect
- // calls separate to handle them gracefully.
- // The core traversal need not be affected by this.
- if (AllowClone)
- Changed |= handleAddressTakenFunctions(ModuleCG);
-
return Changed;
}
@@ -459,10 +389,6 @@ bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
return AMDGPUPropagateAttributes(TM, false).process(F);
}
-void AMDGPUPropagateAttributesLate::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<CallGraphWrapperPass>();
-}
-
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
if (!TM) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
@@ -471,8 +397,8 @@ bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
TM = &TPC->getTM<TargetMachine>();
}
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- return AMDGPUPropagateAttributes(TM, true).process(M, &CG);
+
+ return AMDGPUPropagateAttributes(TM, true).process(M);
}
FunctionPass
@@ -497,9 +423,8 @@ AMDGPUPropagateAttributesEarlyPass::run(Function &F,
}
PreservedAnalyses
-AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &MAM) {
- AMDGPUPropagateAttributes APA(&TM, true);
- CallGraph &CG = MAM.getResult<CallGraphAnalysis>(M);
- const bool Changed = APA.process(M, &CG);
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
+ return AMDGPUPropagateAttributes(&TM, true).process(M)
+ ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
index 25051b885f9d3..c18898164e25c 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -65,7 +65,6 @@
; GCN-O1-NEXT: AMDGPU Printf lowering
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
-; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Late propagate attributes from kernels to functions
; GCN-O1-NEXT: Interprocedural Sparse Conditional Constant Propagation
; GCN-O1-NEXT: FunctionPass Manager
@@ -375,7 +374,6 @@
; GCN-O2-NEXT: AMDGPU Printf lowering
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
-; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Late propagate attributes from kernels to functions
; GCN-O2-NEXT: Interprocedural Sparse Conditional Constant Propagation
; GCN-O2-NEXT: FunctionPass Manager
@@ -730,7 +728,6 @@
; GCN-O3-NEXT: AMDGPU Printf lowering
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
-; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Late propagate attributes from kernels to functions
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
deleted file mode 100644
index 394a390980aee..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
-
-; CHECK-LABEL: define float @common() {
-define float @common() {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @foo() {
-define float @foo() {
- %direct_call = call contract float @common()
- ret float %direct_call
-}
-
-; CHECK-LABEL: define float @bar() {
-define float @bar() {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @baz() {
-define float @baz() {
- ret float 0.0
-}
-
-define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
- %fn = alloca float ()*
- switch i32 %type, label %sw.default [
- i32 1, label %sw.bb
- i32 2, label %sw.bb2
- i32 3, label %sw.bb3
- ]
-
-sw.bb:
- store float ()* @foo, float ()** %fn
- br label %sw.epilog
-
-sw.bb2:
- store float ()* @bar, float ()** %fn
- br label %sw.epilog
-
-sw.bb3:
- store float ()* @baz, float ()** %fn
- br label %sw.epilog
-
-sw.default:
- br label %sw.epilog
-
-sw.epilog:
- %fp = load float ()*, float ()** %fn
- %direct_call = call contract float @common()
- %indirect_call = call contract float %fp()
- store float %indirect_call, float* %result
- ret void
-}
-
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
deleted file mode 100644
index f3e88ea129003..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
-
-; Test to check if we skip propgating attributes even if
-; a function is called directly as well as
-; indirectly. "baz" is called directly as well indirectly.
-
-; CHECK-LABEL: define float @foo()
-define float @foo() #1 {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @bar()
-define float @bar() #1 {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @baz()
-define float @baz() #1 {
- ret float 0.0
-}
-
-define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
- %fn = alloca float ()*
- switch i32 %type, label %sw.default [
- i32 1, label %sw.bb
- i32 2, label %sw.bb2
- i32 3, label %sw.bb3
- ]
-
-sw.bb:
- store float ()* @foo, float ()** %fn
- br label %sw.epilog
-
-sw.bb2:
- store float ()* @bar, float ()** %fn
- br label %sw.epilog
-
-sw.bb3:
- store float ()* @baz, float ()** %fn
- br label %sw.epilog
-
-sw.default:
- br label %sw.epilog
-
-sw.epilog:
- %fp = load float ()*, float ()** %fn
- %direct_call = call contract float @baz()
- %indirect_call = call contract float %fp()
- store float %indirect_call, float* %result
- ret void
-}
-
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll
index 44d52fb934f67..370c5cc17e91f 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll
@@ -30,14 +30,11 @@ define private void @f(void ()* nocapture %0) #0 {
; In order to expose this bug, it is necessary that `g` have one of the
; propagated attributes, so that a clone and substitution would take place if g
; were actually the function being called.
-; CHECK-DAG: define private void @g.1() #0
-; CHECK-DAG: define internal void @g() #1
+; CHECK-DAG: define private void @g.1() #1
+; CHECK-DAG: define internal void @g() #2
define private void @g() #1 {
ret void
}
attributes #0 = { noinline }
attributes #1 = { noinline "amdgpu-waves-per-eu"="1,10" }
-
-; CHECK: attributes #0 = { noinline }
-; CHECK-NEXT: attributes #1 = { noinline "target-features"="+enable-ds128,+enable-prt-strict-null,+flat-address-space,+flat-for-global,+load-store-opt,+promote-alloca,+trap-handler,+unaligned-access-mode,-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
deleted file mode 100644
index 2401f4c81a753..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
-
-; Test to check if we skip attributes on address
-; taken functions and its callees.
-
-; CHECK-LABEL: define float @bar() {
-define float @bar() {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @baz() {
-define float @baz() {
- ret float 0.0
-}
-
-; CHECK-LABEL: define float @foo() {
-define float @foo() {
- %v1 = call contract float @bar()
- %v2 = call contract float @baz()
- %v3 = fadd float %v1, %v2
- ret float %v3
-}
-
-; CHECK-LABEL: define amdgpu_kernel void @kernel(float* %result, i32 %type) #0 {
-define amdgpu_kernel void @kernel(float *%result, i32 %type) #1 {
- %fn = alloca float ()*
- store float ()* @foo, float ()** %fn
- %fp = load float ()*, float ()** %fn
- %indirect_call = call contract float %fp()
- store float %indirect_call, float* %result
- ret void
-}
-
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll
deleted file mode 100644
index 362c40293c39d..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
-
-; Test attributes on a function which
-; is called indirectly from two kernels
-; having
diff erent launch bounds.
-
-; This function should not have any attributes on it.
-; CHECK-LABEL: define float @foo() {
-define float @foo() {
- ret float 0.0
-}
-
-define amdgpu_kernel void @kernel1(float *%result, i32 %type) #1 {
- %fn = alloca float ()*
- store float ()* @foo, float ()** %fn
- %fp = load float ()*, float ()** %fn
- %indirect_call = call contract float %fp()
- store float %indirect_call, float* %result
- ret void
-}
-
-define amdgpu_kernel void @kernel2(float *%result, i32 %type) #2 {
- %fn = alloca float ()*
- store float ()* @foo, float ()** %fn
- %fp = load float ()*, float ()** %fn
- %indirect_call = call contract float %fp()
- store float %indirect_call, float* %result
- ret void
-}
-
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,512" }
More information about the llvm-commits
mailing list