[llvm-branch-commits] [llvm] dd8ae42 - [IndirectFunctions] Skip propagating attributes to address taken functions
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 20 23:10:18 PST 2021
Author: madhur13490
Date: 2021-01-21T07:04:28Z
New Revision: dd8ae42674b494e46ec40a22f40068db2b4a8b60
URL: https://github.com/llvm/llvm-project/commit/dd8ae42674b494e46ec40a22f40068db2b4a8b60
DIFF: https://github.com/llvm/llvm-project/commit/dd8ae42674b494e46ec40a22f40068db2b4a8b60.diff
LOG: [IndirectFunctions] Skip propagating attributes to address taken functions
In case of indirect calls or address taken functions,
skip propagating any attributes to them. We just
propagate features to such functions.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D94585
Added:
llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll
llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index cd71c7a16c73..b018bc2ece10 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -240,6 +240,14 @@ bool AMDGPUPropagateAttributes::process() {
if (F.isDeclaration())
continue;
+ // Skip propagating attributes and features to
+ // address taken functions.
+ if (F.hasAddressTaken()) {
+ if (!Roots.count(&F))
+ NewRoots.insert(&F);
+ continue;
+ }
+
const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;
@@ -259,7 +267,11 @@ bool AMDGPUPropagateAttributes::process() {
const FnProperties CallerProps(*TM, *Caller);
- if (CalleeProps == CallerProps) {
+ // Convergence is allowed if the caller has its
+ // address taken because all callee's (attributes + features)
+ // may not agree as the callee may be the target of
+ // more than one function (called directly or indirectly).
+ if (Caller->hasAddressTaken() || CalleeProps == CallerProps) {
if (!Roots.count(&F))
NewRoots.insert(&F);
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
new file mode 100644
index 000000000000..2a219fb00ce4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s
+
+; Complicated call graph where a function is called
+; directly from a kernel and also from a function
+; whose address is taken.
+
+; CHECK-LABEL: define float @common_callee.gc() #0 {
+define float @common_callee.gc() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+ ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+ %fn = alloca float ()*
+ switch i32 %type, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ store float ()* @foo, float ()** %fn
+ br label %sw.epilog
+
+sw.bb2:
+ store float ()* @bar, float ()** %fn
+ br label %sw.epilog
+
+sw.bb3:
+ store float ()* @baz, float ()** %fn
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %fp = load float ()*, float ()** %fn
+ %direct_call = call contract float @common_callee.gc()
+ %indirect_call = call contract float %fp()
+ store float %indirect_call, float* %result
+ ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
new file mode 100644
index 000000000000..67716db771c2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s
+
+; Test to check if we skip propagating attributes even if
+; a function is called directly as well as
+; indirectly. "baz" is called directly as well as indirectly.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+ ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+ %fn = alloca float ()*
+ switch i32 %type, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ store float ()* @foo, float ()** %fn
+ br label %sw.epilog
+
+sw.bb2:
+ store float ()* @bar, float ()** %fn
+ br label %sw.epilog
+
+sw.bb3:
+ store float ()* @baz, float ()** %fn
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %fp = load float ()*, float ()** %fn
+ %direct_call = call contract float @baz()
+ %indirect_call = call contract float %fp()
+ store float %indirect_call, float* %result
+ ret void
+}
+
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll
new file mode 100644
index 000000000000..20f2f97b9dd4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s
+
+; Test to check if we skip attributes on address
+; taken functions but pass them to direct callees.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz2() #0 {
+define float @baz2() {
+ ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+ %fn = alloca float ()*
+ switch i32 %type, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ store float ()* @foo, float ()** %fn
+ br label %sw.epilog
+
+sw.bb2:
+ store float ()* @bar, float ()** %fn
+ br label %sw.epilog
+
+sw.bb3:
+ store float ()* @baz, float ()** %fn
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %fp = load float ()*, float ()** %fn
+ %direct_call = call contract float @baz2()
+ %indirect_call = call contract float %fp()
+ store float %indirect_call, float* %result
+ ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "target-features"="-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256"}
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
new file mode 100644
index 000000000000..647e6a4d9441
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-early %s | FileCheck %s
+
+; Test to check if we skip attributes on address
+; taken functions in a simple call graph.
+
+; CHECK-LABEL: define float @foo() {
+define float @foo() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @bar() {
+define float @bar() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: define float @baz() {
+define float @baz() {
+ ret float 0.0
+}
+
+define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
+ %fn = alloca float ()*
+ switch i32 %type, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+ store float ()* @foo, float ()** %fn
+ br label %sw.epilog
+
+sw.bb2:
+ store float ()* @bar, float ()** %fn
+ br label %sw.epilog
+
+sw.bb3:
+ store float ()* @baz, float ()** %fn
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %fp = load float ()*, float ()** %fn
+ %indirect_call = call contract float %fp()
+ store float %indirect_call, float* %result
+ ret void
+}
+
+attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
+
More information about the llvm-branch-commits
mailing list