[llvm] c0f3a3d - [Attributor][FIX] Avoid H2S on GPUs if the pointer can be shared
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 20 17:45:01 PDT 2023
Author: Johannes Doerfert
Date: 2023-03-20T17:44:24-07:00
New Revision: c0f3a3d7b50c6bec85b8c5eb8d821087499eec41
URL: https://github.com/llvm/llvm-project/commit/c0f3a3d7b50c6bec85b8c5eb8d821087499eec41
DIFF: https://github.com/llvm/llvm-project/commit/c0f3a3d7b50c6bec85b8c5eb8d821087499eec41.diff
LOG: [Attributor][FIX] Avoid H2S on GPUs if the pointer can be shared
If the stack is not accessible by other threads, e.g., on a GPU, we need
to ensure heap-to-stack (H2S) will not create a stack version of a pointer
that might be passed to another thread. Since the use traversal looks
through memory transparently by default, we need to register a callback
and explicitly inspect the stores we might look through.
Added:
Modified:
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/Transforms/Attributor/heap_to_stack.ll
llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 624e04611f4f..6118404172aa 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -7082,7 +7082,14 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
ValidUsesOnly = false;
return true;
};
- if (!A.checkForAllUses(Pred, *this, *AI.CB))
+ if (!A.checkForAllUses(Pred, *this, *AI.CB, /* CheckBBLivenessOnly */ false,
+ DepClassTy::OPTIONAL, /* IgnoreDroppableUses */ true,
+ [&](const Use &OldU, const Use &NewU) {
+ auto *SI = dyn_cast<StoreInst>(OldU.getUser());
+ return !SI || StackIsAccessibleByOtherThreads ||
+ AA::isAssumedThreadLocalObject(
+ A, *SI->getPointerOperand(), *this);
+ }))
return false;
return ValidUsesOnly;
};
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 2a265d5074d5..ffe5bd878daa 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -21,6 +21,7 @@ declare void @foo(i32* %p)
declare void @foo_nounw(i32* %p) nounwind nofree
declare void @usei8(i8)
+declare void @usei8p(i8* nocapture)
declare i32 @no_return_call() noreturn
@@ -28,6 +29,9 @@ declare void @free(i8* nocapture) allockind("free")
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i8* undef, align 4
+;.
define void @h2s_value_simplify_interaction(i1 %c, i8* %A) {
; CHECK-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction
; CHECK-SAME: (i1 [[C:%.*]], i8* nocapture nofree readnone [[A:%.*]]) {
@@ -42,13 +46,13 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) {
; CHECK: f2:
; CHECK-NEXT: [[L:%.*]] = load i8, i8* [[M]], align 16
; CHECK-NEXT: call void @usei8(i8 [[L]])
-; CHECK-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[M]]) #[[ATTR10:[0-9]+]]
+; CHECK-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[M]]) #[[ATTR11:[0-9]+]]
; CHECK-NEXT: br label [[J]]
; CHECK: dead:
; CHECK-NEXT: unreachable
; CHECK: j:
; CHECK-NEXT: [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ]
-; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR10]]
+; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR11]]
; CHECK-NEXT: ret void
;
entry:
@@ -328,7 +332,7 @@ define void @test9() {
; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4
-; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR10]]
+; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR11]]
; CHECK-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
; CHECK-NEXT: ret void
;
@@ -387,7 +391,7 @@ define i32 @test_lifetime() {
define void @test11() {
; CHECK-LABEL: define {{[^@]+}}@test11() {
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
-; CHECK-NEXT: tail call void @sync_will_return(i8* [[DOTH2S]]) #[[ATTR10]]
+; CHECK-NEXT: tail call void @sync_will_return(i8* [[DOTH2S]]) #[[ATTR11]]
; CHECK-NEXT: ret void
;
%1 = tail call noalias i8* @malloc(i64 4)
@@ -628,7 +632,7 @@ define void @test16c(i8 %v, i8** %P) {
; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
; CHECK-NEXT: store i8* [[DOTH2S]], i8** [[P]], align 8
-; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[DOTH2S]]) #[[ATTR10]]
+; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[DOTH2S]]) #[[ATTR11]]
; CHECK-NEXT: ret void
;
%1 = tail call noalias i8* @malloc(i64 4)
@@ -649,6 +653,25 @@ define void @test16d(i8 %v, i8** %P) {
store i8* %1, i8** %P
ret void
}
+
+@G = internal global i8* undef, align 4
+define void @test16e(i8 %v) norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@test16e
+; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT: store i8* [[DOTH2S]], i8** @G, align 8
+; CHECK-NEXT: call void @usei8p(i8* nocapture nofree [[DOTH2S]]) #[[ATTR12:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ %1 = tail call noalias i8* @malloc(i64 4)
+ store i8* %1, i8** @G
+ %2 = load i8*, i8** @G
+ call void @usei8p(i8* nofree nocapture %2) nocallback nosync willreturn nounwind
+ call void @free(i8* %1)
+ ret void
+}
+
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn }
@@ -659,8 +682,10 @@ define void @test16d(i8 %v, i8** %P) {
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { allockind("alloc,uninitialized,aligned") allocsize(1) }
; CHECK: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) }
-; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; CHECK: attributes #[[ATTR10]] = { nounwind }
+; CHECK: attributes #[[ATTR9]] = { norecurse }
+; CHECK: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CHECK: attributes #[[ATTR11]] = { nounwind }
+; CHECK: attributes #[[ATTR12]] = { nocallback nosync nounwind willreturn }
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CGSCC: {{.*}}
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index db6f2773fa8e..ef7a80c1e00a 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -34,6 +34,10 @@ declare void @free(i8* nocapture)
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i8* undef, align 4
+; CHECK: @[[GTL:[a-zA-Z0-9_$"\\.-]+]] = internal thread_local global i8* undef, align 4
+;.
define void @nofree_arg_only(i8* %p1, i8* %p2) {
; CHECK-LABEL: define {{[^@]+}}@nofree_arg_only
; CHECK-SAME: (i8* nocapture nofree [[P1:%.*]], i8* nocapture [[P2:%.*]]) {
@@ -277,7 +281,7 @@ define void @test9() {
; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4
-; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
; CHECK-NEXT: ret void
;
@@ -338,7 +342,7 @@ define i32 @test_lifetime() {
define void @test11() {
; CHECK-LABEL: define {{[^@]+}}@test11() {
; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
-; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) #[[ATTR5]]
+; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) #[[ATTR6]]
; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]])
; CHECK-NEXT: ret void
;
@@ -584,7 +588,7 @@ define void @test16c(i8 %v, i8** %P) {
; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8
-; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR5]]
+; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR6]]
; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]])
; CHECK-NEXT: ret void
;
@@ -614,7 +618,7 @@ define void @test17() {
; CHECK-LABEL: define {{[^@]+}}@test17() {
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5)
; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8*
-; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: ret void
;
%1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
@@ -626,7 +630,7 @@ define void @test17() {
define void @test17b() {
; CHECK-LABEL: define {{[^@]+}}@test17b() {
; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4)
-; CHECK-NEXT: tail call void @usei8(i8* nofree [[TMP1]]) #[[ATTR6]]
+; CHECK-NEXT: tail call void @usei8(i8* nofree [[TMP1]]) #[[ATTR7]]
; CHECK-NEXT: tail call void @__kmpc_free_shared(i8* nocapture [[TMP1]], i64 noundef 4)
; CHECK-NEXT: ret void
;
@@ -643,7 +647,7 @@ define void @move_alloca() {
; CHECK-NEXT: br label [[NOT_ENTRY:%.*]]
; CHECK: not_entry:
; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8*
-; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6]]
+; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR7]]
; CHECK-NEXT: ret void
;
entry:
@@ -656,6 +660,44 @@ not_entry:
ret void
}
+@G = internal global i8* undef, align 4
+define void @test16e(i8 %v) norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@test16e
+; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4)
+; CHECK-NEXT: store i8* [[TMP1]], i8** @G, align 8
+; CHECK-NEXT: call void @usei8(i8* nocapture nofree [[TMP1]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: tail call void @__kmpc_free_shared(i8* noalias nocapture [[TMP1]], i64 noundef 4)
+; CHECK-NEXT: ret void
+;
+ %1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
+ store i8* %1, i8** @G
+ %2 = load i8*, i8** @G
+ call void @usei8(i8* nofree nocapture %2) nocallback nosync willreturn nounwind
+ tail call void @__kmpc_free_shared(i8* %1, i64 4)
+ ret void
+}
+
+@Gtl = internal thread_local global i8* undef, align 4
+define void @test16f(i8 %v) norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@test16f
+; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5)
+; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8*
+; CHECK-NEXT: store i8* [[MALLOC_CAST]], i8** @Gtl, align 8
+; CHECK-NEXT: call void @usei8(i8* nocapture nofree [[MALLOC_CAST]]) #[[ATTR8]]
+; CHECK-NEXT: ret void
+;
+ %1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
+ store i8* %1, i8** @Gtl
+ %2 = load i8*, i8** @Gtl
+ call void @usei8(i8* nofree nocapture %2) nocallback nosync willreturn nounwind
+ tail call void @__kmpc_free_shared(i8* %1, i64 4)
+ ret void
+}
+
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
@@ -663,8 +705,10 @@ not_entry:
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind }
; CHECK: attributes #[[ATTR3]] = { noreturn }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; CHECK: attributes #[[ATTR5]] = { nounwind }
-; CHECK: attributes #[[ATTR6]] = { nosync nounwind willreturn }
+; CHECK: attributes #[[ATTR5]] = { norecurse }
+; CHECK: attributes #[[ATTR6]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nosync nounwind willreturn }
+; CHECK: attributes #[[ATTR8]] = { nocallback nosync nounwind willreturn }
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CGSCC: {{.*}}
More information about the llvm-commits
mailing list