[llvm] 9dc7da3 - [GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 12 14:44:05 PDT 2022
Author: Johannes Doerfert
Date: 2022-04-12T16:42:50-05:00
New Revision: 9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5
URL: https://github.com/llvm/llvm-project/commit/9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5
DIFF: https://github.com/llvm/llvm-project/commit/9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5.diff
LOG: [GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics
This is a long standing problem that resurfaces once in a while [0].
There might actually be two problems because I'm not 100% sure if the
issue underlying https://reviews.llvm.org/D115302 would be solved by
this or not. Anyway.
In 2008 we thought intrinsics do not read/write globals passed to them:
https://github.com/llvm/llvm-project/commit/d4133ac31535ce5176f97e9fc81825af8a808760
This is not correct given that intrinsics can synchronize threads and
cause effects to effectively become visible.
NOTE: I did not yet modify any tests but only tried out the reproducer
of https://github.com/llvm/llvm-project/issues/54851.
Fixes: https://github.com/llvm/llvm-project/issues/54851
[0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402
Differential Revision: https://reviews.llvm.org/D123531
Added:
llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
Modified:
llvm/lib/Analysis/GlobalsModRef.cpp
llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index 80989c2c66715..c179fd383de2a 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
Handles.front().I = Handles.begin();
bool KnowNothing = false;
+ // Intrinsics, like any other synchronizing function, can make effects
+ // of other threads visible. Without nosync we know nothing really.
+ // Similarly, if `nocallback` is missing the function, or intrinsic,
+ // can call into the module arbitrarily. If both are set the function
+ // has an effect but will not interact with accesses of internal
+ // globals inside the module. We are conservative here for optnone
+ // functions, might not be necessary.
+ auto MaySyncOrCallIntoModule = [](const Function &F) {
+ return !F.isDeclaration() || !F.hasNoSync() ||
+ !F.hasFnAttribute(Attribute::NoCallback);
+ };
+
// Collect the mod/ref properties due to called functions. We only compute
// one mod-ref set.
for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(ModRefInfo::Ref);
- if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+ if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
@@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FI.addModRefInfo(ModRefInfo::ModRef);
if (!F->onlyAccessesArgMemory())
FI.setMayReadAnyGlobal();
- if (!F->isIntrinsic()) {
+ if (MaySyncOrCallIntoModule(*F)) {
KnowNothing = true;
break;
}
diff --git a/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
new file mode 100644
index 0000000000000..7cbaaa13dcb97
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
@@ -0,0 +1,38 @@
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+;
+; Functions w/o `nosync` attribute may communicate via memory and must be
+; treated conservatively. Taken from https://reviews.llvm.org/D115302.
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+ at s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
+
+; CHECK-LABEL: @bar_sync
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
+; CHECK: load
+define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
+ store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+ tail call void @llvm.nvvm.bar.sync(i32 0)
+ %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+ ret i32 %2
+}
+
+declare void @llvm.nvvm.bar.sync(i32) #0
+
+; CHECK-LABEL: @barrier0
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.barrier0()
+; CHECK: load
+define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
+ store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+ tail call void @llvm.nvvm.barrier0()
+ %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+ ret i32 %2
+}
+
+declare void @llvm.nvvm.barrier0() #0
+
+attributes #0 = { convergent nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
index 61bf473770c3a..3e2030f5a3a6f 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
- %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+ %tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@@ -18,10 +18,10 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
; CHECK-NOT: load i8, i8* @deallocCalled
if.end: ; preds = %entry
- call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+ call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@@ -35,10 +35,10 @@ if.end6: ; preds = %if.end
ret i32 0
}
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
index f9b8d3b954104..60239a990f2da 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -14,7 +14,7 @@ entry:
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
- %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+ %tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@@ -24,10 +24,10 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
; CHECK-NOT: load i8, i8* @deallocCalled
if.end: ; preds = %entry
- call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+ call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@@ -41,10 +41,10 @@ if.end6: ; preds = %if.end
ret i32 0
}
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
index 6135332546cd1..f3fb398944214 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
@@ -18,7 +18,7 @@ entry:
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
- %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+ %tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@@ -28,9 +28,9 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
if.end: ; preds = %entry
- call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+ call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@@ -44,8 +44,8 @@ if.end6: ; preds = %if.end
ret i32 0
}
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
attributes #0 = { noreturn nounwind }
diff --git a/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
new file mode 100644
index 0000000000000..e0a23d7405734
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+
+; Make sure we do not hoist the load before the intrinsic, unknown function, or
+; optnone function except if we know the unknown function is nosync and nocallback.
+
+ at G1 = internal global i32 undef
+ at G2 = internal global i32 undef
+ at G3 = internal global i32 undef
+ at G4 = internal global i32 undef
+
+define void @test_barrier(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_barrier
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK: init:
+; CHECK-NEXT: store i32 0, ptr @G1, align 4
+; CHECK-NEXT: br label [[CHECK]]
+; CHECK: check:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G1, align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %init, label %check
+init:
+ store i32 0, ptr @G1
+ br label %check
+check:
+ call void @llvm.amdgcn.s.barrier()
+ %v = load i32, ptr @G1
+ %cmp = icmp eq i32 %v, 0
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+define void @test_unknown(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK: init:
+; CHECK-NEXT: store i32 0, ptr @G2, align 4
+; CHECK-NEXT: br label [[CHECK]]
+; CHECK: check:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G2, align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %init, label %check
+init:
+ store i32 0, ptr @G2
+ br label %check
+check:
+ call void @unknown()
+ %v = load i32, ptr @G2
+ %cmp = icmp eq i32 %v, 0
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+define void @test_optnone(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_optnone
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK: init:
+; CHECK-NEXT: store i32 0, ptr @G3, align 4
+; CHECK-NEXT: br label [[CHECK]]
+; CHECK: check:
+; CHECK-NEXT: call void @optnone()
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G3, align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %init, label %check
+init:
+ store i32 0, ptr @G3
+ br label %check
+check:
+ call void @optnone()
+ %v = load i32, ptr @G3
+ %cmp = icmp eq i32 %v, 0
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+define void @optnone() optnone nosync nocallback noinline {
+; CHECK: Function Attrs: nocallback noinline nosync optnone
+; CHECK-LABEL: define {{[^@]+}}@optnone
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
+ ret void
+}
+
+; Here hoisting is legal and we use it to verify it will happen.
+define void @test_unknown_annotated(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]]
+; CHECK: .check_crit_edge:
+; CHECK-NEXT: [[V_PRE:%.*]] = load i32, ptr @G4, align 4
+; CHECK-NEXT: br label [[CHECK:%.*]]
+; CHECK: init:
+; CHECK-NEXT: store i32 0, ptr @G4, align 4
+; CHECK-NEXT: br label [[CHECK]]
+; CHECK: check:
+; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ]
+; CHECK-NEXT: call void @unknown_nosync_nocallback()
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %init, label %check
+init:
+ store i32 0, ptr @G4
+ br label %check
+check:
+ call void @unknown_nosync_nocallback()
+ %v = load i32, ptr @G4
+ %cmp = icmp eq i32 %v, 0
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+declare void @unknown()
+declare void @unknown_nosync_nocallback() nosync nocallback
+declare void @llvm.amdgcn.s.barrier()
+declare void @llvm.assume(i1 noundef)
+
More information about the llvm-commits
mailing list