[llvm] 9dc7da3 - [GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 12 14:44:05 PDT 2022


Author: Johannes Doerfert
Date: 2022-04-12T16:42:50-05:00
New Revision: 9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5

URL: https://github.com/llvm/llvm-project/commit/9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5
DIFF: https://github.com/llvm/llvm-project/commit/9dc7da3f9cb48ed7ba6c4806f4519d997f6671b5.diff

LOG: [GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics

This is a long-standing problem that resurfaces once in a while [0].
There might actually be two problems because I'm not 100% sure if the
issue underlying https://reviews.llvm.org/D115302 would be solved by
this or not. Anyway.

In 2008 we thought intrinsics do not read/write globals passed to them:
https://github.com/llvm/llvm-project/commit/d4133ac31535ce5176f97e9fc81825af8a808760
This is not correct given that intrinsics can synchronize threads and
thereby cause the effects of other threads to become visible.

NOTE: I did not yet modify any tests but only tried out the reproducer
      of https://github.com/llvm/llvm-project/issues/54851.

Fixes: https://github.com/llvm/llvm-project/issues/54851

[0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402

Differential Revision: https://reviews.llvm.org/D123531

Added: 
    llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
    llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll

Modified: 
    llvm/lib/Analysis/GlobalsModRef.cpp
    llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
    llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
    llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index 80989c2c66715..c179fd383de2a 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
     Handles.front().I = Handles.begin();
     bool KnowNothing = false;
 
+    // Intrinsics, like any other synchronizing function, can make effects
+    // of other threads visible. Without nosync we know nothing really.
+    // Similarly, if `nocallback` is missing, the function, or intrinsic,
+    // can call into the module arbitrarily. If both are set the function
+    // has an effect but will not interact with accesses of internal
+    // globals inside the module. We are conservative here for optnone
+    // functions, might not be necessary.
+    auto MaySyncOrCallIntoModule = [](const Function &F) {
+      return !F.isDeclaration() || !F.hasNoSync() ||
+             !F.hasFnAttribute(Attribute::NoCallback);
+    };
+
     // Collect the mod/ref properties due to called functions.  We only compute
     // one mod-ref set.
     for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           // Can't do better than that!
         } else if (F->onlyReadsMemory()) {
           FI.addModRefInfo(ModRefInfo::Ref);
-          if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+          if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
             // This function might call back into the module and read a global -
             // consider every global as possibly being read by this function.
             FI.setMayReadAnyGlobal();
@@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           FI.addModRefInfo(ModRefInfo::ModRef);
           if (!F->onlyAccessesArgMemory())
             FI.setMayReadAnyGlobal();
-          if (!F->isIntrinsic()) {
+          if (MaySyncOrCallIntoModule(*F)) {
             KnowNothing = true;
             break;
           }

diff  --git a/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
new file mode 100644
index 0000000000000..7cbaaa13dcb97
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
@@ -0,0 +1,38 @@
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+;
+; Functions w/o `nosync` attribute may communicate via memory and must be
+; treated conservatively. Taken from https://reviews.llvm.org/D115302.
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
+
+; CHECK-LABEL: @bar_sync
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
+; CHECK: load
+define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.bar.sync(i32 0)
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.bar.sync(i32) #0
+
+; CHECK-LABEL: @barrier0
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.barrier0()
+; CHECK: load
+define dso_local i32 @barrier0(i32 %0) local_unnamed_addr  {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.barrier0()
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.barrier0() #0
+
+attributes #0 = { convergent nounwind }

diff  --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
index 61bf473770c3a..3e2030f5a3a6f 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -18,10 +18,10 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -35,10 +35,10 @@ if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
 

diff  --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
index f9b8d3b954104..60239a990f2da 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addressnottaken2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,7 +14,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -24,10 +24,10 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -41,10 +41,10 @@ if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
 

diff  --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
index 6135332546cd1..f3fb398944214 100644
--- a/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_addresstaken.ll
@@ -18,7 +18,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -28,9 +28,9 @@ if.else:                                          ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 if.end:                                           ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -44,8 +44,8 @@ if.end6:                                          ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
 attributes #0 = { noreturn nounwind }

diff  --git a/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
new file mode 100644
index 0000000000000..e0a23d7405734
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+
+; Make sure we do not hoist the load before the intrinsic, unknown function, or
+; optnone function except if we know the unknown function is nosync and nocallback.
+
+ at G1 = internal global i32 undef
+ at G2 = internal global i32 undef
+ at G3 = internal global i32 undef
+ at G4 = internal global i32 undef
+
+define void @test_barrier(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_barrier
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G1, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G1, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G1
+  br label %check
+check:
+  call void @llvm.amdgcn.s.barrier()
+  %v = load i32, ptr @G1
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @test_unknown(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G2, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G2, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G2
+  br label %check
+check:
+  call void @unknown()
+  %v = load i32, ptr @G2
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @test_optnone(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_optnone
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G3, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    call void @optnone()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr @G3, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G3
+  br label %check
+check:
+  call void @optnone()
+  %v = load i32, ptr @G3
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+define void @optnone() optnone nosync nocallback noinline {
+; CHECK: Function Attrs: nocallback noinline nosync optnone
+; CHECK-LABEL: define {{[^@]+}}@optnone
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+; Here hoisting is legal and we use it to verify it will happen.
+define void @test_unknown_annotated(i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:    br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]]
+; CHECK:       .check_crit_edge:
+; CHECK-NEXT:    [[V_PRE:%.*]] = load i32, ptr @G4, align 4
+; CHECK-NEXT:    br label [[CHECK:%.*]]
+; CHECK:       init:
+; CHECK-NEXT:    store i32 0, ptr @G4, align 4
+; CHECK-NEXT:    br label [[CHECK]]
+; CHECK:       check:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ]
+; CHECK-NEXT:    call void @unknown_nosync_nocallback()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %init, label %check
+init:
+  store i32 0, ptr @G4
+  br label %check
+check:
+  call void @unknown_nosync_nocallback()
+  %v = load i32, ptr @G4
+  %cmp = icmp eq i32 %v, 0
+  call void @llvm.assume(i1 %cmp)
+  ret void
+}
+
+declare void @unknown()
+declare void @unknown_nosync_nocallback() nosync nocallback
+declare void @llvm.amdgcn.s.barrier()
+declare void @llvm.assume(i1 noundef)
+


        


More information about the llvm-commits mailing list