[llvm] 4e0f464 - Reapply "[OpenMP][FIX] Restrict more unsound assmptions about threading"

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 19 18:28:04 PST 2022


Author: Johannes Doerfert
Date: 2022-12-19T18:27:52-08:00
New Revision: 4e0f464ce2d995d9ee8466729dd5f8c2239f3bab

URL: https://github.com/llvm/llvm-project/commit/4e0f464ce2d995d9ee8466729dd5f8c2239f3bab
DIFF: https://github.com/llvm/llvm-project/commit/4e0f464ce2d995d9ee8466729dd5f8c2239f3bab.diff

LOG: Reapply "[OpenMP][FIX] Restrict more unsound assmptions about threading"

This reverts commit 3b052558125cbedf18c2ddb65780b50d6f437d54.

This patch got reverted due to an unrelated memory leak that has been
fixed.

Added: 
    llvm/test/Transforms/Attributor/value-simplify-reachability.ll
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Modified: 
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/value-simplify-assume.ll
    llvm/test/Transforms/Attributor/value-simplify-gpu.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 937f2573cd820..fb0e0a0bf5694 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1077,25 +1077,23 @@ struct AAPointerInfoImpl
         QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
     const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
         IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
-    const bool NoSync = NoSyncAA.isAssumedNoSync();
+    bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
 
     // Helper to determine if we need to consider threading, which we cannot
     // right now. However, if the function is (assumed) nosync or the thread
     // executing all instructions is the main thread only we can ignore
     // threading.
     auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
-      if (NoSync)
-        return true;
       if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
         return true;
       return false;
     };
 
     // Helper to determine if the access is executed by the same thread as the
-    // load, for now it is sufficient to avoid any potential threading effects
-    // as we cannot deal with them anyway.
-    auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
-      return CanIgnoreThreading(*Acc.getLocalInst());
+    // given instruction, for now it is sufficient to avoid any potential
+    // threading effects as we cannot deal with them anyway.
+    auto IsSameThreadAsInst = [&](const Access &Acc) -> bool {
+      return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst());
     };
 
     // TODO: Use inter-procedural reachability and dominance.
@@ -1172,10 +1170,14 @@ struct AAPointerInfoImpl
       if (FindInterferingWrites && Dominates)
         HasBeenWrittenTo = true;
 
+      // Track if all interesting accesses are in the same `nosync` function as
+      // the given instruction.
+      AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope;
+
       // For now we only filter accesses based on CFG reasoning which does not
       // work yet if we have threading effects, or the access is complicated.
       if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
-          IsSameThreadAsLoad(Acc))
+          IsSameThreadAsInst(Acc))
         DominatingWrites.insert(&Acc);
 
       InterferingAccesses.push_back({&Acc, Exact});
@@ -1188,6 +1190,8 @@ struct AAPointerInfoImpl
     // the worst case quadratic as we are looking for another write that will
     // hide the effect of this one.
     auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+      if (!IsSameThreadAsInst(Acc))
+        return false;
       if ((!Acc.isWriteOrAssumption() ||
            !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA,
                                        IsLiveInCalleeCB)) &&
@@ -1198,8 +1202,6 @@ struct AAPointerInfoImpl
 
       if (!DT || !UseDominanceReasoning)
         return false;
-      if (!IsSameThreadAsLoad(Acc))
-        return false;
       if (!DominatingWrites.count(&Acc))
         return false;
       for (const Access *DomAcc : DominatingWrites) {
@@ -1219,7 +1221,8 @@ struct AAPointerInfoImpl
     // succeeded for all or not.
     unsigned NumInterferingAccesses = InterferingAccesses.size();
     for (auto &It : InterferingAccesses) {
-      if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses ||
+      if (!AllInSameNoSyncFn ||
+          NumInterferingAccesses > MaxInterferingAccesses ||
           !CanSkipAccess(*It.first, It.second)) {
         if (!UserCB(*It.first, It.second))
           return false;

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll
index aed444171f6d1..24eb89cfab165 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 @Gstatic_int1 = internal global i32 zeroinitializer, align 4

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
index 7ae99e48ff8e6..6e6113e6e2cc4 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 target triple = "amdgcn-amd-amdhsa"
 
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
 @ReachableKernel = internal addrspace(3) global i32 3, align 4
 @UnreachableKernel = internal addrspace(3) global i32 42, align 4
 @ReachableKernelAS0 = internal global i32 7, align 4
@@ -110,7 +111,8 @@ define internal void @level2Kernela() {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -138,7 +140,8 @@ define internal void @level2Kernelb() {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]]
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -160,18 +163,12 @@ entry:
 }
 
 define internal void @level2Kernelall_late() {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late
-; TUNIT-SAME: () #[[ATTR2]] {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late
-; CGSCC-SAME: () #[[ATTR2]] {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
-; CGSCC-NEXT:    ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
@@ -211,10 +208,12 @@ define internal void @level1(i32 %C) {
 ; TUNIT-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0
 ; TUNIT-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TUNIT:       if.then:
-; TUNIT-NEXT:    call void @level2a() #[[ATTR3]]
+; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT:    call void @level2a(i32 [[TMP0]]) #[[ATTR3]]
 ; TUNIT-NEXT:    br label [[IF_END:%.*]]
 ; TUNIT:       if.else:
-; TUNIT-NEXT:    call void @level2b() #[[ATTR3]]
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT:    call void @level2b(i32 [[TMP1]]) #[[ATTR3]]
 ; TUNIT-NEXT:    br label [[IF_END]]
 ; TUNIT:       if.end:
 ; TUNIT-NEXT:    call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]]
@@ -263,6 +262,7 @@ define internal void @level2all_early(i32* %addr) {
 ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    store i32 17, i32* [[ADDR]], align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
@@ -282,11 +282,14 @@ entry:
 define internal void @level2a(i32* %addr) {
 ; TUNIT: Function Attrs: norecurse nosync nounwind
 ; TUNIT-LABEL: define {{[^@]+}}@level2a
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT:    [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[QQQQ2:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[QQQQ2]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -310,11 +313,14 @@ entry:
 define internal void @level2b(i32* %addr) {
 ; TUNIT: Function Attrs: norecurse nosync nounwind
 ; TUNIT-LABEL: define {{[^@]+}}@level2b
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT:    [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[TMP3]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -336,20 +342,13 @@ entry:
 }
 
 define internal void @level2all_late(i32* %addr) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2all_late
-; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2all_late
-; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; CGSCC-NEXT:    store i32 5, i32* [[ADDR]], align 4
-; CGSCC-NEXT:    ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2all_late
+; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; CHECK-NEXT:    store i32 5, i32* [[ADDR]], align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll
new file mode 100644
index 0000000000000..b505ac06629f7
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll
@@ -0,0 +1,834 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+@GInt1 = internal global i32 undef, align 4
+@GInt2 = internal global i32 zeroinitializer, align 4
+@GInt3 = internal global i32 undef, align 4
+@GInt4 = internal global i32 zeroinitializer, align 4
+@GInt5 = internal global i32 undef, align 4
+
+declare void @llvm.assume(i1)
+declare void @useI32(i32) nosync nocallback
+declare void @free(ptr) allockind("free") "alloc-family"="malloc"
+declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc"
+
+;.
+; CHECK: @[[GINT1:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4
+; CHECK: @[[GINT2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4
+; CHECK: @[[GINT3:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4
+; CHECK: @[[GINT4:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0, align 4
+; CHECK: @[[GINT5:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 undef, align 4
+;.
+define internal void @write1ToGInt1() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@write1ToGInt1
+; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    store i32 1, ptr @GInt1, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 1, ptr @GInt1
+  ret void
+}
+
+define internal void @write1ToGInt2() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@write1ToGInt2
+; CHECK-SAME: () #[[ATTR4]] {
+; CHECK-NEXT:    store i32 1, ptr @GInt2, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 1, ptr @GInt2
+  ret void
+}
+
+define void @entry1(i1 %c, i32 %v) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@entry1
+; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5:[0-9]+]] {
+; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @GInt1, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L0]])
+; TUNIT-NEXT:    call void @write1ToGInt1() #[[ATTR10:[0-9]+]]
+; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr @GInt1, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L1]])
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       T:
+; TUNIT-NEXT:    store i32 [[V]], ptr @GInt1, align 4
+; TUNIT-NEXT:    [[L2:%.*]] = load i32, ptr @GInt1, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L2]])
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       F:
+; TUNIT-NEXT:    [[L3:%.*]] = load i32, ptr @GInt1, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L3]])
+; TUNIT-NEXT:    call void @write1ToGInt1() #[[ATTR10]]
+; TUNIT-NEXT:    [[L4:%.*]] = load i32, ptr @GInt1, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L4]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: nosync
+; CGSCC-LABEL: define {{[^@]+}}@entry1
+; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5:[0-9]+]] {
+; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @GInt1, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L0]])
+; CGSCC-NEXT:    call void @write1ToGInt1() #[[ATTR10:[0-9]+]]
+; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr @GInt1, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L1]])
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       T:
+; CGSCC-NEXT:    store i32 [[V]], ptr @GInt1, align 4
+; CGSCC-NEXT:    [[L2:%.*]] = load i32, ptr @GInt1, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L2]])
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       F:
+; CGSCC-NEXT:    [[L3:%.*]] = load i32, ptr @GInt1, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L3]])
+; CGSCC-NEXT:    call void @write1ToGInt1() #[[ATTR10]]
+; CGSCC-NEXT:    [[L4:%.*]] = load i32, ptr @GInt1, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L4]])
+; CGSCC-NEXT:    ret void
+;
+  %l0 = load i32, ptr @GInt1
+  call void @useI32(i32 %l0)
+  call void @write1ToGInt1();
+  %l1 = load i32, ptr @GInt1
+  call void @useI32(i32 %l1)
+  br i1 %c, label %T, label %F
+T:
+  store i32 %v, ptr @GInt1
+  %l2 = load i32, ptr @GInt1
+  call void @useI32(i32 %l2)
+  br label %F
+F:
+  %l3 = load i32, ptr @GInt1
+  call void @useI32(i32 %l3)
+  call void @write1ToGInt1();
+  %l4 = load i32, ptr @GInt1
+  call void @useI32(i32 %l4)
+  ret void
+}
+
+define void @entry2(i1 %c, i32 %v) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@entry2
+; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] {
+; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @GInt2, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L0]])
+; TUNIT-NEXT:    call void @write1ToGInt2() #[[ATTR10]]
+; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr @GInt2, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L1]])
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       T:
+; TUNIT-NEXT:    store i32 [[V]], ptr @GInt2, align 4
+; TUNIT-NEXT:    [[L2:%.*]] = load i32, ptr @GInt2, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L2]])
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       F:
+; TUNIT-NEXT:    [[L3:%.*]] = load i32, ptr @GInt2, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L3]])
+; TUNIT-NEXT:    call void @write1ToGInt2() #[[ATTR10]]
+; TUNIT-NEXT:    [[L4:%.*]] = load i32, ptr @GInt2, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L4]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: nosync
+; CGSCC-LABEL: define {{[^@]+}}@entry2
+; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @GInt2, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L0]])
+; CGSCC-NEXT:    call void @write1ToGInt2() #[[ATTR10]]
+; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr @GInt2, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L1]])
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       T:
+; CGSCC-NEXT:    store i32 [[V]], ptr @GInt2, align 4
+; CGSCC-NEXT:    [[L2:%.*]] = load i32, ptr @GInt2, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L2]])
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       F:
+; CGSCC-NEXT:    [[L3:%.*]] = load i32, ptr @GInt2, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L3]])
+; CGSCC-NEXT:    call void @write1ToGInt2() #[[ATTR10]]
+; CGSCC-NEXT:    [[L4:%.*]] = load i32, ptr @GInt2, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L4]])
+; CGSCC-NEXT:    ret void
+;
+  %l0 = load i32, ptr @GInt2
+  call void @useI32(i32 %l0)
+  call void @write1ToGInt2();
+  %l1 = load i32, ptr @GInt2
+  call void @useI32(i32 %l1)
+  br i1 %c, label %T, label %F
+T:
+  store i32 %v, ptr @GInt2
+  %l2 = load i32, ptr @GInt2
+  call void @useI32(i32 %l2)
+  br label %F
+F:
+  %l3 = load i32, ptr @GInt2
+  call void @useI32(i32 %l3)
+  call void @write1ToGInt2();
+  %l4 = load i32, ptr @GInt2
+  call void @useI32(i32 %l4)
+  ret void
+}
+define void @entry3(i1 %c, i32 %v) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@entry3
+; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] {
+; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @GInt3, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L0]])
+; TUNIT-NEXT:    store i32 1, ptr @GInt3, align 4
+; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr @GInt3, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L1]])
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       T:
+; TUNIT-NEXT:    store i32 [[V]], ptr @GInt3, align 4
+; TUNIT-NEXT:    [[L2:%.*]] = load i32, ptr @GInt3, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L2]])
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       F:
+; TUNIT-NEXT:    [[L3:%.*]] = load i32, ptr @GInt3, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L3]])
+; TUNIT-NEXT:    store i32 1, ptr @GInt3, align 4
+; TUNIT-NEXT:    [[L4:%.*]] = load i32, ptr @GInt3, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L4]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse nosync
+; CGSCC-LABEL: define {{[^@]+}}@entry3
+; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6:[0-9]+]] {
+; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @GInt3, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L0]])
+; CGSCC-NEXT:    store i32 1, ptr @GInt3, align 4
+; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr @GInt3, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L1]])
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       T:
+; CGSCC-NEXT:    store i32 [[V]], ptr @GInt3, align 4
+; CGSCC-NEXT:    [[L2:%.*]] = load i32, ptr @GInt3, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L2]])
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       F:
+; CGSCC-NEXT:    [[L3:%.*]] = load i32, ptr @GInt3, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L3]])
+; CGSCC-NEXT:    store i32 1, ptr @GInt3, align 4
+; CGSCC-NEXT:    [[L4:%.*]] = load i32, ptr @GInt3, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L4]])
+; CGSCC-NEXT:    ret void
+;
+  %l0 = load i32, ptr @GInt3
+  call void @useI32(i32 %l0)
+  store i32 1, ptr @GInt3
+  %l1 = load i32, ptr @GInt3
+  call void @useI32(i32 %l1)
+  br i1 %c, label %T, label %F
+T:
+  store i32 %v, ptr @GInt3
+  %l2 = load i32, ptr @GInt3
+  call void @useI32(i32 %l2)
+  br label %F
+F:
+  %l3 = load i32, ptr @GInt3
+  call void @useI32(i32 %l3)
+  store i32 1, ptr @GInt3
+  %l4 = load i32, ptr @GInt3
+  call void @useI32(i32 %l4)
+  ret void
+}
+
+define void @entry4(i1 %c, i32 %v) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@entry4
+; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] {
+; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @GInt4, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L0]])
+; TUNIT-NEXT:    store i32 1, ptr @GInt4, align 4
+; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr @GInt4, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L1]])
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       T:
+; TUNIT-NEXT:    store i32 [[V]], ptr @GInt4, align 4
+; TUNIT-NEXT:    [[L2:%.*]] = load i32, ptr @GInt4, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L2]])
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       F:
+; TUNIT-NEXT:    [[L3:%.*]] = load i32, ptr @GInt4, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L3]])
+; TUNIT-NEXT:    store i32 1, ptr @GInt4, align 4
+; TUNIT-NEXT:    [[L4:%.*]] = load i32, ptr @GInt4, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L4]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse nosync
+; CGSCC-LABEL: define {{[^@]+}}@entry4
+; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6]] {
+; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @GInt4, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L0]])
+; CGSCC-NEXT:    store i32 1, ptr @GInt4, align 4
+; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr @GInt4, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L1]])
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       T:
+; CGSCC-NEXT:    store i32 [[V]], ptr @GInt4, align 4
+; CGSCC-NEXT:    [[L2:%.*]] = load i32, ptr @GInt4, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L2]])
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       F:
+; CGSCC-NEXT:    [[L3:%.*]] = load i32, ptr @GInt4, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L3]])
+; CGSCC-NEXT:    store i32 1, ptr @GInt4, align 4
+; CGSCC-NEXT:    [[L4:%.*]] = load i32, ptr @GInt4, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L4]])
+; CGSCC-NEXT:    ret void
+;
+  %l0 = load i32, ptr @GInt4
+  call void @useI32(i32 %l0)
+  store i32 1, ptr @GInt4
+  %l1 = load i32, ptr @GInt4
+  call void @useI32(i32 %l1)
+  br i1 %c, label %T, label %F
+T:
+  store i32 %v, ptr @GInt4
+  %l2 = load i32, ptr @GInt4
+  call void @useI32(i32 %l2)
+  br label %F
+F:
+  %l3 = load i32, ptr @GInt4
+  call void @useI32(i32 %l3)
+  store i32 1, ptr @GInt4
+  %l4 = load i32, ptr @GInt4
+  call void @useI32(i32 %l4)
+  ret void
+}
+
+; TODO: In this test we can replace %l0, in the others above we cannot.
+define void @entry5(i1 %c, i32 %v) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@entry5
+; TUNIT-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR5]] {
+; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @GInt5, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L0]])
+; TUNIT-NEXT:    store i32 1, ptr @GInt5, align 4
+; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr @GInt5, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L1]]) #[[ATTR6:[0-9]+]]
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       T:
+; TUNIT-NEXT:    store i32 [[V]], ptr @GInt5, align 4
+; TUNIT-NEXT:    [[L2:%.*]] = load i32, ptr @GInt5, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L2]]) #[[ATTR6]]
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       F:
+; TUNIT-NEXT:    [[L3:%.*]] = load i32, ptr @GInt5, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L3]]) #[[ATTR6]]
+; TUNIT-NEXT:    store i32 1, ptr @GInt5, align 4
+; TUNIT-NEXT:    [[L4:%.*]] = load i32, ptr @GInt5, align 4
+; TUNIT-NEXT:    call void @useI32(i32 [[L4]]) #[[ATTR6]]
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse nosync
+; CGSCC-LABEL: define {{[^@]+}}@entry5
+; CGSCC-SAME: (i1 [[C:%.*]], i32 [[V:%.*]]) #[[ATTR6]] {
+; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @GInt5, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L0]])
+; CGSCC-NEXT:    store i32 1, ptr @GInt5, align 4
+; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr @GInt5, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L1]]) #[[ATTR7:[0-9]+]]
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       T:
+; CGSCC-NEXT:    store i32 [[V]], ptr @GInt5, align 4
+; CGSCC-NEXT:    [[L2:%.*]] = load i32, ptr @GInt5, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L2]]) #[[ATTR7]]
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       F:
+; CGSCC-NEXT:    [[L3:%.*]] = load i32, ptr @GInt5, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L3]]) #[[ATTR7]]
+; CGSCC-NEXT:    store i32 1, ptr @GInt5, align 4
+; CGSCC-NEXT:    [[L4:%.*]] = load i32, ptr @GInt5, align 4
+; CGSCC-NEXT:    call void @useI32(i32 [[L4]]) #[[ATTR7]]
+; CGSCC-NEXT:    ret void
+;
+  %l0 = load i32, ptr @GInt5
+  call void @useI32(i32 %l0)
+  store i32 1, ptr @GInt5
+  %l1 = load i32, ptr @GInt5
+  call void @useI32(i32 %l1) nocallback
+  br i1 %c, label %T, label %F
+T:
+  store i32 %v, ptr @GInt5
+  %l2 = load i32, ptr @GInt5
+  call void @useI32(i32 %l2) nocallback
+  br label %F
+F:
+  %l3 = load i32, ptr @GInt5
+  call void @useI32(i32 %l3) nocallback
+  store i32 1, ptr @GInt5
+  %l4 = load i32, ptr @GInt5
+  call void @useI32(i32 %l4) nocallback
+  ret void
+}
+
+
+declare void @use_4_i8(i8, i8, i8, i8) nocallback
+
+define void @exclusion_set1(i1 %c1, i1 %c2, i1 %c3) {
+; CHECK-LABEL: define {{[^@]+}}@exclusion_set1
+; CHECK-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 3
+; CHECK-NEXT:    [[L0_A:%.*]] = load i8, ptr [[CALL_H2S]], align 1
+; CHECK-NEXT:    [[L1_A:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_A:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_A:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef [[L0_A]], i8 noundef [[L1_A]], i8 noundef [[L2_A]], i8 noundef [[L3_A]])
+; CHECK-NEXT:    store i8 1, ptr [[CALL_H2S]], align 4
+; CHECK-NEXT:    [[L1_B:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_B:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_B:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_B]], i8 noundef [[L2_B]], i8 noundef [[L3_B]])
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[L1_C:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_C:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_C:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_C]], i8 noundef [[L2_C]], i8 noundef [[L3_C]])
+; CHECK-NEXT:    store i8 2, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[L2_D:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_D:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef [[L2_D]], i8 noundef [[L3_D]])
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    [[L2_E:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_E:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef [[L2_E]], i8 noundef [[L3_E]])
+; CHECK-NEXT:    store i8 3, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[L3_F:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef [[L3_F]])
+; CHECK-NEXT:    br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]]
+; CHECK:       if.merge1:
+; CHECK-NEXT:    [[L1_G:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_G:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_G:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_G]], i8 noundef [[L2_G]], i8 noundef [[L3_G]])
+; CHECK-NEXT:    br label [[IF_MERGE2]]
+; CHECK:       if.merge2:
+; CHECK-NEXT:    [[L1_H:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_H:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_H:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_H]], i8 noundef [[L2_H]], i8 noundef [[L3_H]])
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[L3_I:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef [[L3_I]])
+; CHECK-NEXT:    store i8 4, ptr [[GEP3]], align 4
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4)
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[L1_K:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT:    [[L2_K:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT:    [[L3_K:%.*]] = load i8, ptr [[GEP3]], align 1
+; CHECK-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef [[L1_K]], i8 noundef [[L2_K]], i8 noundef [[L3_K]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = call noalias i8* @calloc(i64 1, i64 4) norecurse
+  %gep0 = getelementptr inbounds i8, i8* %call, i64 0
+  %gep1 = getelementptr inbounds i8, i8* %call, i64 1
+  %gep2 = getelementptr inbounds i8, i8* %call, i64 2
+  %gep3 = getelementptr inbounds i8, i8* %call, i64 3
+
+  %l0_a = load i8, i8* %gep0
+  %l1_a = load i8, i8* %gep1
+  %l2_a = load i8, i8* %gep2
+  %l3_a = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_a, i8 %l1_a, i8 %l2_a, i8 %l3_a)
+
+  store i8 1, i8* %gep0, align 4
+
+  %l0_b = load i8, i8* %gep0
+  %l1_b = load i8, i8* %gep1
+  %l2_b = load i8, i8* %gep2
+  %l3_b = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_b, i8 %l1_b, i8 %l2_b, i8 %l3_b)
+
+  br i1 %c1, label %if.merge1, label %if.then
+
+if.then:
+  %l0_c = load i8, i8* %gep0
+  %l1_c = load i8, i8* %gep1
+  %l2_c = load i8, i8* %gep2
+  %l3_c = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_c, i8 %l1_c, i8 %l2_c, i8 %l3_c)
+
+  store i8 2, i8* %gep1, align 4
+
+  %l0_d = load i8, i8* %gep0
+  %l1_d = load i8, i8* %gep1
+  %l2_d = load i8, i8* %gep2
+  %l3_d = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_d, i8 %l1_d, i8 %l2_d, i8 %l3_d)
+
+  br i1 %c1, label %if.merge1, label %if.then2
+
+if.then2:
+  %l0_e = load i8, i8* %gep0
+  %l1_e = load i8, i8* %gep1
+  %l2_e = load i8, i8* %gep2
+  %l3_e = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_e, i8 %l1_e, i8 %l2_e, i8 %l3_e)
+
+  store i8 3, i8* %gep2, align 4
+
+  %l0_f = load i8, i8* %gep0
+  %l1_f = load i8, i8* %gep1
+  %l2_f = load i8, i8* %gep2
+  %l3_f = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_f, i8 %l1_f, i8 %l2_f, i8 %l3_f)
+
+  br i1 %c2, label %if.merge2, label %if.then3
+
+if.merge1:
+
+  %l0_g = load i8, i8* %gep0
+  %l1_g = load i8, i8* %gep1
+  %l2_g = load i8, i8* %gep2
+  %l3_g = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_g, i8 %l1_g, i8 %l2_g, i8 %l3_g)
+
+  br label %if.merge2
+
+if.merge2:
+
+  %l0_h = load i8, i8* %gep0
+  %l1_h = load i8, i8* %gep1
+  %l2_h = load i8, i8* %gep2
+  %l3_h = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_h, i8 %l1_h, i8 %l2_h, i8 %l3_h)
+
+  br label %if.end
+
+if.then3:
+
+  %l0_i = load i8, i8* %gep0
+  %l1_i = load i8, i8* %gep1
+  %l2_i = load i8, i8* %gep2
+  %l3_i = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_i, i8 %l1_i, i8 %l2_i, i8 %l3_i)
+
+  store i8 4, i8* %gep3, align 4
+
+  %l0_j = load i8, i8* %gep0
+  %l1_j = load i8, i8* %gep1
+  %l2_j = load i8, i8* %gep2
+  %l3_j = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_j, i8 %l1_j, i8 %l2_j, i8 %l3_j)
+
+  br label %if.end
+
+if.end:
+  %l0_k = load i8, i8* %gep0
+  %l1_k = load i8, i8* %gep1
+  %l2_k = load i8, i8* %gep2
+  %l3_k = load i8, i8* %gep3
+  call void @use_4_i8(i8 %l0_k, i8 %l1_k, i8 %l2_k, i8 %l3_k)
+
+  call void @free(i8* %call) norecurse
+  ret void
+}
+
+define void @exclusion_set2(i1 %c1, i1 %c2, i1 %c3) {
+; TUNIT: Function Attrs: norecurse
+; TUNIT-LABEL: define {{[^@]+}}@exclusion_set2
+; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR7:[0-9]+]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    call void @use_4_i8(i8 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]]
+; TUNIT:       if.then:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4)
+; TUNIT-NEXT:    br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]]
+; TUNIT:       if.then2:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4)
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4)
+; TUNIT-NEXT:    br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]]
+; TUNIT:       if.merge1:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    br label [[IF_MERGE2]]
+; TUNIT:       if.merge2:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    br label [[IF_END:%.*]]
+; TUNIT:       if.then3:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4)
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4)
+; TUNIT-NEXT:    br label [[IF_END]]
+; TUNIT:       if.end:
+; TUNIT-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse
+; CGSCC-LABEL: define {{[^@]+}}@exclusion_set2
+; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR8:[0-9]+]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    call void @use_4_i8(i8 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    br i1 [[C1]], label [[IF_MERGE1:%.*]], label [[IF_THEN:%.*]]
+; CGSCC:       if.then:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4)
+; CGSCC-NEXT:    br i1 [[C1]], label [[IF_MERGE1]], label [[IF_THEN2:%.*]]
+; CGSCC:       if.then2:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 3, i8 4)
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4)
+; CGSCC-NEXT:    br i1 [[C2]], label [[IF_MERGE2:%.*]], label [[IF_THEN3:%.*]]
+; CGSCC:       if.merge1:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    br label [[IF_MERGE2]]
+; CGSCC:       if.merge2:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    br label [[IF_END:%.*]]
+; CGSCC:       if.then3:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 4)
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 noundef 2, i8 noundef 3, i8 noundef 4)
+; CGSCC-NEXT:    br label [[IF_END]]
+; CGSCC:       if.end:
+; CGSCC-NEXT:    call void @use_4_i8(i8 noundef 1, i8 2, i8 3, i8 4)
+; CGSCC-NEXT:    ret void
+;
+entry:
+  %alloc = alloca i8, i32 4
+  %gep0 = getelementptr inbounds i8, ptr %alloc, i64 0
+  %gep1 = getelementptr inbounds i8, ptr %alloc, i64 1
+  %gep2 = getelementptr inbounds i8, ptr %alloc, i64 2
+  %gep3 = getelementptr inbounds i8, ptr %alloc, i64 3
+
+  %l0_a = load i8, ptr %gep0
+  %l1_a = load i8, ptr %gep1
+  %l2_a = load i8, ptr %gep2
+  %l3_a = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_a, i8 %l1_a, i8 %l2_a, i8 %l3_a)
+
+  store i8 1, ptr %gep0, align 4
+
+  %l0_b = load i8, ptr %gep0
+  %l1_b = load i8, ptr %gep1
+  %l2_b = load i8, ptr %gep2
+  %l3_b = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_b, i8 %l1_b, i8 %l2_b, i8 %l3_b)
+
+  br i1 %c1, label %if.merge1, label %if.then
+
+if.then:
+  %l0_c = load i8, ptr %gep0
+  %l1_c = load i8, ptr %gep1
+  %l2_c = load i8, ptr %gep2
+  %l3_c = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_c, i8 %l1_c, i8 %l2_c, i8 %l3_c)
+
+  store i8 2, ptr %gep1, align 4
+
+  %l0_d = load i8, ptr %gep0
+  %l1_d = load i8, ptr %gep1
+  %l2_d = load i8, ptr %gep2
+  %l3_d = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_d, i8 %l1_d, i8 %l2_d, i8 %l3_d)
+
+  br i1 %c1, label %if.merge1, label %if.then2
+
+if.then2:
+  %l0_e = load i8, ptr %gep0
+  %l1_e = load i8, ptr %gep1
+  %l2_e = load i8, ptr %gep2
+  %l3_e = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_e, i8 %l1_e, i8 %l2_e, i8 %l3_e)
+
+  store i8 3, ptr %gep2, align 4
+
+  %l0_f = load i8, ptr %gep0
+  %l1_f = load i8, ptr %gep1
+  %l2_f = load i8, ptr %gep2
+  %l3_f = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_f, i8 %l1_f, i8 %l2_f, i8 %l3_f)
+
+  br i1 %c2, label %if.merge2, label %if.then3
+
+if.merge1:
+
+  %l0_g = load i8, ptr %gep0
+  %l1_g = load i8, ptr %gep1
+  %l2_g = load i8, ptr %gep2
+  %l3_g = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_g, i8 %l1_g, i8 %l2_g, i8 %l3_g)
+
+  br label %if.merge2
+
+if.merge2:
+
+  %l0_h = load i8, ptr %gep0
+  %l1_h = load i8, ptr %gep1
+  %l2_h = load i8, ptr %gep2
+  %l3_h = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_h, i8 %l1_h, i8 %l2_h, i8 %l3_h)
+
+  br label %if.end
+
+if.then3:
+
+  %l0_i = load i8, ptr %gep0
+  %l1_i = load i8, ptr %gep1
+  %l2_i = load i8, ptr %gep2
+  %l3_i = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_i, i8 %l1_i, i8 %l2_i, i8 %l3_i)
+
+  store i8 4, ptr %gep3, align 4
+
+  %l0_j = load i8, ptr %gep0
+  %l1_j = load i8, ptr %gep1
+  %l2_j = load i8, ptr %gep2
+  %l3_j = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_j, i8 %l1_j, i8 %l2_j, i8 %l3_j)
+
+  br label %if.end
+
+if.end:
+  %l0_k = load i8, ptr %gep0
+  %l1_k = load i8, ptr %gep1
+  %l2_k = load i8, ptr %gep2
+  %l3_k = load i8, ptr %gep3
+  call void @use_4_i8(i8 %l0_k, i8 %l1_k, i8 %l2_k, i8 %l3_k)
+
+  ret void
+}
+
+declare void @usei32(i32) nocallback nosync
+define internal void @exclusion_set3_helper(i1 %c, ptr %p) {
+; TUNIT: Function Attrs: nosync
+; TUNIT-LABEL: define {{[^@]+}}@exclusion_set3_helper
+; TUNIT-SAME: (i1 [[C:%.*]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] {
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       t:
+; TUNIT-NEXT:    store i32 42, ptr [[P]], align 4
+; TUNIT-NEXT:    br label [[M:%.*]]
+; TUNIT:       f:
+; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT:    [[ADD:%.*]] = add i32 [[L]], 1
+; TUNIT-NEXT:    store i32 [[ADD]], ptr [[P]], align 4
+; TUNIT-NEXT:    [[CND:%.*]] = icmp eq i32 [[L]], 100
+; TUNIT-NEXT:    br i1 [[CND]], label [[F2:%.*]], label [[F]]
+; TUNIT:       f2:
+; TUNIT-NEXT:    [[USE1:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT:    call void @usei32(i32 [[USE1]])
+; TUNIT-NEXT:    store i32 77, ptr [[P]], align 4
+; TUNIT-NEXT:    call void @exclusion_set3_helper(i1 noundef true, ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P]]) #[[ATTR8]]
+; TUNIT-NEXT:    [[USE2:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT:    call void @usei32(i32 [[USE2]])
+; TUNIT-NEXT:    br label [[T]]
+; TUNIT:       m:
+; TUNIT-NEXT:    [[USE3:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT:    call void @usei32(i32 [[USE3]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: nosync
+; CGSCC-LABEL: define {{[^@]+}}@exclusion_set3_helper
+; CGSCC-SAME: (i1 [[C:%.*]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       t:
+; CGSCC-NEXT:    store i32 42, ptr [[P]], align 4
+; CGSCC-NEXT:    br label [[M:%.*]]
+; CGSCC:       f:
+; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr [[P]], align 4
+; CGSCC-NEXT:    [[ADD:%.*]] = add i32 [[L]], 1
+; CGSCC-NEXT:    store i32 [[ADD]], ptr [[P]], align 4
+; CGSCC-NEXT:    [[CND:%.*]] = icmp eq i32 [[L]], 100
+; CGSCC-NEXT:    br i1 [[CND]], label [[F2:%.*]], label [[F]]
+; CGSCC:       f2:
+; CGSCC-NEXT:    [[USE1:%.*]] = load i32, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @usei32(i32 [[USE1]])
+; CGSCC-NEXT:    store i32 77, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @exclusion_set3_helper(i1 noundef true, ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[P]]) #[[ATTR5]]
+; CGSCC-NEXT:    [[USE2:%.*]] = load i32, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @usei32(i32 [[USE2]])
+; CGSCC-NEXT:    br label [[T]]
+; CGSCC:       m:
+; CGSCC-NEXT:    [[USE3:%.*]] = load i32, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @usei32(i32 [[USE3]])
+; CGSCC-NEXT:    ret void
+;
+  br i1 %c, label %t, label %f
+t:
+  store i32 42, ptr %p
+  br label %m
+f:
+  %l = load i32, ptr %p
+  %add = add i32 %l, 1
+  store i32 %add, ptr %p
+  %cnd = icmp eq i32 %l, 100
+  br i1 %cnd, label %f2, label %f
+f2:
+  %use1 = load i32, ptr %p
+  call void @usei32(i32 %use1)
+  store i32 77, ptr %p
+  call void @exclusion_set3_helper(i1 true, ptr %p)
+  %use2 = load i32, ptr %p
+  call void @usei32(i32 %use2)
+  br label %t
+m:
+  %use3 = load i32, ptr %p
+  call void @usei32(i32 %use3)
+  ret void
+}
+
+define i32 @exclusion_set3(i1 %c) {
+; TUNIT: Function Attrs: norecurse nosync
+; TUNIT-LABEL: define {{[^@]+}}@exclusion_set3
+; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR5]] {
+; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    store i32 3, ptr [[A]], align 4
+; TUNIT-NEXT:    call void @exclusion_set3_helper(i1 [[C]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR8]]
+; TUNIT-NEXT:    [[FINAL:%.*]] = load i32, ptr [[A]], align 4
+; TUNIT-NEXT:    ret i32 [[FINAL]]
+;
+; CGSCC: Function Attrs: nosync
+; CGSCC-LABEL: define {{[^@]+}}@exclusion_set3
+; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CGSCC-NEXT:    store i32 3, ptr [[A]], align 4
+; CGSCC-NEXT:    call void @exclusion_set3_helper(i1 [[C]], ptr noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]])
+; CGSCC-NEXT:    [[FINAL:%.*]] = load i32, ptr [[A]], align 4
+; CGSCC-NEXT:    ret i32 [[FINAL]]
+;
+  %a = alloca i32
+  store i32 3, ptr %a
+  call void @exclusion_set3_helper(i1 %c, ptr %a)
+  %final = load i32, ptr %a
+  ret i32 %final
+}
+
+;.
+; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nosync }
+; TUNIT: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+; TUNIT: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR5]] = { norecurse nosync }
+; TUNIT: attributes #[[ATTR6]] = { nocallback }
+; TUNIT: attributes #[[ATTR7]] = { norecurse }
+; TUNIT: attributes #[[ATTR8]] = { nosync }
+; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; TUNIT: attributes #[[ATTR10]] = { nosync nounwind }
+;.
+; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nosync }
+; CGSCC: attributes #[[ATTR2:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+; CGSCC: attributes #[[ATTR3:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR5]] = { nosync }
+; CGSCC: attributes #[[ATTR6]] = { norecurse nosync }
+; CGSCC: attributes #[[ATTR7]] = { nocallback }
+; CGSCC: attributes #[[ATTR8]] = { norecurse }
+; CGSCC: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CGSCC: attributes #[[ATTR10]] = { nounwind }
+;.

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
new file mode 100644
index 0000000000000..add432de2e34a
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@G = internal addrspace(3) global i32 undef, align 4
+
+; Make sure we do not delete the stores to @G without also replacing the load with `1`.
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+;.
+define void @kernel() "kernel" {
+; TUNIT: Function Attrs: norecurse
+; TUNIT-LABEL: define {{[^@]+}}@kernel
+; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; TUNIT:       if.then:
+; TUNIT-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    br label [[IF_MERGE:%.*]]
+; TUNIT:       if.else:
+; TUNIT-NEXT:    call void @barrier() #[[ATTR4:[0-9]+]]
+; TUNIT-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    call void @use1(i32 [[L]]) #[[ATTR4]]
+; TUNIT-NEXT:    br label [[IF_MERGE]]
+; TUNIT:       if.merge:
+; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; TUNIT:       if.then2:
+; TUNIT-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    call void @barrier() #[[ATTR4]]
+; TUNIT-NEXT:    br label [[IF_END]]
+; TUNIT:       if.end:
+; TUNIT-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse
+; CGSCC-LABEL: define {{[^@]+}}@kernel
+; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CGSCC:       if.then:
+; CGSCC-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    br label [[IF_MERGE:%.*]]
+; CGSCC:       if.else:
+; CGSCC-NEXT:    call void @barrier()
+; CGSCC-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    call void @use1(i32 [[L]])
+; CGSCC-NEXT:    br label [[IF_MERGE]]
+; CGSCC:       if.merge:
+; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CGSCC:       if.then2:
+; CGSCC-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    call void @barrier()
+; CGSCC-NEXT:    br label [[IF_END]]
+; CGSCC:       if.end:
+; CGSCC-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; CGSCC-NEXT:    ret void
+;
+  %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+  %cmp = icmp eq i32 %call, -1
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  store i32 1, i32 addrspace(3)* @G
+  br label %if.merge
+if.else:
+  call void @barrier();
+  %l = load i32, i32 addrspace(3)* @G
+  call void @use1(i32 %l)
+  br label %if.merge
+if.merge:
+  br i1 %cmp, label %if.then2, label %if.end
+if.then2:
+  store i32 2, i32 addrspace(3)* @G
+  call void @barrier();
+  br label %if.end
+if.end:
+  call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+  ret void
+}
+
+declare void @barrier() norecurse nounwind nocallback
+declare void @use1(i32) nosync norecurse nounwind nocallback
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
+
+!llvm.module.flags = !{!0, !1}
+!nvvm.annotations = !{!2}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
+!2 = !{void ()* @kernel, !"kernel", i32 1}
+
+;.
+; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
+; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+; TUNIT: attributes #[[ATTR4]] = { nounwind }
+;.
+; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
+; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}


        


More information about the llvm-commits mailing list