[llvm] 07c3753 - [OpenMP][FIX] Restrict more unsound assmptions about threading

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 13 22:58:45 PST 2022


Author: Johannes Doerfert
Date: 2022-12-13T22:58:33-08:00
New Revision: 07c375348083170e39c9498a42a9679c7e08f07f

URL: https://github.com/llvm/llvm-project/commit/07c375348083170e39c9498a42a9679c7e08f07f
DIFF: https://github.com/llvm/llvm-project/commit/07c375348083170e39c9498a42a9679c7e08f07f.diff

LOG: [OpenMP][FIX] Restrict more unsound assmptions about threading

Even if all loads and stores are in `nosync` functions we cannot
guarantee there is no synchronization going on between them. As such, we
cannot use CFG reasoning. We could check the entire module, or, what
happens now to minimize test churn, is to check if all accesses are in
the same function that is `nosync`. A follow up will undo some of the
regressions where possible.

Similarly, reachability cannot be used to exclude an access if the
access is not known to be executed by the same thread as the given
instruction.

The OpenMP-opt test was added for the latter problem.

Added: 
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Modified: 
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
    llvm/test/Transforms/Attributor/internal-noalias.ll
    llvm/test/Transforms/Attributor/value-simplify-assume.ll
    llvm/test/Transforms/Attributor/value-simplify-gpu.ll
    llvm/test/Transforms/Attributor/value-simplify-reachability.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 7791a39616e32..a472d7bbe93cf 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1076,25 +1076,23 @@ struct AAPointerInfoImpl
         QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
     const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
         IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
-    const bool NoSync = NoSyncAA.isAssumedNoSync();
+    bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
 
     // Helper to determine if we need to consider threading, which we cannot
     // right now. However, if the function is (assumed) nosync or the thread
     // executing all instructions is the main thread only we can ignore
     // threading.
     auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
-      if (NoSync)
-        return true;
       if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
         return true;
       return false;
     };
 
     // Helper to determine if the access is executed by the same thread as the
-    // load, for now it is sufficient to avoid any potential threading effects
-    // as we cannot deal with them anyway.
-    auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
-      return CanIgnoreThreading(*Acc.getLocalInst());
+    // given instruction, for now it is sufficient to avoid any potential
+    // threading effects as we cannot deal with them anyway.
+    auto IsSameThreadAsInst = [&](const Access &Acc) -> bool {
+      return AllInSameNoSyncFn || CanIgnoreThreading(*Acc.getLocalInst());
     };
 
     // TODO: Use inter-procedural reachability and dominance.
@@ -1180,10 +1178,14 @@ struct AAPointerInfoImpl
       if (FindInterferingWrites && Dominates)
         HasBeenWrittenTo = true;
 
+      // Track if all interesting accesses are in the same `nosync` function as
+      // the given instruction.
+      AllInSameNoSyncFn &= Acc.getRemoteInst()->getFunction() == &Scope;
+
       // For now we only filter accesses based on CFG reasoning which does not
       // work yet if we have threading effects, or the access is complicated.
       if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
-          IsSameThreadAsLoad(Acc))
+          IsSameThreadAsInst(Acc))
         DominatingWrites.insert(&Acc);
 
       InterferingAccesses.push_back({&Acc, Exact});
@@ -1196,6 +1198,8 @@ struct AAPointerInfoImpl
     // the worst case quadratic as we are looking for another write that will
     // hide the effect of this one.
     auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+      if (!IsSameThreadAsInst(Acc))
+        return false;
       if ((!Acc.isWriteOrAssumption() ||
            !AA::isPotentiallyReachable(A, *Acc.getRemoteInst(), I, QueryingAA,
                                        &ExclusionSet, IsLiveInCalleeCB)) &&
@@ -1206,8 +1210,6 @@ struct AAPointerInfoImpl
 
       if (!DT || !UseDominanceReasoning)
         return false;
-      if (!IsSameThreadAsLoad(Acc))
-        return false;
       if (!DominatingWrites.count(&Acc))
         return false;
       for (const Access *DomAcc : DominatingWrites) {
@@ -1227,7 +1229,8 @@ struct AAPointerInfoImpl
     // succeeded for all or not.
     unsigned NumInterferingAccesses = InterferingAccesses.size();
     for (auto &It : InterferingAccesses) {
-      if (!CanUseCFGResoning || NumInterferingAccesses > MaxInterferingAccesses ||
+      if (!AllInSameNoSyncFn ||
+          NumInterferingAccesses > MaxInterferingAccesses ||
           !CanSkipAccess(*It.first, It.second)) {
         if (!UserCB(*It.first, It.second))
           return false;

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
index 0347cc3aba8ae..3536ee0849bfb 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 ;; This function returns its second argument on all return statements

diff  --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll
index 2355660e1ee18..74608c93b3bb3 100644
--- a/llvm/test/Transforms/Attributor/internal-noalias.ll
+++ b/llvm/test/Transforms/Attributor/internal-noalias.ll
@@ -7,8 +7,8 @@ define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 {
 ; TUNIT-LABEL: define {{[^@]+}}@visible
 ; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4:[0-9]+]]
-; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR4]]
+; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3:[0-9]+]]
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree readonly align 4 [[B]]) #[[ATTR3]]
 ; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
 ; TUNIT-NEXT:    ret i32 [[ADD]]
 ;
@@ -36,7 +36,7 @@ define private i32 @noalias_args(i32* %A, i32* %B) #0 {
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
 ; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
 ; TUNIT-NEXT:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]]
 ; TUNIT-NEXT:    ret i32 [[ADD2]]
 ;
@@ -94,8 +94,8 @@ define dso_local i32 @visible_local(i32* %A) #0 {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    store i32 5, i32* [[B]], align 4
-; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
-; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR4]]
+; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR3]]
 ; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
 ; TUNIT-NEXT:    ret i32 [[ADD]]
 ;
@@ -158,10 +158,11 @@ define i32 @visible_local_2() {
 }
 
 define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 {
-; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable
+; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
 ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
-; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR3:[0-9]+]] {
-; TUNIT-NEXT:    ret i32 undef
+; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:    [[T0:%.*]] = load i32, i32* [[B]], align 4
+; TUNIT-NEXT:    ret i32 [[T0]]
 ;
 ; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
 ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn
@@ -180,8 +181,9 @@ define i32 @visible_local_3() {
 ; TUNIT-LABEL: define {{[^@]+}}@visible_local_3
 ; TUNIT-SAME: () #[[ATTR2]] {
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
-; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT:    ret i32 5
+; TUNIT-NEXT:    store i32 5, i32* [[B]], align 4
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]]
+; TUNIT-NEXT:    ret i32 [[CALL]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@visible_local_3
@@ -203,9 +205,8 @@ attributes #1 = { argmemonly noinline nounwind uwtable willreturn}
 ; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }
 ; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable }
 ; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable }
-; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind }
-; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind }
+; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn }
 ;.
 ; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable }
 ; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable }

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll
index b9f6f27296e5a..211c043eaae2c 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 @Gstatic_int1 = internal global i32 zeroinitializer, align 4
@@ -422,6 +422,9 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -472,6 +475,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -524,6 +528,9 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -592,7 +599,9 @@ define i1 @assume_5c_nr(i1 %cond) norecurse {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr
 ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
-; TUNIT-NEXT:    call void @llvm.assume(i1 noundef true) #[[ATTR6]]
+; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -1037,6 +1046,9 @@ define i1 @assume_3(i1 %arg, i1 %cond) {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_3
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -1087,6 +1099,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_4
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -1139,6 +1152,9 @@ define i1 @assume_5(i1 %arg, i1 %cond) {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_5
 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
+; TUNIT-NEXT:    store i1 [[ARG]], i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
@@ -1207,7 +1223,9 @@ define i1 @assume_5c(i1 %cond) {
 ; TUNIT-LABEL: define {{[^@]+}}@assume_5c
 ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[STACK:%.*]] = alloca i1, align 1
-; TUNIT-NEXT:    call void @llvm.assume(i1 noundef true) #[[ATTR6]]
+; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1
+; TUNIT-NEXT:    [[L1:%.*]] = load i1, i1* [[STACK]], align 1
+; TUNIT-NEXT:    call void @llvm.assume(i1 noundef [[L1]]) #[[ATTR6]]
 ; TUNIT-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
 ; TUNIT:       t:
 ; TUNIT-NEXT:    store i1 true, i1* [[STACK]], align 1

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
index 7ae99e48ff8e6..6e6113e6e2cc4 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll
@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 target triple = "amdgcn-amd-amdhsa"
 
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
 @ReachableKernel = internal addrspace(3) global i32 3, align 4
 @UnreachableKernel = internal addrspace(3) global i32 42, align 4
 @ReachableKernelAS0 = internal global i32 7, align 4
@@ -110,7 +111,8 @@ define internal void @level2Kernela() {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -138,7 +140,8 @@ define internal void @level2Kernelb() {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]]
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef [[TMP2]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -160,18 +163,12 @@ entry:
 }
 
 define internal void @level2Kernelall_late() {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late
-; TUNIT-SAME: () #[[ATTR2]] {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late
-; CGSCC-SAME: () #[[ATTR2]] {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
-; CGSCC-NEXT:    ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4
@@ -211,10 +208,12 @@ define internal void @level1(i32 %C) {
 ; TUNIT-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0
 ; TUNIT-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; TUNIT:       if.then:
-; TUNIT-NEXT:    call void @level2a() #[[ATTR3]]
+; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT:    call void @level2a(i32 [[TMP0]]) #[[ATTR3]]
 ; TUNIT-NEXT:    br label [[IF_END:%.*]]
 ; TUNIT:       if.else:
-; TUNIT-NEXT:    call void @level2b() #[[ATTR3]]
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4
+; TUNIT-NEXT:    call void @level2b(i32 [[TMP1]]) #[[ATTR3]]
 ; TUNIT-NEXT:    br label [[IF_END]]
 ; TUNIT:       if.end:
 ; TUNIT-NEXT:    call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]]
@@ -263,6 +262,7 @@ define internal void @level2all_early(i32* %addr) {
 ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    store i32 17, i32* [[ADDR]], align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
@@ -282,11 +282,14 @@ entry:
 define internal void @level2a(i32* %addr) {
 ; TUNIT: Function Attrs: norecurse nosync nounwind
 ; TUNIT-LABEL: define {{[^@]+}}@level2a
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT:    [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[QQQQ2:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[QQQQ2]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -310,11 +313,14 @@ entry:
 define internal void @level2b(i32* %addr) {
 ; TUNIT: Function Attrs: norecurse nosync nounwind
 ; TUNIT-LABEL: define {{[^@]+}}@level2b
-; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]]
+; TUNIT-NEXT:    [[ADDR_PRIV:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; TUNIT-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4
+; TUNIT-NEXT:    call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 [[TMP3]]) #[[ATTR5]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync nounwind
@@ -336,20 +342,13 @@ entry:
 }
 
 define internal void @level2all_late(i32* %addr) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define {{[^@]+}}@level2all_late
-; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; TUNIT-NEXT:    ret void
-;
-; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define {{[^@]+}}@level2all_late
-; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
-; CGSCC-NEXT:    store i32 5, i32* [[ADDR]], align 4
-; CGSCC-NEXT:    ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define {{[^@]+}}@level2all_late
+; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4
+; CHECK-NEXT:    store i32 5, i32* [[ADDR]], align 4
+; CHECK-NEXT:    ret void
 ;
 entry:
   store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll
index e1353511705d1..ec1baf475afa7 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-reachability.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-reachability.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 @GInt1 = internal global i32 undef, align 4
@@ -711,7 +711,8 @@ define internal void @exclusion_set3_helper(i1 %c, ptr %p) {
 ; TUNIT-NEXT:    call void @usei32(i32 [[USE2]])
 ; TUNIT-NEXT:    br label [[T]]
 ; TUNIT:       m:
-; TUNIT-NEXT:    call void @usei32(i32 42)
+; TUNIT-NEXT:    [[USE3:%.*]] = load i32, ptr [[P]], align 4
+; TUNIT-NEXT:    call void @usei32(i32 [[USE3]])
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nosync

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
new file mode 100644
index 0000000000000..add432de2e34a
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+ at G = internal addrspace(3) global i32 undef, align 4
+
+; Make sure we do not delete the stores to @G without also replacing the load with `1`.
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+;.
+define void @kernel() "kernel" {
+; TUNIT: Function Attrs: norecurse
+; TUNIT-LABEL: define {{[^@]+}}@kernel
+; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; TUNIT:       if.then:
+; TUNIT-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    br label [[IF_MERGE:%.*]]
+; TUNIT:       if.else:
+; TUNIT-NEXT:    call void @barrier() #[[ATTR4:[0-9]+]]
+; TUNIT-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    call void @use1(i32 [[L]]) #[[ATTR4]]
+; TUNIT-NEXT:    br label [[IF_MERGE]]
+; TUNIT:       if.merge:
+; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; TUNIT:       if.then2:
+; TUNIT-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; TUNIT-NEXT:    call void @barrier() #[[ATTR4]]
+; TUNIT-NEXT:    br label [[IF_END]]
+; TUNIT:       if.end:
+; TUNIT-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; TUNIT-NEXT:    ret void
+;
+; CGSCC: Function Attrs: norecurse
+; CGSCC-LABEL: define {{[^@]+}}@kernel
+; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CGSCC:       if.then:
+; CGSCC-NEXT:    store i32 1, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    br label [[IF_MERGE:%.*]]
+; CGSCC:       if.else:
+; CGSCC-NEXT:    call void @barrier()
+; CGSCC-NEXT:    [[L:%.*]] = load i32, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    call void @use1(i32 [[L]])
+; CGSCC-NEXT:    br label [[IF_MERGE]]
+; CGSCC:       if.merge:
+; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CGSCC:       if.then2:
+; CGSCC-NEXT:    store i32 2, i32 addrspace(3)* @G, align 4
+; CGSCC-NEXT:    call void @barrier()
+; CGSCC-NEXT:    br label [[IF_END]]
+; CGSCC:       if.end:
+; CGSCC-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+; CGSCC-NEXT:    ret void
+;
+  %call = call i32 @__kmpc_target_init(%struct.ident_t* undef, i8 1, i1 false)
+  %cmp = icmp eq i32 %call, -1
+  br i1 %cmp, label %if.then, label %if.else
+if.then:
+  store i32 1, i32 addrspace(3)* @G
+  br label %if.merge
+if.else:
+  call void @barrier();
+  %l = load i32, i32 addrspace(3)* @G
+  call void @use1(i32 %l)
+  br label %if.merge
+if.merge:
+  br i1 %cmp, label %if.then2, label %if.end
+if.then2:
+  store i32 2, i32 addrspace(3)* @G
+  call void @barrier();
+  br label %if.end
+if.end:
+  call void @__kmpc_target_deinit(%struct.ident_t* undef, i8 1)
+  ret void
+}
+
+declare void @barrier() norecurse nounwind nocallback
+declare void @use1(i32) nosync norecurse nounwind nocallback
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) nocallback
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8) nocallback
+
+!llvm.module.flags = !{!0, !1}
+!nvvm.annotations = !{!2}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
+!2 = !{void ()* @kernel, !"kernel", i32 1}
+
+;.
+; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
+; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+; TUNIT: attributes #[[ATTR4]] = { nounwind }
+;.
+; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
+; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META2:![0-9]+]] = !{void ()* @kernel, !"kernel", i32 1}
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}


        


More information about the llvm-commits mailing list