[llvm] b89558a - [OpenMP][FIX] Properly track and lookup Execution Domains

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 20 17:44:58 PDT 2023


Author: Johannes Doerfert
Date: 2023-03-20T17:44:24-07:00
New Revision: b89558a2ae4b5b20a6f3e8ba0295439f947fd38c

URL: https://github.com/llvm/llvm-project/commit/b89558a2ae4b5b20a6f3e8ba0295439f947fd38c
DIFF: https://github.com/llvm/llvm-project/commit/b89558a2ae4b5b20a6f3e8ba0295439f947fd38c.diff

LOG: [OpenMP][FIX] Properly track and lookup Execution Domains

This is a two part fix. First, we need two Execution Domains (ED) to
track the values of a function. One for incoming values and one for
outgoing values. This was conflated before. Second, at the function
entry we need to look at the incoming information from call sites not
iterate over non-existing predecessors.

Added: 
    llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll
    llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll

Modified: 
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 04ba96481fb01..624e04611f4f4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -10999,7 +10999,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
     InformationCache &InfoCache = A.getInfoCache();
     if (InfoCache.isOnlyUsedByAssume(LI)) {
       if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
-            if (!I)
+            if (!I || isa<AssumeInst>(I))
               return true;
             if (auto *SI = dyn_cast<StoreInst>(I))
               return A.isAssumedDead(SI->getOperandUse(0), this,

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 7fd82df4910b2..a25537e00fec6 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instruction.h"
@@ -2663,7 +2664,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
                           bool InitialEdgeOnly = false);
 
   /// Accumulate information for the entry block in \p EntryBBED.
-  void handleEntryBB(Attributor &A, ExecutionDomainTy &EntryBBED);
+  void handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);
 
   /// See AbstractAttribute::updateImpl.
   ChangeStatus updateImpl(Attributor &A) override;
@@ -2736,12 +2737,15 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       break;
     } while ((CurI = CurI->getPrevNonDebugInstruction()));
 
-    if (!CurI &&
-        !llvm::all_of(
-            predecessors(I.getParent()), [&](const BasicBlock *PredBB) {
-              return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
-            })) {
-      return false;
+    if (!CurI) {
+      const BasicBlock *BB = I.getParent();
+      if (BB == &BB->getParent()->getEntryBlock())
+        return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
+      if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
+            return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
+          })) {
+        return false;
+      }
     }
 
     // On neither traversal we found a anything but aligned barriers.
@@ -2761,7 +2765,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
   ExecutionDomainTy getFunctionExecutionDomain() const override {
     assert(isValidState() &&
            "No request should be made against an invalid state!");
-    return BEDMap.lookup(nullptr);
+    return InterProceduralED;
   }
   ///}
 
@@ -2810,6 +2814,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
     return false;
   };
 
+  /// Mapping containing information about the function for other AAs.
+  ExecutionDomainTy InterProceduralED;
+
   /// Mapping containing information per block.
   DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
   DenseMap<const CallBase *, ExecutionDomainTy> CEDMap;
@@ -2844,26 +2851,29 @@ void AAExecutionDomainFunction::mergeInPredecessor(
     ED.clearAssumeInstAndAlignedBarriers();
 }
 
-void AAExecutionDomainFunction::handleEntryBB(Attributor &A,
+void AAExecutionDomainFunction::handleCallees(Attributor &A,
                                               ExecutionDomainTy &EntryBBED) {
-  SmallVector<ExecutionDomainTy> PredExecDomains;
+  SmallVector<ExecutionDomainTy> CallSiteEDs;
   auto PredForCallSite = [&](AbstractCallSite ACS) {
     const auto &EDAA = A.getAAFor<AAExecutionDomain>(
         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
         DepClassTy::OPTIONAL);
     if (!EDAA.getState().isValidState())
       return false;
-    PredExecDomains.emplace_back(
+    CallSiteEDs.emplace_back(
         EDAA.getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
     return true;
   };
 
+  ExecutionDomainTy ExitED;
   bool AllCallSitesKnown;
   if (A.checkForAllCallSites(PredForCallSite, *this,
                              /* RequiresAllCallSites */ true,
                              AllCallSitesKnown)) {
-    for (const auto &PredED : PredExecDomains)
-      mergeInPredecessor(A, EntryBBED, PredED);
+    for (const auto &CSED : CallSiteEDs) {
+      mergeInPredecessor(A, EntryBBED, CSED);
+      ExitED.IsReachingAlignedBarrierOnly &= CSED.IsReachingAlignedBarrierOnly;
+    }
 
   } else {
     // We could not find all predecessors, so this is either a kernel or a
@@ -2873,16 +2883,19 @@ void AAExecutionDomainFunction::handleEntryBB(Attributor &A,
       EntryBBED.IsExecutedByInitialThreadOnly = false;
       EntryBBED.IsReachedFromAlignedBarrierOnly = true;
       EntryBBED.EncounteredNonLocalSideEffect = false;
+      ExitED.IsReachingAlignedBarrierOnly = true;
     } else {
       EntryBBED.IsExecutedByInitialThreadOnly = false;
       EntryBBED.IsReachedFromAlignedBarrierOnly = false;
       EntryBBED.EncounteredNonLocalSideEffect = true;
+      ExitED.IsReachingAlignedBarrierOnly = false;
     }
   }
 
   auto &FnED = BEDMap[nullptr];
-  FnED.IsReachingAlignedBarrierOnly &=
+  FnED.IsReachedFromAlignedBarrierOnly &=
       EntryBBED.IsReachedFromAlignedBarrierOnly;
+  FnED.IsReachingAlignedBarrierOnly &= ExitED.IsReachingAlignedBarrierOnly;
 }
 
 ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2934,7 +2947,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     ExecutionDomainTy ED;
     // Propagate "incoming edges" into information about this block.
     if (IsEntryBB) {
-      handleEntryBB(A, ED);
+      handleCallees(A, ED);
     } else {
       // For live non-entry blocks we only propagate
       // information via live edges.
@@ -3073,18 +3086,24 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
         ED.EncounteredNonLocalSideEffect = true;
     }
 
+    bool IsEndAndNotReachingAlignedBarriersOnly = false;
     if (!isa<UnreachableInst>(BB.getTerminator()) &&
         !BB.getTerminator()->getNumSuccessors()) {
 
-      auto &FnED = BEDMap[nullptr];
-      mergeInPredecessor(A, FnED, ED);
+      mergeInPredecessor(A, InterProceduralED, ED);
 
+      auto &FnED = BEDMap[nullptr];
+      if (!FnED.IsReachingAlignedBarrierOnly) {
+        IsEndAndNotReachingAlignedBarriersOnly = true;
+        SyncInstWorklist.push_back(BB.getTerminator());
+      }
       if (IsKernel)
         HandleAlignedBarrier(nullptr, ED);
     }
 
     ExecutionDomainTy &StoredED = BEDMap[&BB];
-    ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly;
+    ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly &
+                                      !IsEndAndNotReachingAlignedBarriersOnly;
 
     // Check if we computed anything 
diff erent as part of the forward
     // traversal. We do not take assumptions and aligned barriers into account
@@ -3135,8 +3154,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     }
     if (SyncBB != &EntryBB)
       continue;
-    auto &FnED = BEDMap[nullptr];
-    if (SetAndRecord(FnED.IsReachingAlignedBarrierOnly, false))
+    if (SetAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false))
       Changed = true;
   }
 

diff  --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll
new file mode 100644
index 0000000000000..94412b5627205
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs
+; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK
+
+ at IsSPMDMode = internal addrspace(3) global i32 undef
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.assume(i1 noundef) #0
+
+define weak_odr amdgpu_kernel void @__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() {
+bb:
+  %i36 = load i32, ptr addrspace(3) @IsSPMDMode, align 4
+  %i37 = icmp eq i32 %i36, 0
+  tail call void @llvm.assume(i1 %i37)
+  ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
+;.
+; CHECK: @[[ISSPMDMODE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef
+;.
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    tail call void @llvm.assume(i1 true)
+; CHECK-NEXT:    ret void
+;
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+;.

diff  --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll
new file mode 100644
index 0000000000000..5cdd0ffb79af8
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs
+; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK
+
+ at _ZN4ompx5state9TeamStateE = internal addrspace(3) global ptr undef
+
+define internal fastcc i1 @__kmpc_kernel_parallel() {
+bb:
+  %i = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+  %i1 = icmp eq ptr %i, null
+  ret i1 %i1
+}
+
+define weak_odr amdgpu_kernel void @__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() #1 {
+bb:
+  call void @barrier()
+  %i31 = call fastcc i1 @__kmpc_kernel_parallel()
+  call void @barrier()
+  store ptr @use, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+  call void @barrier()
+  store ptr null, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+  call void @barrier()
+  call void @use(i1 %i31)
+  ret void
+}
+
+declare void @use(i1)
+declare void @barrier() nocallback
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #1 = { "kernel" }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 7, !"openmp", i32 50}
+!1 = !{i32 7, !"openmp-device", i32 50}
+;.
+; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global ptr undef
+;.
+; CHECK: Function Attrs: norecurse nosync nounwind memory(read)
+; CHECK-LABEL: define {{[^@]+}}@__kmpc_kernel_parallel
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+; CHECK-NEXT:    [[I1:%.*]] = icmp eq ptr [[I]], null
+; CHECK-NEXT:    ret i1 [[I1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    call void @barrier()
+; CHECK-NEXT:    [[I31:%.*]] = call fastcc i1 @__kmpc_kernel_parallel()
+; CHECK-NEXT:    call void @barrier()
+; CHECK-NEXT:    store ptr @use, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+; CHECK-NEXT:    call void @barrier()
+; CHECK-NEXT:    store ptr null, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
+; CHECK-NEXT:    call void @barrier()
+; CHECK-NEXT:    call void @use(i1 [[I31]])
+; CHECK-NEXT:    ret void
+;
+;.
+; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(read) }
+; CHECK: attributes #[[ATTR1]] = { "kernel" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+;.

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index 531765c78f842..5c7737128daf0 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -37,6 +37,7 @@ define void @kernel() "kernel" {
 ; CHECK:       if.else:
 ; CHECK-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.assume(i1 true)
 ; CHECK-NEXT:    call void @barrier() #[[ATTR6]]
 ; CHECK-NEXT:    br label [[IF_MERGE]]
 ; CHECK:       if.merge:


        


More information about the llvm-commits mailing list