[llvm] 24656e9 - [OpenMPOpt] The kernel end is not necessarily an aligned barrier

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 16:39:15 PDT 2023


Author: Johannes Doerfert
Date: 2023-07-07T16:38:34-07:00
New Revision: 24656e995ae9c5893fa32db62863e55c3c70c5e2

URL: https://github.com/llvm/llvm-project/commit/24656e995ae9c5893fa32db62863e55c3c70c5e2
DIFF: https://github.com/llvm/llvm-project/commit/24656e995ae9c5893fa32db62863e55c3c70c5e2.diff

LOG: [OpenMPOpt] The kernel end is not necessarily an aligned barrier

A kernel can be exited in a non-aligned fashion, so we cannot pretend it
always ends in an aligned barrier. Instead, we require an explicit
aligned barrier as we lack a divergence analysis at this point.

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 880e5560e03c69..ba0a72bda0c8c4 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2579,7 +2579,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
 
     SmallPtrSet<CallBase *, 16> DeletedBarriers;
     auto HandleAlignedBarrier = [&](CallBase *CB) {
-      const ExecutionDomainTy &ED = CEDMap[{CB, PRE}];
+      const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
       if (!ED.IsReachedFromAlignedBarrierOnly ||
           ED.EncounteredNonLocalSideEffect)
         return;
@@ -2913,11 +2913,10 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
   // Helper to deal with an aligned barrier encountered during the forward
   // traversal. \p CB is the aligned barrier, \p ED is the execution domain when
   // it was encountered.
-  auto HandleAlignedBarrier = [&](CallBase *CB, ExecutionDomainTy &ED) {
-    if (CB)
-      Changed |= AlignedBarriers.insert(CB);
+  auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) {
+    Changed |= AlignedBarriers.insert(&CB);
     // First, update the barrier ED kept in the separate CEDMap.
-    auto &CallInED = CEDMap[{CB, PRE}];
+    auto &CallInED = CEDMap[{&CB, PRE}];
     Changed |= mergeInPredecessor(A, CallInED, ED);
     CallInED.IsReachingAlignedBarrierOnly = true;
     // Next adjust the ED we use for the traversal.
@@ -2925,9 +2924,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     ED.IsReachedFromAlignedBarrierOnly = true;
     // Aligned barrier collection has to come last.
     ED.clearAssumeInstAndAlignedBarriers();
-    if (CB)
-      ED.addAlignedBarrier(A, *CB);
-    auto &CallOutED = CEDMap[{CB, POST}];
+    ED.addAlignedBarrier(A, CB);
+    auto &CallOutED = CEDMap[{&CB, POST}];
     Changed |= mergeInPredecessor(A, CallOutED, ED);
   };
 
@@ -2946,6 +2944,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     // TODO: We use local reasoning since we don't have a divergence analysis
     // 	     running as well. We could basically allow uniform branches here.
     bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
+    bool IsExplicitlyAligned = IsEntryBB && IsKernel;
     ExecutionDomainTy ED;
     // Propagate "incoming edges" into information about this block.
     if (IsEntryBB) {
@@ -3020,14 +3019,16 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
           AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock);
 
       AlignedBarrierLastInBlock &= IsNoSync;
+      IsExplicitlyAligned &= IsNoSync;
 
       // Next we check for calls. Aligned barriers are handled
       // explicitly, everything else is kept for the backward traversal and will
       // also affect our state.
       if (CB) {
         if (IsAlignedBarrier) {
-          HandleAlignedBarrier(CB, ED);
+          HandleAlignedBarrier(*CB, ED);
           AlignedBarrierLastInBlock = true;
+          IsExplicitlyAligned = true;
           continue;
         }
 
@@ -3129,14 +3130,16 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
       Changed |= mergeInPredecessor(A, InterProceduralED, ED);
 
       auto &FnED = BEDMap[nullptr];
+      if (IsKernel && !IsExplicitlyAligned)
+        FnED.IsReachingAlignedBarrierOnly = false;
+      Changed |= mergeInPredecessor(A, FnED, ED);
+
       if (!FnED.IsReachingAlignedBarrierOnly) {
         IsEndAndNotReachingAlignedBarriersOnly = true;
         SyncInstWorklist.push_back(BB.getTerminator());
         auto &BBED = BEDMap[&BB];
         Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
       }
-      if (IsKernel)
-        HandleAlignedBarrier(nullptr, ED);
     }
 
     ExecutionDomainTy &StoredED = BEDMap[&BB];

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index f431906f7027a4..b91cf8e6801eb1 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -252,26 +252,32 @@ S:
   ret void
 }
 
-; FIXME: We should not replace the load or delete the second store.
+; We should not replace the load or delete the second store.
 define void @kernel4b1(i1 %c) "kernel" {
 ; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
 ; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
 ; TUNIT:       L:
 ; TUNIT-NEXT:    call void @sync()
-; TUNIT-NEXT:    call void @use1(i32 0) #[[ATTR7]]
+; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
+; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
 ; TUNIT-NEXT:    ret void
 ; TUNIT:       S:
+; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
 ; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
 ; CGSCC:       L:
 ; CGSCC-NEXT:    call void @sync()
-; CGSCC-NEXT:    call void @use1(i32 0) #[[ATTR6]]
+; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
+; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
 ; CGSCC-NEXT:    ret void
 ; CGSCC:       S:
+; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
 ; CGSCC-NEXT:    ret void
 ;
   store i32 0, ptr addrspace(3) @QB1
@@ -328,7 +334,7 @@ define void @kernel4b2(i1 %c) "kernel" {
 ; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
 ; TUNIT:       L:
 ; TUNIT-NEXT:    call void @sync()
-; TUNIT-NEXT:    call void @use1(i32 undef) #[[ATTR7]]
+; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
 ; TUNIT-NEXT:    ret void
 ; TUNIT:       S:
 ; TUNIT-NEXT:    ret void
@@ -338,7 +344,7 @@ define void @kernel4b2(i1 %c) "kernel" {
 ; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
 ; CGSCC:       L:
 ; CGSCC-NEXT:    call void @sync()
-; CGSCC-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
+; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
 ; CGSCC-NEXT:    ret void
 ; CGSCC:       S:
 ; CGSCC-NEXT:    ret void


        


More information about the llvm-commits mailing list