[llvm] 4009f84 - [OpenMPOpt] Check for execution with an aligned barrier

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 16:39:14 PDT 2023


Author: Johannes Doerfert
Date: 2023-07-07T16:38:33-07:00
New Revision: 4009f84d2df53793278c913eb9bb45f9f7363366

URL: https://github.com/llvm/llvm-project/commit/4009f84d2df53793278c913eb9bb45f9f7363366
DIFF: https://github.com/llvm/llvm-project/commit/4009f84d2df53793278c913eb9bb45f9f7363366.diff

LOG: [OpenMPOpt] Check for execution with an aligned barrier

If the next or last synchronizing instruction was an aligned barrier,
the instruction is executed in an aligned region.

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index e1b4b1f68d1e79..880e5560e03c69 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2674,6 +2674,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
     if (!isValidState())
       return false;
 
+    bool ForwardIsOk = true;
     const Instruction *CurI;
 
     // Check forward until a call or the block end is reached.
@@ -2682,19 +2683,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
-      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
-        break;
-      }
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+        return true;
       const auto &It = CEDMap.find({CB, PRE});
       if (It == CEDMap.end())
         continue;
       if (!It->getSecond().IsReachingAlignedBarrierOnly)
-        return false;
+        ForwardIsOk = false;
       break;
     } while ((CurI = CurI->getNextNonDebugInstruction()));
 
     if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
-      return false;
+      ForwardIsOk = false;
 
     // Check backward until a call or the block beginning is reached.
     CurI = &I;
@@ -2702,9 +2702,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
-      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
-        break;
-      }
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+        return true;
       const auto &It = CEDMap.find({CB, POST});
       if (It == CEDMap.end())
         continue;
@@ -2713,6 +2712,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       return false;
     } while ((CurI = CurI->getPrevNonDebugInstruction()));
 
+    // Delayed decision on the forward pass to allow aligned barrier detection
+    // in the backwards traversal.
+    if (!ForwardIsOk)
+      return false;
+
     if (!CurI) {
       const BasicBlock *BB = I.getParent();
       if (BB == &BB->getParent()->getEntryBlock())

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index ed4912810be28e..f431906f7027a4 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -21,6 +21,8 @@ target triple = "amdgcn-amd-amdhsa"
 @QB3 = internal addrspace(3) global i32 undef, align 4
 @QC3 = internal addrspace(3) global i32 undef, align 4
 @QD3 = internal addrspace(3) global i32 undef, align 4
+@UAA1 = internal addrspace(3) global i32 undef, align 4
+@UAA2 = internal addrspace(3) global i32 undef, align 4
 @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
 
 ; Make sure we do not delete the stores to @G without also replacing the load with `1`.
@@ -40,6 +42,8 @@ target triple = "amdgcn-amd-amdhsa"
 ; TUNIT: @QB3 = internal addrspace(3) global i32 undef, align 4
 ; TUNIT: @QC3 = internal addrspace(3) global i32 undef, align 4
 ; TUNIT: @QD3 = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @UAA1 = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @UAA2 = internal addrspace(3) global i32 undef, align 4
 ; TUNIT: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
 ; TUNIT: @kernel_nested_parallelism = weak constant i8 0
 ;.
@@ -58,6 +62,8 @@ target triple = "amdgcn-amd-amdhsa"
 ; CGSCC: @QB3 = internal addrspace(3) global i32 undef, align 4
 ; CGSCC: @QC3 = internal addrspace(3) global i32 undef, align 4
 ; CGSCC: @QD3 = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @UAA1 = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @UAA2 = internal addrspace(3) global i32 undef, align 4
 ; CGSCC: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
 ;.
 define void @kernel() "kernel" {
@@ -646,6 +652,86 @@ S:
   ret void
 }
 
+define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
+; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
+; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; TUNIT:       L:
+; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
+; TUNIT-NEXT:    ret void
+; TUNIT:       S:
+; TUNIT-NEXT:    call void @sync()
+; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
+; TUNIT-NEXT:    call void @sync()
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
+; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; CGSCC:       L:
+; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
+; CGSCC-NEXT:    ret void
+; CGSCC:       S:
+; CGSCC-NEXT:    call void @sync()
+; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
+; CGSCC-NEXT:    call void @sync()
+; CGSCC-NEXT:    ret void
+;
+  br i1 %c, label %S, label %L
+L:
+  call void @barrier();
+  %v = load i32, ptr addrspace(3) @UAA1
+  call void @use1(i32 %v)
+  ret void
+S:
+  call void @sync();
+  store i32 2, ptr addrspace(3) @UAA1
+  call void @barrier();
+  call void @sync();
+  ret void
+}
+
+define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
+; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
+; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; TUNIT:       L:
+; TUNIT-NEXT:    call void @sync()
+; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
+; TUNIT-NEXT:    ret void
+; TUNIT:       S:
+; TUNIT-NEXT:    call void @sync()
+; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
+; TUNIT-NEXT:    call void @sync()
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
+; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; CGSCC:       L:
+; CGSCC-NEXT:    call void @sync()
+; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
+; CGSCC-NEXT:    ret void
+; CGSCC:       S:
+; CGSCC-NEXT:    call void @sync()
+; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
+; CGSCC-NEXT:    call void @sync()
+; CGSCC-NEXT:    ret void
+;
+  br i1 %c, label %S, label %L
+L:
+  call void @sync();
+  %v = load i32, ptr addrspace(3) @UAA2
+  call void @use1(i32 %v)
+  ret void
+S:
+  call void @sync();
+  store i32 2, ptr addrspace(3) @UAA2
+  call void @barrier();
+  call void @sync();
+  ret void
+}
+
 declare void @sync()
 declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
 declare void @use1(i32) nosync norecurse nounwind nocallback
@@ -654,7 +740,7 @@ declare void @__kmpc_target_deinit(ptr, i8) nocallback
 declare void @llvm.assume(i1)
 
 !llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
+!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18}
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
@@ -673,6 +759,8 @@ declare void @llvm.assume(i1)
 !14 = !{ptr @kernel4d2, !"kernel", i32 1}
 !15 = !{ptr @kernel4c3, !"kernel", i32 1}
 !16 = !{ptr @kernel4d3, !"kernel", i32 1}
+!17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
+!18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
 
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
@@ -709,4 +797,6 @@ declare void @llvm.assume(i1)
 ; CHECK: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
 ; CHECK: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
 ; CHECK: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
+; CHECK: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
+; CHECK: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
 ;.


        


More information about the llvm-commits mailing list