[llvm] 4009f84 - [OpenMPOpt] Check for execution with an aligned barrier
Johannes Doerfert via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 7 16:39:14 PDT 2023
Author: Johannes Doerfert
Date: 2023-07-07T16:38:33-07:00
New Revision: 4009f84d2df53793278c913eb9bb45f9f7363366
URL: https://github.com/llvm/llvm-project/commit/4009f84d2df53793278c913eb9bb45f9f7363366
DIFF: https://github.com/llvm/llvm-project/commit/4009f84d2df53793278c913eb9bb45f9f7363366.diff
LOG: [OpenMPOpt] Check for execution with an aligned barrier
If the next or last synchronizing instruction was an aligned barrier,
the instruction is executed in an aligned region.
Added:
Modified:
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index e1b4b1f68d1e79..880e5560e03c69 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2674,6 +2674,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
if (!isValidState())
return false;
+ bool ForwardIsOk = true;
const Instruction *CurI;
// Check forward until a call or the block end is reached.
@@ -2682,19 +2683,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
- if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
- break;
- }
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+ return true;
const auto &It = CEDMap.find({CB, PRE});
if (It == CEDMap.end())
continue;
if (!It->getSecond().IsReachingAlignedBarrierOnly)
- return false;
+ ForwardIsOk = false;
break;
} while ((CurI = CurI->getNextNonDebugInstruction()));
if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
- return false;
+ ForwardIsOk = false;
// Check backward until a call or the block beginning is reached.
CurI = &I;
@@ -2702,9 +2702,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
- if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
- break;
- }
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+ return true;
const auto &It = CEDMap.find({CB, POST});
if (It == CEDMap.end())
continue;
@@ -2713,6 +2712,11 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
return false;
} while ((CurI = CurI->getPrevNonDebugInstruction()));
+ // Delayed decision on the forward pass to allow aligned barrier detection
+ // in the backwards traversal.
+ if (!ForwardIsOk)
+ return false;
+
if (!CurI) {
const BasicBlock *BB = I.getParent();
if (BB == &BB->getParent()->getEntryBlock())
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index ed4912810be28e..f431906f7027a4 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -21,6 +21,8 @@ target triple = "amdgcn-amd-amdhsa"
@QB3 = internal addrspace(3) global i32 undef, align 4
@QC3 = internal addrspace(3) global i32 undef, align 4
@QD3 = internal addrspace(3) global i32 undef, align 4
+@UAA1 = internal addrspace(3) global i32 undef, align 4
+@UAA2 = internal addrspace(3) global i32 undef, align 4
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
@@ -40,6 +42,8 @@ target triple = "amdgcn-amd-amdhsa"
; TUNIT: @QB3 = internal addrspace(3) global i32 undef, align 4
; TUNIT: @QC3 = internal addrspace(3) global i32 undef, align 4
; TUNIT: @QD3 = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @UAA1 = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @UAA2 = internal addrspace(3) global i32 undef, align 4
; TUNIT: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; TUNIT: @kernel_nested_parallelism = weak constant i8 0
;.
@@ -58,6 +62,8 @@ target triple = "amdgcn-amd-amdhsa"
; CGSCC: @QB3 = internal addrspace(3) global i32 undef, align 4
; CGSCC: @QC3 = internal addrspace(3) global i32 undef, align 4
; CGSCC: @QD3 = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @UAA1 = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @UAA2 = internal addrspace(3) global i32 undef, align 4
; CGSCC: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
;.
define void @kernel() "kernel" {
@@ -646,6 +652,86 @@ S:
ret void
}
+define void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
+; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
+; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; TUNIT: L:
+; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
+; TUNIT-NEXT: ret void
+; TUNIT: S:
+; TUNIT-NEXT: call void @sync()
+; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
+; TUNIT-NEXT: call void @sync()
+; TUNIT-NEXT: ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
+; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; CGSCC: L:
+; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
+; CGSCC-NEXT: ret void
+; CGSCC: S:
+; CGSCC-NEXT: call void @sync()
+; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
+; CGSCC-NEXT: call void @sync()
+; CGSCC-NEXT: ret void
+;
+ br i1 %c, label %S, label %L
+L:
+ call void @barrier();
+ %v = load i32, ptr addrspace(3) @UAA1
+ call void @use1(i32 %v)
+ ret void
+S:
+ call void @sync();
+ store i32 2, ptr addrspace(3) @UAA1
+ call void @barrier();
+ call void @sync();
+ ret void
+}
+
+define void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
+; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
+; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; TUNIT: L:
+; TUNIT-NEXT: call void @sync()
+; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
+; TUNIT-NEXT: ret void
+; TUNIT: S:
+; TUNIT-NEXT: call void @sync()
+; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
+; TUNIT-NEXT: call void @sync()
+; TUNIT-NEXT: ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
+; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
+; CGSCC: L:
+; CGSCC-NEXT: call void @sync()
+; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
+; CGSCC-NEXT: ret void
+; CGSCC: S:
+; CGSCC-NEXT: call void @sync()
+; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
+; CGSCC-NEXT: call void @sync()
+; CGSCC-NEXT: ret void
+;
+ br i1 %c, label %S, label %L
+L:
+ call void @sync();
+ %v = load i32, ptr addrspace(3) @UAA2
+ call void @use1(i32 %v)
+ ret void
+S:
+ call void @sync();
+ store i32 2, ptr addrspace(3) @UAA2
+ call void @barrier();
+ call void @sync();
+ ret void
+}
+
declare void @sync()
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
declare void @use1(i32) nosync norecurse nounwind nocallback
@@ -654,7 +740,7 @@ declare void @__kmpc_target_deinit(ptr, i8) nocallback
declare void @llvm.assume(i1)
!llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
+!nvvm.annotations = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18}
!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
@@ -673,6 +759,8 @@ declare void @llvm.assume(i1)
!14 = !{ptr @kernel4d2, !"kernel", i32 1}
!15 = !{ptr @kernel4c3, !"kernel", i32 1}
!16 = !{ptr @kernel4d3, !"kernel", i32 1}
+!17 = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
+!18 = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
;.
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
@@ -709,4 +797,6 @@ declare void @llvm.assume(i1)
; CHECK: [[META14:![0-9]+]] = !{ptr @kernel4d2, !"kernel", i32 1}
; CHECK: [[META15:![0-9]+]] = !{ptr @kernel4c3, !"kernel", i32 1}
; CHECK: [[META16:![0-9]+]] = !{ptr @kernel4d3, !"kernel", i32 1}
+; CHECK: [[META17:![0-9]+]] = !{ptr @kernel_unknown_and_aligned1, !"kernel", i32 1}
+; CHECK: [[META18:![0-9]+]] = !{ptr @kernel_unknown_and_aligned2, !"kernel", i32 1}
;.
More information about the llvm-commits
mailing list