[llvm] 578d507 - [OpenMP][FIX] Ensure to determine aligned regions properly

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 2 02:28:44 PST 2023


Author: Johannes Doerfert
Date: 2023-02-02T02:28:10-08:00
New Revision: 578d507359c6b4ad44f10dc6da6d8328e9743f31

URL: https://github.com/llvm/llvm-project/commit/578d507359c6b4ad44f10dc6da6d8328e9743f31
DIFF: https://github.com/llvm/llvm-project/commit/578d507359c6b4ad44f10dc6da6d8328e9743f31.diff

LOG: [OpenMP][FIX] Ensure to determine aligned regions properly

There were missing checks in the aligned region code, copy-paste errors
(= usage of the IsReachedFromAlignedBarrierOnly value instead of
IsReachingAlignedBarrierOnly value on the forward pass), and a missing
update of the call state for sync declarations and definitions.

Partially fixes https://github.com/llvm/llvm-project/issues/60425

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index be2c2b7d0161..7fd82df4910b 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2680,7 +2680,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
                                  const Instruction &I) const override {
     assert(I.getFunction() == getAnchorScope() &&
            "Instruction is out of scope!");
-    if (!isValidState() || isa<CallBase>(I))
+    if (!isValidState())
       return false;
 
     const Instruction *CurI;
@@ -2691,14 +2691,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+        break;
+      }
       const auto &It = CEDMap.find(CB);
       if (It == CEDMap.end())
         continue;
-      if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
+      if (!It->getSecond().IsReachingAlignedBarrierOnly)
         return false;
+      break;
     } while ((CurI = CurI->getNextNonDebugInstruction()));
 
-    if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
+    if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
       return false;
 
     // Check backward until a call or the block beginning is reached.
@@ -2707,12 +2711,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
       auto *CB = dyn_cast<CallBase>(CurI);
       if (!CB)
         continue;
+      if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) {
+        break;
+      }
       const auto &It = CEDMap.find(CB);
       if (It == CEDMap.end())
         continue;
       if (!AA::isNoSyncInst(A, *CB, *this)) {
-        if (It->getSecond().IsReachedFromAlignedBarrierOnly)
+        if (It->getSecond().IsReachedFromAlignedBarrierOnly) {
           break;
+        }
         return false;
       }
 
@@ -3008,7 +3016,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
           if (EDAA.getState().isValidState()) {
             const auto &CalleeED = EDAA.getFunctionExecutionDomain();
             ED.IsReachedFromAlignedBarrierOnly =
-                CalleeED.IsReachedFromAlignedBarrierOnly;
+                CallED.IsReachedFromAlignedBarrierOnly =
+                    CalleeED.IsReachedFromAlignedBarrierOnly;
             AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
             if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
               ED.EncounteredNonLocalSideEffect |=
@@ -3023,8 +3032,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
             continue;
           }
         }
-        ED.IsReachedFromAlignedBarrierOnly =
-            IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
+        if (!IsNoSync)
+          ED.IsReachedFromAlignedBarrierOnly =
+              CallED.IsReachedFromAlignedBarrierOnly = false;
         AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
         ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
         if (!IsNoSync)

diff  --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index ca9905af23cd..531765c78f84 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -8,17 +8,20 @@ target triple = "amdgcn-amd-amdhsa"
 
 @G = internal addrspace(3) global i32 undef, align 4
 @H = internal addrspace(3) global i32 undef, align 4
+@X = internal addrspace(3) global i32 undef, align 4
 @str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
 
 ; Make sure we do not delete the stores to @G without also replacing the load with `1`.
 ;.
 ; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
 ; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
 ; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
 ; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
 ;.
 ; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
 ; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
 ; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
 ;.
 define void @kernel() "kernel" {
@@ -30,20 +33,17 @@ define void @kernel() "kernel" {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i32 1, ptr addrspace(3) @G, align 4
 ; CHECK-NEXT:    br label [[IF_MERGE:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    call void @barrier() #[[ATTR5:[0-9]+]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
-; CHECK-NEXT:    call void @use1(i32 [[L]]) #[[ATTR5]]
-; CHECK-NEXT:    call void @barrier() #[[ATTR5]]
+; CHECK-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
+; CHECK-NEXT:    call void @barrier() #[[ATTR6]]
 ; CHECK-NEXT:    br label [[IF_MERGE]]
 ; CHECK:       if.merge:
-; CHECK-NEXT:    call void @use1(i32 2) #[[ATTR5]]
+; CHECK-NEXT:    call void @use1(i32 2) #[[ATTR6]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then2:
-; CHECK-NEXT:    store i32 2, ptr addrspace(3) @G, align 4
-; CHECK-NEXT:    call void @barrier() #[[ATTR5]]
+; CHECK-NEXT:    call void @barrier() #[[ATTR6]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    call void @__kmpc_target_deinit(ptr undef, i8 1)
@@ -87,6 +87,63 @@ define void @test_assume() {
   ret void
 }
 
+; We can't ignore the sync, hence this might store 2 into %p
+define void @kernel2(ptr %p) "kernel" {
+; CHECK-LABEL: define {{[^@]+}}@kernel2
+; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
+; CHECK-NEXT:    call void @sync()
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
+; CHECK-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
+; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 1, ptr addrspace(3) @X
+  call void @sync()
+  %v = load i32, ptr addrspace(3) @X
+  store i32 2, ptr addrspace(3) @X
+  store i32 %v, ptr %p
+  ret void
+}
+
+; We can't ignore the sync, hence this might store 2 into %p
+define void @kernel3(ptr %p) "kernel" {
+; TUNIT-LABEL: define {{[^@]+}}@kernel3
+; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
+; TUNIT-NEXT:    call void @sync_def.internalized()
+; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
+; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
+; TUNIT-NEXT:    store i32 [[V]], ptr [[P]], align 4
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@kernel3
+; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
+; CGSCC-NEXT:    call void @sync_def()
+; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
+; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
+; CGSCC-NEXT:    store i32 [[V]], ptr [[P]], align 4
+; CGSCC-NEXT:    ret void
+;
+  store i32 1, ptr addrspace(3) @X
+  call void @sync_def()
+  %v = load i32, ptr addrspace(3) @X
+  store i32 2, ptr addrspace(3) @X
+  store i32 %v, ptr %p
+  ret void
+}
+
+define void @sync_def() {
+; CHECK-LABEL: define {{[^@]+}}@sync_def() {
+; CHECK-NEXT:    call void @sync()
+; CHECK-NEXT:    ret void
+;
+  call void @sync()
+  ret void
+}
+
+declare void @sync()
 declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
 declare void @use1(i32) nosync norecurse nounwind nocallback
 declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
@@ -94,24 +151,26 @@ declare void @__kmpc_target_deinit(ptr, i8) nocallback
 declare void @llvm.assume(i1)
 
 !llvm.module.flags = !{!0, !1}
-!nvvm.annotations = !{!2}
+!nvvm.annotations = !{!2, !3, !4}
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
 !2 = !{ptr @kernel, !"kernel", i32 1}
+!3 = !{ptr @kernel2, !"kernel", i32 1}
+!4 = !{ptr @kernel3, !"kernel", i32 1}
 
 ;.
 ; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-; CHECK: attributes #[[ATTR5]] = { nounwind }
+; CHECK: attributes #[[ATTR1]] = { "kernel" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #[[ATTR6]] = { nounwind }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
 ; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
+; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
+; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
 ;.
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CGSCC: {{.*}}
-; TUNIT: {{.*}}


        


More information about the llvm-commits mailing list