[llvm] 22c898d - [OpenMP] Use Attributor to find underlying objects of stores

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 9 23:35:07 PST 2023


Author: Johannes Doerfert
Date: 2023-01-09T23:34:52-08:00
New Revision: 22c898dbfd3a79c90a78dfc2af28928eeb167ecf

URL: https://github.com/llvm/llvm-project/commit/22c898dbfd3a79c90a78dfc2af28928eeb167ecf
DIFF: https://github.com/llvm/llvm-project/commit/22c898dbfd3a79c90a78dfc2af28928eeb167ecf.diff

LOG: [OpenMP] Use Attributor to find underlying objects of stores

When we see a store in generic mode we need to decide if we should guard
it for SPMDzation. This patch changes the getUnderlyingObjects call to
the more optimistic getAssumedUnderlyingObjects call to identify more
thread local pointers.

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
    llvm/test/Transforms/OpenMP/spmdization_guarding.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5cf29bb1eafc..44871e94a71c 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4070,23 +4070,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
       if (!I.mayWriteToMemory())
         return true;
       if (auto *SI = dyn_cast<StoreInst>(&I)) {
-        SmallVector<const Value *> Objects;
-        getUnderlyingObjects(SI->getPointerOperand(), Objects);
-        if (llvm::all_of(Objects,
-                         [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
-          return true;
-        // Check for AAHeapToStack moved objects which must not be guarded.
+        const auto &UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
+            *this, IRPosition::value(*SI->getPointerOperand()),
+            DepClassTy::OPTIONAL);
         auto &HS = A.getAAFor<AAHeapToStack>(
             *this, IRPosition::function(*I.getFunction()),
             DepClassTy::OPTIONAL);
-        if (llvm::all_of(Objects, [&HS](const Value *Obj) {
-              auto *CB = dyn_cast<CallBase>(Obj);
-              if (!CB)
-                return false;
-              return HS.isAssumedHeapToStack(*CB);
-            })) {
+        if (UnderlyingObjsAA.forallUnderlyingObjects([&](Value &Obj) {
+              if (AA::isAssumedThreadLocalObject(A, Obj, *this))
+                return true;
+              // Check for AAHeapToStack moved objects which must not be
+              // guarded.
+              auto *CB = dyn_cast<CallBase>(&Obj);
+              return CB && HS.isAssumedHeapToStack(*CB);
+            }))
           return true;
-        }
       }
 
       // Insert instruction that needs guarding.

diff  --git a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
index 0f8c0beb5a51..822b701c2f4d 100644
--- a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
+++ b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
@@ -12,6 +12,7 @@ define internal i32 @nblist() {
   ret i32 0
 }
 
+
 define fastcc void @rec(ptr %0, i64 %1) {
 ; CHECK-LABEL: define {{[^@]+}}@rec(
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]

diff  --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 8d8b333a49a5..e99ad4aef386 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -40,6 +40,7 @@ target triple = "nvptx64"
 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
 @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1
 @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+ at LocGlob = private unnamed_addr addrspace(5) global i32 43
 
 ; Function Attrs: convergent norecurse nounwind
 ;.
@@ -47,12 +48,14 @@ target triple = "nvptx64"
 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+; CHECK: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
 ; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 ;.
 ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
 ; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
+; CHECK-DISABLED: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
 ; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 {
@@ -60,11 +63,16 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
 ; CHECK-SAME: (ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
+; CHECK-NEXT:    [[LOC:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[AL32:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK:       user_code.entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
+; CHECK-NEXT:    store ptr [[SELECT]], ptr [[LOC]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
 ; CHECK-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
 ; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[N]], 32
@@ -179,6 +187,8 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
 ; CHECK-DISABLED-NEXT:  entry:
 ; CHECK-DISABLED-NEXT:    [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
 ; CHECK-DISABLED-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLED-NEXT:    [[LOC:%.*]] = alloca ptr, align 8
+; CHECK-DISABLED-NEXT:    [[AL32:%.*]] = alloca i32, align 4
 ; CHECK-DISABLED-NEXT:    [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
 ; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
@@ -217,6 +227,9 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
 ; CHECK-DISABLED-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-DISABLED-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 ; CHECK-DISABLED:       user_code.entry:
+; CHECK-DISABLED-NEXT:    [[C:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-DISABLED-NEXT:    [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
+; CHECK-DISABLED-NEXT:    store ptr [[SELECT]], ptr [[LOC]], align 8
 ; CHECK-DISABLED-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
 ; CHECK-DISABLED-NEXT:    store i32 0, ptr [[X]], align 4, !noalias !8
 ; CHECK-DISABLED-NEXT:    [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
@@ -262,12 +275,17 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
 ; CHECK-DISABLED-NEXT:    ret void
 ;
 entry:
+  %loc = alloca ptr
+  %al32 = alloca i32
   %N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
   %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
   %exec_user_code = icmp eq i32 %0, -1
   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
 
 user_code.entry:                                  ; preds = %entry
+  %c = icmp eq i64 %N, 42
+  %select = select i1 %c, ptr %al32, ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
+  store ptr %select, ptr %loc
   %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
   store i32 0, ptr %x, align 4, !noalias !8
   %arrayidx1.i = getelementptr inbounds i32, ptr %x, i64 1
@@ -292,6 +310,9 @@ for.body.i:                                       ; preds = %for.cond.i
   %idxprom4.i = zext i32 %i.0.i to i64
   %arrayidx5.i = getelementptr inbounds i32, ptr %x, i64 %idxprom4.i
   store i32 %sub3.i, ptr %arrayidx5.i, align 4, !noalias !8
+  ; No Need to guard these accesses
+  %l = load ptr, ptr %loc
+  store i32 4711, ptr %l
   %inc.i = add nuw nsw i32 %i.0.i, 1
   br label %for.cond.i, !llvm.loop !11
 
@@ -359,6 +380,7 @@ define weak i32 @__kmpc_target_init(ptr, i8, i1) {
 
 ; Function Attrs: convergent
 declare i32 @no_openmp(ptr) #1
+declare void @no_openmp_i32(i32) #1
 
 ; Function Attrs: convergent nounwind readonly willreturn
 declare void @pure() #2


        


More information about the llvm-commits mailing list