[llvm] 9e2fc0b - [OpenMP] Check OpenMP assumptions on call-sites as well

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 10 11:53:02 PDT 2021


Author: Joseph Huber
Date: 2021-09-10T14:52:47-04:00
New Revision: 9e2fc0ba3706589ef740035e6779c62bc76b9a50

URL: https://github.com/llvm/llvm-project/commit/9e2fc0ba3706589ef740035e6779c62bc76b9a50
DIFF: https://github.com/llvm/llvm-project/commit/9e2fc0ba3706589ef740035e6779c62bc76b9a50.diff

LOG: [OpenMP] Check OpenMP assumptions on call-sites as well

This patch adds functionality to check assumption attributes on call
sites as well.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D109376

Added: 
    llvm/test/Transforms/OpenMP/spmdization_assumes.ll

Modified: 
    llvm/include/llvm/IR/Assumptions.h
    llvm/lib/IR/Assumptions.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h
index f64616c25d876..70b6ddc09e904 100644
--- a/llvm/include/llvm/IR/Assumptions.h
+++ b/llvm/include/llvm/IR/Assumptions.h
@@ -21,6 +21,7 @@
 namespace llvm {
 
 class Function;
+class CallBase;
 
 /// The key we use for assumption attributes.
 constexpr StringRef AssumptionAttrKey = "llvm.assume";
@@ -45,6 +46,10 @@ struct KnownAssumptionString {
 /// Return true if \p F has the assumption \p AssumptionStr attached.
 bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr);
 
+/// Return true if \p CB or the callee has the assumption \p AssumptionStr
+/// attached.
+bool hasAssumption(CallBase &CB, const KnownAssumptionString &AssumptionStr);
+
 } // namespace llvm
 
 #endif

diff  --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index 6498114cd60d5..918a77484df5f 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -11,12 +11,13 @@
 #include "llvm/IR/Assumptions.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 
 using namespace llvm;
 
-bool llvm::hasAssumption(Function &F,
-                         const KnownAssumptionString &AssumptionStr) {
-  const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+namespace {
+bool hasAssumption(const Attribute &A,
+                   const KnownAssumptionString &AssumptionStr) {
   if (!A.isValid())
     return false;
   assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -28,6 +29,23 @@ bool llvm::hasAssumption(Function &F,
     return Assumption == AssumptionStr;
   });
 }
+} // namespace
+
+bool llvm::hasAssumption(Function &F,
+                         const KnownAssumptionString &AssumptionStr) {
+  const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+  return ::hasAssumption(A, AssumptionStr);
+}
+
+bool llvm::hasAssumption(CallBase &CB,
+                         const KnownAssumptionString &AssumptionStr) {
+  if (Function *F = CB.getCalledFunction())
+    if (hasAssumption(*F, AssumptionStr))
+      return true;
+
+  const Attribute &A = CB.getFnAttr(AssumptionAttrKey);
+  return ::hasAssumption(A, AssumptionStr);
+}
 
 StringSet<> llvm::KnownAssumptionStrings({
     "omp_no_openmp",          // OpenMP 5.1

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5a1e24b71dfdc..10057f304e51c 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3710,13 +3710,15 @@ struct AAKernelInfoCallSite : AAKernelInfo {
     Function *Callee = getAssociatedFunction();
 
     // Helper to lookup an assumption string.
-    auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
-      return Fn && hasAssumption(*Fn, AssumptionStr);
+    auto HasAssumption = [](CallBase &CB, StringRef AssumptionStr) {
+      return hasAssumption(CB, AssumptionStr);
     };
 
     // Check for SPMD-mode assumptions.
-    if (HasAssumption(Callee, "ompx_spmd_amenable"))
+    if (HasAssumption(CB, "ompx_spmd_amenable")) {
       SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+      indicateOptimisticFixpoint();
+    }
 
     // First weed out calls we do not care about, that is readonly/readnone
     // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
@@ -3738,8 +3740,8 @@ struct AAKernelInfoCallSite : AAKernelInfo {
 
         // Unknown callees might contain parallel regions, except if they have
         // an appropriate assumption attached.
-        if (!(HasAssumption(Callee, "omp_no_openmp") ||
-              HasAssumption(Callee, "omp_no_parallelism")))
+        if (!(HasAssumption(CB, "omp_no_openmp") ||
+              HasAssumption(CB, "omp_no_parallelism")))
           ReachedUnknownParallelRegions.insert(&CB);
 
         // If SPMDCompatibilityTracker is not fixed, we need to give up on the

diff  --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
new file mode 100644
index 0000000000000..75f81782b408f
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+
+; void foo(double x) {
+; #pragma omp target map(from:x)
+;   {
+;     x = sin(M_PI);
+; #pragma omp parallel
+;     { }
+;   }
+; }
+
+target triple = "nvptx64"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+ at 0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+ at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
+ at __omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1
+ at llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
+
+; Function Attrs: alwaysinline convergent norecurse nounwind
+;.
+; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
+;.
+define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
+; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       user_code.entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR3]]
+; CHECK-NEXT:    [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
+; CHECK:       region.check.tid:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK:       region.guarded:
+; CHECK-NEXT:    store double [[CALL_I]], double* [[X]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
+; CHECK:       region.guarded.end:
+; CHECK-NEXT:    br label [[REGION_BARRIER]]
+; CHECK:       region.barrier:
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
+; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
+; CHECK:       region.exit:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
+; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR3]]
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  %captured_vars_addrs = alloca [0 x i8*], align 8
+  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3
+  %exec_user_code = icmp eq i32 %0, -1
+  br i1 %exec_user_code, label %user_code.entry, label %common.ret
+
+common.ret:                                       ; preds = %entry, %user_code.entry
+  ret void
+
+user_code.entry:                                  ; preds = %entry
+  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1)
+  %call.i = call double @__nv_sin(double 0x400921FB54442D18) #6
+  store double %call.i, double* %x, align 8, !tbaa !8
+  %2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
+  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
+  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3
+  br label %common.ret
+}
+
+declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
+
+; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
+define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
+; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
+; CHECK-SAME: (i32* noalias nocapture [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
+; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
+; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
+; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** nonnull [[GLOBAL_ARGS]]) #[[ATTR3]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %global_args = alloca i8**, align 8
+  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3
+  ret void
+}
+
+declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
+
+; Function Attrs: alwaysinline
+declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4
+
+declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
+
+; Function Attrs: convergent
+declare double @__nv_sin(double) local_unnamed_addr #5
+
+attributes #0 = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { nounwind }
+attributes #4 = { alwaysinline }
+attributes #5 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
+
+!omp_offload.info = !{!0}
+!nvvm.annotations = !{!1}
+!llvm.module.flags = !{!2, !3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
+!1 = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
+!2 = !{i32 1, !"wchar_size", i32 4}
+!3 = !{i32 7, !"openmp", i32 50}
+!4 = !{i32 7, !"openmp-device", i32 50}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"frame-pointer", i32 2}
+!7 = !{!"clang version 14.0.0"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"double", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+;.
+; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+; CHECK: attributes #[[ATTR2]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+; CHECK: attributes #[[ATTR3]] = { nounwind }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
+; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0}
+; CHECK: [[META1:![0-9]+]] = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1}
+; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META5:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
+; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"}
+; CHECK: [[TBAA8]] = !{!9, !9, i64 0}
+; CHECK: [[META9:![0-9]+]] = !{!"double", !10, i64 0}
+; CHECK: [[META10:![0-9]+]] = !{!"omnipotent char", !11, i64 0}
+; CHECK: [[META11:![0-9]+]] = !{!"Simple C/C++ TBAA"}
+;.


        


More information about the llvm-commits mailing list