[llvm] b2a8d2c - [OpenMP] Avoid running openmp-opt on dead functions

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 10 15:04:42 PST 2023


Author: Johannes Doerfert
Date: 2023-01-10T15:03:51-08:00
New Revision: b2a8d2c69bce98604f1feaad9e6bebd3759b5635

URL: https://github.com/llvm/llvm-project/commit/b2a8d2c69bce98604f1feaad9e6bebd3759b5635
DIFF: https://github.com/llvm/llvm-project/commit/b2a8d2c69bce98604f1feaad9e6bebd3759b5635.diff

LOG: [OpenMP] Avoid running openmp-opt on dead functions

The Attributor has logic to run only on assumed live functions and this
is exposed to users now. OpenMP-opt will (mostly) ignore dead internal
functions now but run the same deduction as before if an internal
function is marked live.

This should lower compile time as we run on less code and delete more
code early on. For the full OpenMC module compiled with noinline and
JITed at runtime, we save ~25%, or ~10s on my machine during JITing.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll
    llvm/test/Transforms/OpenMP/global_constructor.ll
    llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
    llvm/test/Transforms/OpenMP/single_threaded_execution.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index dbf289242c5d8..272d71076923a 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1406,12 +1406,13 @@ struct AttributorConfig {
   bool RewriteSignatures = true;
 
   /// Flag to determine if we want to initialize all default AAs for an internal
-  /// function marked live.
-  /// TODO: This should probably be a callback, or maybe
-  /// identifyDefaultAbstractAttributes should be virtual, something to allow
-  /// customizable lazy initialization for internal functions.
+  /// function marked live. See also: InitializationCallback>
   bool DefaultInitializeLiveInternals = true;
 
+  /// Callback function to be invoked on internal functions marked live.
+  std::function<void(Attributor &A, const Function &F)> InitializationCallback =
+      nullptr;
+
   /// Helper to update an underlying call graph and to delete functions.
   CallGraphUpdater &CGUpdater;
 
@@ -1738,6 +1739,8 @@ struct Attributor {
 
     if (Configuration.DefaultInitializeLiveInternals)
       identifyDefaultAbstractAttributes(const_cast<Function &>(F));
+    if (Configuration.InitializationCallback)
+      Configuration.InitializationCallback(*this, F);
   }
 
   /// Helper function to remove callsite.

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index eebe8d347cb13..8bd61a2681552 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2163,8 +2163,13 @@ struct OpenMPOpt {
   void registerFoldRuntimeCall(RuntimeFunction RF);
 
   /// Populate the Attributor with abstract attribute opportunities in the
-  /// function.
+  /// functions.
   void registerAAs(bool IsModulePass);
+
+public:
+  /// Callback to register AAs for live functions, including internal functions
+  /// marked live during the traversal.
+  static void registerAAsForFunction(Attributor &A, const Function &F);
 };
 
 Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
@@ -4849,20 +4854,35 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
     if (F->isDeclaration())
       continue;
 
-    if (!DisableOpenMPOptDeglobalization)
-      A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
-    A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
-    if (!DisableOpenMPOptDeglobalization)
-      A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
-
-    for (auto &I : instructions(*F)) {
-      if (auto *LI = dyn_cast<LoadInst>(&I)) {
-        bool UsedAssumedInformation = false;
-        A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
-                               UsedAssumedInformation, AA::Interprocedural);
-      } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
-        A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
-      }
+    // We look at internal functions only on-demand but if any use is not a
+    // direct call or outside the current set of analyzed functions, we have
+    // to do it eagerly.
+    if (F->hasLocalLinkage()) {
+      if (llvm::all_of(F->uses(), [this](const Use &U) {
+            const auto *CB = dyn_cast<CallBase>(U.getUser());
+            return CB && CB->isCallee(&U) &&
+                   !A.isRunOn(const_cast<Function *>(CB->getCaller()));
+          }))
+        continue;
+    }
+    registerAAsForFunction(A, *F);
+  }
+}
+
+void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
+  if (!DisableOpenMPOptDeglobalization)
+    A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
+  A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
+  if (!DisableOpenMPOptDeglobalization)
+    A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
+
+  for (auto &I : instructions(F)) {
+    if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      bool UsedAssumedInformation = false;
+      A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
+                             UsedAssumedInformation, AA::Interprocedural);
+    } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
     }
   }
 }
@@ -5033,10 +5053,13 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
   }
 
   // Look at every function in the Module unless it was internalized.
+  SetVector<Function *> Functions;
   SmallVector<Function *, 16> SCC;
   for (Function &F : M)
-    if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
+    if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) {
       SCC.push_back(&F);
+      Functions.insert(&F);
+    }
 
   if (SCC.empty())
     return PreservedAnalyses::all();
@@ -5057,12 +5080,13 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
 
   AttributorConfig AC(CGUpdater);
   AC.DefaultInitializeLiveInternals = false;
+  AC.IsModulePass = true;
   AC.RewriteSignatures = false;
   AC.MaxFixpointIterations = MaxFixpointIterations;
   AC.OREGetter = OREGetter;
   AC.PassName = DEBUG_TYPE;
+  AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
 
-  SetVector<Function *> Functions;
   Attributor A(Functions, InfoCache, AC);
 
   OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
@@ -5137,6 +5161,7 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
   AC.MaxFixpointIterations = MaxFixpointIterations;
   AC.OREGetter = OREGetter;
   AC.PassName = DEBUG_TYPE;
+  AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
 
   Attributor A(Functions, InfoCache, AC);
 

diff  --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll
index 5005a097faacf..57cbcb98794f5 100644
--- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll
+++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll
@@ -29,19 +29,6 @@ define weak_odr ptr @h(ptr %0) {
 ; CHECK-NEXT:    ret void
 ;
 ;
-; CHECK: Function Attrs: norecurse nounwind memory(none)
-; CHECK-LABEL: define {{[^@]+}}@g
-; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    ret double 0.000000e+00
-;
-;
-; CHECK: Function Attrs: norecurse nosync nounwind memory(none)
-; CHECK-LABEL: define {{[^@]+}}@h.internalized
-; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i64 undef
-; CHECK-NEXT:    ret ptr [[TMP2]]
-;
-;
 ; CHECK-LABEL: define {{[^@]+}}@h
 ; CHECK-SAME: (ptr [[TMP0:%.*]]) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 4
@@ -50,7 +37,6 @@ define weak_odr ptr @h(ptr %0) {
 ;
 ;.
 ; CHECK: attributes #[[ATTR0]] = { norecurse nounwind memory(none) }
-; CHECK: attributes #[[ATTR1]] = { norecurse nosync nounwind memory(none) }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
 ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}

diff  --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll
index 1c304ce743eda..a0b608b10d069 100644
--- a/llvm/test/Transforms/OpenMP/global_constructor.ll
+++ b/llvm/test/Transforms/OpenMP/global_constructor.ll
@@ -37,7 +37,7 @@ declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
 
 declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
 
-define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
+define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
 entry:
   %call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
   %call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
@@ -78,31 +78,29 @@ attributes #1 = { convergent nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
 ; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
 ; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 ; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       user_code.entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; CHECK:       region.guarded:
 ; CHECK-NEXT:    store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT:    tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
+; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
+; CHECK-NEXT:    tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
 ; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 ;
-; CHECK: Function Attrs: norecurse
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
-; CHECK-NEXT:    [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
+; CHECK-NEXT:    [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]]
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
 ; CHECK-NEXT:    store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
index 822b701c2f4db..855bab8b5d944 100644
--- a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
+++ b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
@@ -29,19 +29,6 @@ define fastcc void @rec(ptr %0, i64 %1) {
 
 !0 = !{i32 7, !"openmp", i32 50}
 !1 = !{i32 7, !"openmp-device", i32 50}
-; MODULE-LABEL: define {{[^@]+}}@nblist
-; MODULE-SAME: () #[[ATTR0:[0-9]+]] {
-; MODULE-NEXT:    [[TMP1:%.*]] = call ptr @alloc()
-; MODULE-NEXT:    call fastcc void @rec.internalized(ptr [[TMP1]], i64 0)
-; MODULE-NEXT:    ret i32 0
-;
-;
-; MODULE-LABEL: define {{[^@]+}}@rec.internalized
-; MODULE-SAME: (ptr nocapture writeonly [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
-; MODULE-NEXT:    call fastcc void @rec.internalized(ptr nocapture writeonly [[TMP0]], i64 0) #[[ATTR2:[0-9]+]]
-; MODULE-NEXT:    ret void
-;
-;
 ; MODULE-LABEL: define {{[^@]+}}@rec
 ; MODULE-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
 ; MODULE-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]

diff  --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index 16ccd2d726d36..fb523215e7d43 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -51,6 +51,13 @@ if.end:
   ret void
 }
 
+; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
+; Function Attrs: noinline
+define internal void @foo() {
+entry:
+  ret void
+}
+
 ; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
 ; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
 ; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
@@ -72,13 +79,6 @@ if.end:
   ret void
 }
 
-; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
-; Function Attrs: noinline
-define internal void @foo() {
-entry:
-  ret void
-}
-
 ; CHECK: [openmp-opt] Basic block @bar.internalized entry is executed by a single thread.
 ; Function Attrs: noinline
 define void @bar() {


        


More information about the llvm-commits mailing list