[llvm] e28936f - [OpenMP][Opt] Annotate known runtime functions and deduplicate more

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 8 16:36:27 PST 2020


Author: Johannes Doerfert
Date: 2020-02-08T18:35:39-06:00
New Revision: e28936f6137c5a9c4f7673e248c192a9811543b6

URL: https://github.com/llvm/llvm-project/commit/e28936f6137c5a9c4f7673e248c192a9811543b6
DIFF: https://github.com/llvm/llvm-project/commit/e28936f6137c5a9c4f7673e248c192a9811543b6.diff

LOG: [OpenMP][Opt] Annotate known runtime functions and deduplicate more

This adds ~27 more runtime calls to the OMPKinds.def file, all with
attributes. We deduplicate 16 of those automatically within function
(i.e., thread) scope, and we annotate all of them automatically during
the OpenMPOpt discovery step. A test with all omp_XXXX runtime calls to
track annotation coverage is included.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D69984

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 8e016760dd67..3525c427b8bc 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -176,6 +176,34 @@ __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 
 __OMP_RTL(omp_get_thread_num, false, Int32, )
+__OMP_RTL(omp_get_num_threads, false, Int32, )
+__OMP_RTL(omp_get_max_threads, false, Int32, )
+__OMP_RTL(omp_in_parallel, false, Int32, )
+__OMP_RTL(omp_get_dynamic, false, Int32, )
+__OMP_RTL(omp_get_cancellation, false, Int32, )
+__OMP_RTL(omp_get_nested, false, Int32, )
+__OMP_RTL(omp_get_schedule, false, Void, Int32Ptr, Int32Ptr)
+__OMP_RTL(omp_get_thread_limit, false, Int32, )
+__OMP_RTL(omp_get_supported_active_levels, false, Int32, )
+__OMP_RTL(omp_get_max_active_levels, false, Int32, )
+__OMP_RTL(omp_get_level, false, Int32, )
+__OMP_RTL(omp_get_ancestor_thread_num, false, Int32, )
+__OMP_RTL(omp_get_team_size, false, Int32, )
+__OMP_RTL(omp_get_active_level, false, Int32, )
+__OMP_RTL(omp_in_final, false, Int32, )
+__OMP_RTL(omp_get_proc_bind, false, Int32, )
+__OMP_RTL(omp_get_num_places, false, Int32, )
+__OMP_RTL(omp_get_num_procs, false, Int32, )
+__OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr)
+__OMP_RTL(omp_get_place_num, false, Int32, )
+__OMP_RTL(omp_get_partition_num_places, false, Int32, )
+__OMP_RTL(omp_get_partition_place_nums, false, Int32, )
+
+__OMP_RTL(omp_set_num_threads, false, Void, Int32)
+__OMP_RTL(omp_set_dynamic, false, Void, Int32)
+__OMP_RTL(omp_set_nested, false, Void, Int32)
+__OMP_RTL(omp_set_schedule, false, Void, Int32, Int32)
+__OMP_RTL(omp_set_max_active_levels, false, Void, Int32)
 
 __OMP_RTL(__last, false, Void, )
 
@@ -197,6 +225,16 @@ __OMP_ATTRS_SET(GetterAttrs,
                     ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly),
                                    EnumAttr(NoSync), EnumAttr(NoFree))
                     : AttributeSet(EnumAttr(NoUnwind)))
+__OMP_ATTRS_SET(GetterArgWriteAttrs,
+                OptimisticAttributes
+                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync),
+                                   EnumAttr(NoFree))
+                    : AttributeSet(EnumAttr(NoUnwind)))
+__OMP_ATTRS_SET(SetterAttrs,
+                OptimisticAttributes
+                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly),
+                                   EnumAttr(NoSync), EnumAttr(NoFree))
+                    : AttributeSet(EnumAttr(NoUnwind)))
 
 #undef __OMP_ATTRS_SET
 #undef OMP_ATTRS_SET
@@ -213,6 +251,41 @@ __OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
 
 __OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(),
+                ArrayRef<AttributeSet>(
+                    {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
+                     AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(),
+                {})
+__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(),
+                ArrayRef<AttributeSet>({AttributeSet(),
+                                        AttributeSet(EnumAttr(NoCapture),
+                                                     EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {})
+
+__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {})
 
 #undef __OMP_RTL_ATTRS
 #undef OMP_RTL_ATTRS

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 1822b5ec1072..03604b559504 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
@@ -52,9 +53,10 @@ struct OpenMPOpt {
             SmallPtrSetImpl<Function *> &ModuleSlice,
             CallGraphUpdater &CGUpdater)
       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
-        CGUpdater(CGUpdater) {
+        OMPBuilder(M), CGUpdater(CGUpdater) {
     initializeTypes(M);
     initializeRuntimeFunctions();
+    OMPBuilder.initialize();
   }
 
   /// Generic information that describes a runtime function
@@ -118,12 +120,36 @@ struct OpenMPOpt {
   bool deduplicateRuntimeCalls() {
     bool Changed = false;
 
+    RuntimeFunction DeduplicableRuntimeCallIDs[] = {
+        OMPRTL_omp_get_num_threads,
+        OMPRTL_omp_in_parallel,
+        OMPRTL_omp_get_cancellation,
+        OMPRTL_omp_get_thread_limit,
+        OMPRTL_omp_get_supported_active_levels,
+        OMPRTL_omp_get_level,
+        OMPRTL_omp_get_ancestor_thread_num,
+        OMPRTL_omp_get_team_size,
+        OMPRTL_omp_get_active_level,
+        OMPRTL_omp_in_final,
+        OMPRTL_omp_get_proc_bind,
+        OMPRTL_omp_get_num_places,
+        OMPRTL_omp_get_num_procs,
+        OMPRTL_omp_get_place_num,
+        OMPRTL_omp_get_partition_num_places,
+        OMPRTL_omp_get_partition_place_nums};
+
+    // Global-tid is handled separately.
     SmallSetVector<Value *, 16> GTIdArgs;
     collectGlobalThreadIdArguments(GTIdArgs);
     LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                       << " global thread ID arguments\n");
 
     for (Function *F : SCC) {
+      for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
+        deduplicateRuntimeCalls(*F, RFIs[DeduplicableRuntimeCallID]);
+
+      // __kmpc_global_thread_num is special as we can replace it with an
+      // argument in enough cases to make it worth trying.
       Value *GTIdArg = nullptr;
       for (Argument &Arg : F->args())
         if (GTIdArgs.count(&Arg)) {
@@ -132,7 +158,6 @@ struct OpenMPOpt {
         }
       Changed |= deduplicateRuntimeCalls(
           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
-      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
     }
 
     return Changed;
@@ -259,6 +284,7 @@ struct OpenMPOpt {
       unsigned NumUses = 0;
       if (!RFI.Declaration)
         return NumUses;
+      OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
 
       NumOpenMPRuntimeFunctionsIdentified += 1;
       NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
@@ -312,6 +338,9 @@ struct OpenMPOpt {
   /// The slice of the module we are allowed to look at.
   SmallPtrSetImpl<Function *> &ModuleSlice;
 
+  /// An OpenMP-IR-Builder instance
+  OpenMPIRBuilder OMPBuilder;
+
   /// Callback to update the call graph, the first argument is a removed call,
   /// the second an optional replacement call.
   CallGraphUpdater &CGUpdater;


        


More information about the llvm-commits mailing list