[llvm] d61d72d - [OpenMP] Remove noinline attributes in the device runtime

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 12:45:00 PDT 2022


Author: Joseph Huber
Date: 2022-07-25T15:44:50-04:00
New Revision: d61d72dae604c3258e25c00622b1a85861450303

URL: https://github.com/llvm/llvm-project/commit/d61d72dae604c3258e25c00622b1a85861450303
DIFF: https://github.com/llvm/llvm-project/commit/d61d72dae604c3258e25c00622b1a85861450303.diff

LOG: [OpenMP] Remove noinline attributes in the device runtime

We previously used the `noinline` attributes to specify some defintions
which should be kept alive in the runtime. These were then stripped
immediately in the OpenMPOpt module pass. However, Since the changes in
D130298, we not explicitly state which functions will have external
visiblity in the bitcode library. Additionally the OpenMPOpt module pass
should run before the inliner pass, so this shouldn't make a difference
in whether or not the functions will be alive for the initial pass of
OpenMPOpt. This should simplify the interface, and additionally save
time spend on scanning funciton names for noinline.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D130368

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    openmp/libomptarget/DeviceRTL/include/Synchronization.h
    openmp/libomptarget/DeviceRTL/src/Mapping.cpp
    openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
    openmp/libomptarget/DeviceRTL/src/State.cpp
    openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Removed: 
    llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 0b42fc1519918..ef2384faa2730 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -499,18 +499,6 @@ struct OMPInformationCache : public InformationCache {
   }
 #include "llvm/Frontend/OpenMP/OMPKinds.def"
 
-    // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
-    // functions, except if `optnone` is present.
-    if (isOpenMPDevice(M)) {
-      for (Function &F : M) {
-        for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
-          if (F.hasFnAttribute(Attribute::NoInline) &&
-              F.getName().startswith(Prefix) &&
-              !F.hasFnAttribute(Attribute::OptimizeNone))
-            F.removeFnAttr(Attribute::NoInline);
-      }
-    }
-
     // TODO: We should attach the attributes defined in OMPKinds.def.
   }
 

diff  --git a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll b/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll
deleted file mode 100644
index 349e2799de27a..0000000000000
--- a/llvm/test/Transforms/OpenMP/remove_noinline_attributes.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
-; RUN: opt < %s -S -openmp-opt-cgscc        | FileCheck %s
-; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s
-
-declare void @unknown()
-
-; __kmpc functions
-define void @__kmpc_noinline() noinline nounwind {
-; CHECK: Function Attrs: nounwind
-; CHECK-LABEL: @__kmpc_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-; omp_X functions
-define void @omp_noinline() noinline nounwind {
-; CHECK: Function Attrs: nounwind
-; CHECK-LABEL: @omp_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-; _OMP namespace
-define void @_ZN4_OMP_noinline() noinline nounwind {
-; CHECK: Function Attrs: nounwind
-; CHECK-LABEL: @_ZN4_OMP_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-
-; Negative tests:
-
-define void @__kmpc_noinline_optnone() noinline optnone nounwind {
-; CHECK: Function Attrs: noinline nounwind optnone
-; CHECK-LABEL: @__kmpc_noinline_optnone(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-define void @omp_noinline_optnone() noinline optnone nounwind {
-; CHECK: Function Attrs: noinline nounwind optnone
-; CHECK-LABEL: @omp_noinline_optnone(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-; _OMP namespace
-define void @_ZN4_OMP_noinline_optnone() noinline optnone nounwind {
-; CHECK: Function Attrs: noinline nounwind optnone
-; CHECK-LABEL: @_ZN4_OMP_noinline_optnone(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-define void @a___kmpc_noinline() noinline nounwind {
-; CHECK: Function Attrs: noinline nounwind
-; CHECK-LABEL: @a___kmpc_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-define void @a_omp_noinline() noinline nounwind {
-; CHECK: Function Attrs: noinline nounwind
-; CHECK-LABEL: @a_omp_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-define void @a__ZN4_OMP_noinline() noinline nounwind {
-; CHECK: Function Attrs: noinline nounwind
-; CHECK-LABEL: @a__ZN4_OMP_noinline(
-; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret void
-;
-  call void @unknown()
-  ret void
-}
-
-!llvm.module.flags = !{!0, !1}
-
-!0 = !{i32 7, !"openmp", i32 50}
-!1 = !{i32 7, !"openmp-device", i32 50}

diff  --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
index e33f37a659af2..4b8898f2ffb76 100644
--- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h
+++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
@@ -29,15 +29,13 @@ void threads();
 
 /// Synchronizing threads is allowed even if they all hit 
diff erent instances of
 /// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
-/// restrictive in that it requires all threads to hit the same instance. The
-/// noinline is removed by the openmp-opt pass and helps to preserve the
-/// information till then.
+/// restrictive in that it requires all threads to hit the same instance.
 ///{
 #pragma omp begin assumes ext_aligned_barrier
 
 /// Synchronize all threads in a block, they are are reaching the same
 /// instruction (hence all threads in the block are "aligned").
-__attribute__((noinline)) void threadsAligned();
+void threadsAligned();
 
 #pragma omp end assumes
 ///}

diff  --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 172bbbff68f8e..b161c55382236 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -289,17 +289,17 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
 ///}
 
 extern "C" {
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
+uint32_t __kmpc_get_hardware_thread_id_in_block() {
   FunctionTracingRAII();
   return mapping::getThreadIdInBlock();
 }
 
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
+uint32_t __kmpc_get_hardware_num_threads_in_block() {
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
 
-__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+uint32_t __kmpc_get_warp_size() {
   FunctionTracingRAII();
   return impl::getWarpSize();
 }

diff  --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 27d1ff2e5a55c..5b133b009a7f7 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -243,8 +243,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
     __kmpc_end_sharing_variables();
 }
 
-__attribute__((noinline)) bool
-__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
+bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
   FunctionTracingRAII();
   // Work function and arguments for L1 parallel region.
   *WorkFn = state::ParallelRegionFn;
@@ -259,7 +258,7 @@ __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
   return ThreadIsActive;
 }
 
-__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
+void __kmpc_kernel_end_parallel() {
   FunctionTracingRAII();
   // In case we have modified an ICV for this thread before a ThreadState was
   // created. We drop it now to not contaminate the next parallel region.

diff  --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 7a73330aa4cc0..92847f79829de 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -393,12 +393,12 @@ int omp_get_initial_device(void) { return -1; }
 }
 
 extern "C" {
-__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
+void *__kmpc_alloc_shared(uint64_t Bytes) {
   FunctionTracingRAII();
   return memory::allocShared(Bytes, "Frontend alloc shared");
 }
 
-__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
+void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
   FunctionTracingRAII();
   memory::freeShared(Ptr, Bytes, "Frontend free shared");
 }

diff  --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 43278715be8d1..350da0b460f15 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -358,14 +358,12 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
   impl::namedBarrier();
 }
 
-__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
-                                                          int32_t TId) {
+void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
   FunctionTracingRAII();
   synchronize::threadsAligned();
 }
 
-__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
-                                                             int32_t TId) {
+void __kmpc_barrier_simple_generic(IdentTy *Loc, int32_t TId) {
   FunctionTracingRAII();
   synchronize::threads();
 }


        


More information about the llvm-commits mailing list