[PATCH] D99506: [OpenMP][NFC] Move the `noinline` to the parallel entry point
Johannes Doerfert via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 29 09:10:46 PDT 2021
jdoerfert updated this revision to Diff 333895.
jdoerfert added a comment.
Add test for nvptx codegen, including wrapper attribute check
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D99506/new/
https://reviews.llvm.org/D99506
Files:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/OpenMP/nvptx_parallel_codegen.cpp
Index: clang/test/OpenMP/nvptx_parallel_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -4,7 +4,7 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix PAR
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -disable-O0-optnone | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix SEQ
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix PAR
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix PAR
// expected-no-diagnostics
@@ -318,7 +318,8 @@
// CHECK: [[EXIT]]
// CHECK: ret void
-// CHECK: define internal void [[PARALLEL_FN4]](
+// CHECK: noinline
+// CHECK-NEXT: define internal void [[PARALLEL_FN4]](
// CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
// CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
@@ -326,6 +327,9 @@
// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]]
+// CHECK: Function Attrs: convergent noinline norecurse nounwind
+// CHECK-NEXT: [[PARALLEL_FN4]]_wrapper
+
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker()
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}(
// CHECK-32: [[A_ADDR:%.+]] = alloca i32,
@@ -373,7 +377,6 @@
// CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]],
// CHECK: br label
-
// CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]]
// CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]]
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2093,14 +2093,6 @@
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- // Ensure we do not inline the function. This is trivially true for the ones
- // passed to __kmpc_fork_call but the ones calles in serialized regions
- // could be inlined. This is not a perfect but it is closer to the invariant
- // we want, namely, every data environment starts with a new function.
- // TODO: We should pass the if condition to the runtime function and do the
- // handling there. Much cleaner code.
- cast<llvm::Function>(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline);
-
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
@@ -4216,6 +4208,15 @@
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
+
+ // Ensure we do not inline the function. This is trivially true for the ones
+ // passed to __kmpc_fork_call but the ones called in serialized regions
+ // could be inlined. This is not perfect but it is closer to the invariant
+ // we want, namely, every data environment starts with a new function.
+ // TODO: We should pass the if condition to the runtime function and do the
+ // handling there. Much cleaner code.
+ Fn->addFnAttr(llvm::Attribute::NoInline);
+
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
Fn->setDoesNotRecurse();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D99506.333895.patch
Type: text/x-patch
Size: 5283 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210329/11632986/attachment.bin>
More information about the cfe-commits mailing list