r356472 - [OPENMP] Codegen for local variables with the allocate pragma.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 19 09:41:16 PDT 2019
Author: abataev
Date: Tue Mar 19 09:41:16 2019
New Revision: 356472
URL: http://llvm.org/viewvc/llvm-project?rev=356472&view=rev
Log:
[OPENMP] Codegen for local variables with the allocate pragma.
Added initial codegen for the local variables with the #pragma omp
allocate directive. Instead of allocating the variables on the stack,
__kmpc_alloc|__kmpc_free functions are used for memory (de-)allocation.
Added:
cfe/trunk/test/OpenMP/allocate_codegen.cpp
- copied, changed from r356458, cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Mar 19 09:41:16 2019
@@ -667,6 +667,10 @@ enum OpenMPRTLFunction {
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
+ // Call to void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t *al);
+ OMPRTL__kmpc_alloc,
+ // Call to void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *al);
+ OMPRTL__kmpc_free,
//
// Offloading related calls
@@ -2195,6 +2199,26 @@ llvm::FunctionCallee CGOpenMPRuntime::cr
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_alloc: {
+ // Build to void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t
+ // *al);
+ // omp_allocator_t type is void *.
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
+ break;
+ }
+ case OMPRTL__kmpc_free: {
+ // Build to void __kmpc_free(int gtid, void *ptr, const omp_allocator_t
+ // *al);
+ // omp_allocator_t type is void *.
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
+ break;
+ }
case OMPRTL__kmpc_push_target_tripcount: {
// Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
// size);
@@ -9669,8 +9693,81 @@ Address CGOpenMPRuntime::getParameterAdd
return CGF.GetAddrOfLocalVar(NativeParam);
}
+namespace {
+/// Cleanup action for allocate support.
+class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+public:
+ static const int CleanupArgs = 3;
+
+private:
+ llvm::FunctionCallee RTLFn;
+ llvm::Value *Args[CleanupArgs];
+
+public:
+ OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
+ ArrayRef<llvm::Value *> CallArgs)
+ : RTLFn(RTLFn) {
+ assert(CallArgs.size() == CleanupArgs &&
+ "Size of arguments does not match.");
+ std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
+ }
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ CGF.EmitRuntimeCall(RTLFn, Args);
+ }
+};
+} // namespace
+
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
+ const VarDecl *CVD = VD->getCanonicalDecl();
+ if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+ return Address::invalid();
+ for (const Attr *A: CVD->getAttrs()) {
+ if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Align = CGM.getContext().getDeclAlign(CVD);
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+ llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+ llvm::Value *Allocator;
+ if (const Expr *AllocExpr = AA->getAllocator()) {
+ Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+ } else {
+ // Default allocator in libomp is nullptr.
+ Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ }
+ llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+ llvm::Value *Addr =
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+ CVD->getName() + ".void.addr");
+ llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
+ ThreadID, Addr, Allocator};
+ llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
+ NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr,
+ CGF.ConvertTypeForMem(
+ CGM.getContext().getPointerType(CVD->getType())),
+ CVD->getName() + ".addr");
+ return Address(Addr, Align);
+ }
+ }
return Address::invalid();
}
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Tue Mar 19 09:41:16 2019
@@ -2163,6 +2163,12 @@ public:
/// \param TargetParam Corresponding target-specific parameter.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
const VarDecl *TargetParam) const override;
+
+ /// Gets the OpenMP-specific address of the local variable.
+ Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD) override {
+ return Address::invalid();
+ }
};
} // namespace CodeGen
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Tue Mar 19 09:41:16 2019
@@ -4746,6 +4746,9 @@ Address CGOpenMPRuntimeNVPTX::getAddress
return VDI->second.PrivateAddr;
}
}
+ // TODO: replace it with return
+ // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
+ // supports __kmpc_alloc|__kmpc_free.
return Address::invalid();
}
Modified: cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp (original)
+++ cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp Tue Mar 19 09:41:16 2019
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
@@ -82,6 +82,10 @@ int main () {
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
+ int b = 3;
+// CHECK: int b = 3;
+#pragma omp allocate(b)
+// CHECK-NEXT: #pragma omp allocate(b)
return (foo<int>());
}
Copied: cfe/trunk/test/OpenMP/allocate_codegen.cpp (from r356458, cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp)
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/allocate_codegen.cpp?p2=cfe/trunk/test/OpenMP/allocate_codegen.cpp&p1=cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp&r1=356458&r2=356472&rev=356472&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp (original)
+++ cfe/trunk/test/OpenMP/allocate_codegen.cpp Tue Mar 19 09:41:16 2019
@@ -1,16 +1,17 @@
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
@@ -33,20 +34,13 @@ struct St{
struct St1{
int a;
static int b;
-// CHECK: static int b;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(St1::b) allocator(omp_default_mem_alloc){{$}}
} d;
int a, b;
-// CHECK: int a;
-// CHECK: int b;
#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
#pragma omp allocate(a) allocator(omp_const_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_const_mem_alloc)
#pragma omp allocate(d, b) allocator(omp_high_bw_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(d,b) allocator(omp_high_bw_mem_alloc)
template <class T>
struct ST {
@@ -60,30 +54,49 @@ template <class T> T foo() {
v = ST<T>::m;
return v;
}
-//CHECK: template <class T> T foo() {
-//CHECK-NEXT: T v;
-//CHECK-NEXT: #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
-//CHECK: template<> int foo<int>() {
-//CHECK-NEXT: int v;
-//CHECK-NEXT: #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
namespace ns{
int a;
}
-// CHECK: namespace ns {
-// CHECK-NEXT: int a;
-// CHECK-NEXT: }
#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
+// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
+
+// CHECK-LABEL: @main
int main () {
static int a;
-// CHECK: static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
a=2;
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
+ // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
+ // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
+ // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+ // CHECK: store double 3.000000e+00, double* [[B_ADDR]],
+ // CHECK: [[RES:%.+]] = call i32 [[FOO:@.+]]()
+ // CHECK: store i32 [[RES]], i32* [[RET:%.+]],
+ // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
+ // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+ double b = 3;
+#pragma omp allocate(b)
+ // CHECK: [[RETVAL:%.+]] = load i32, i32* [[RET]],
+ // CHECK: ret i32 [[RETVAL]]
return (foo<int>());
}
+// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
+
+// CHECK: define {{.*}}i32 [[FOO]]()
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
+// CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
+// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])
+// CHECK-NEXT: [[V_ADDR:%.+]] = bitcast i8* [[V_VOID_ADDR]] to i32*
+// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+// CHECK: store i32 %{{.+}}, i32* [[V_ADDR]],
+// CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]],
+// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8** [[OMP_CGROUP_MEM_ALLOC]])
+// CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+// CHECK: ret i32 [[V_VAL]]
+
+// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
extern template int ST<int>::m;
#endif
More information about the cfe-commits
mailing list