r356472 - [OPENMP] Codegen for local variables with the allocate pragma.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 19 09:41:16 PDT 2019


Author: abataev
Date: Tue Mar 19 09:41:16 2019
New Revision: 356472

URL: http://llvm.org/viewvc/llvm-project?rev=356472&view=rev
Log:
[OPENMP] Codegen for local variables with the allocate pragma.

Added initial codegen for the local variables with the #pragma omp
allocate directive. Instead of allocating the variables on the stack,
__kmpc_alloc|__kmpc_free functions are used for memory (de-)allocation.

Added:
    cfe/trunk/test/OpenMP/allocate_codegen.cpp
      - copied, changed from r356458, cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Mar 19 09:41:16 2019
@@ -667,6 +667,10 @@ enum OpenMPRTLFunction {
   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
   // *d);
   OMPRTL__kmpc_task_reduction_get_th_data,
+  // Call to void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t *al);
+  OMPRTL__kmpc_alloc,
+  // Call to void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *al);
+  OMPRTL__kmpc_free,
 
   //
   // Offloading related calls
@@ -2195,6 +2199,26 @@ llvm::FunctionCallee CGOpenMPRuntime::cr
         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
     break;
   }
+  case OMPRTL__kmpc_alloc: {
+    // Build void *__kmpc_alloc(int gtid, size_t sz, const omp_allocator_t
+    // *al);
+    // omp_allocator_t type is void *.
+    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
+    break;
+  }
+  case OMPRTL__kmpc_free: {
+    // Build void __kmpc_free(int gtid, void *ptr, const omp_allocator_t
+    // *al);
+    // omp_allocator_t type is void *.
+    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
+    break;
+  }
   case OMPRTL__kmpc_push_target_tripcount: {
     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
     // size);
@@ -9669,8 +9693,81 @@ Address CGOpenMPRuntime::getParameterAdd
   return CGF.GetAddrOfLocalVar(NativeParam);
 }
 
+namespace {
+/// Cleanup that emits a saved runtime call taking CleanupArgs arguments.
+class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+public:
+  static const int CleanupArgs = 3;
+  // Callee and argument values captured by the constructor, used by Emit().
+private:
+  llvm::FunctionCallee RTLFn;
+  llvm::Value *Args[CleanupArgs];
+
+public:
+  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
+                       ArrayRef<llvm::Value *> CallArgs)
+      : RTLFn(RTLFn) {
+    assert(CallArgs.size() == CleanupArgs &&
+           "Size of arguments does not match.");
+    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
+  }
+  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+    if (!CGF.HaveInsertPoint())
+      return;
+    CGF.EmitRuntimeCall(RTLFn, Args);
+  }
+};
+} // namespace
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
+  // Variables named in '#pragma omp allocate' get storage from __kmpc_alloc
+  // (released by a __kmpc_free cleanup); all others yield an invalid address.
+  const VarDecl *CVD = VD->getCanonicalDecl();
+  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+    return Address::invalid();
+  for (const Attr *A: CVD->getAttrs()) {
+    if (const auto *AA = dyn_cast<OMPAllocateDeclAttr>(A)) {
+      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+      if (!Elem.second.ServiceInsertPt)
+        setLocThreadIdInsertPt(CGF);
+      // Emit at the function's service insertion point, restoring it after.
+      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+      CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+      llvm::Value *Size;
+      CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+      if (CVD->getType()->isVariablyModifiedType()) {
+        Size = CGF.getTypeSize(CVD->getType());
+        Align = CGM.getContext().getTypeAlignInChars(CVD->getType());
+      } else {
+        // Align already holds the declared alignment; pad the size up to it.
+        CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+        Size = CGM.getSize(Sz.alignTo(Align));
+      }
+      llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+      // Default allocator in libomp is nullptr.
+      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+      if (const Expr *AllocExpr = AA->getAllocator())
+        Allocator = CGF.EmitScalarExpr(AllocExpr);
+      llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+      llvm::Value *Addr =
+          CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
+                              CVD->getName() + ".void.addr");
+      llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {
+          ThreadID, Addr, Allocator};
+      llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
+
+      CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
+          NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs));
+      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Addr,
+          CGF.ConvertTypeForMem(
+              CGM.getContext().getPointerType(CVD->getType())),
+          CVD->getName() + ".addr");
+      return Address(Addr, Align);
+    }
+  }
   return Address::invalid();
 }
 

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Tue Mar 19 09:41:16 2019
@@ -2163,6 +2163,12 @@ public:
   /// \param TargetParam Corresponding target-specific parameter.
   Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
                               const VarDecl *TargetParam) const override;
+
+  /// OpenMP-specific address of local variable \p VD; always invalid here.
+  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+                                    const VarDecl *VD) override {
+    return Address::invalid();
+  }
 };
 
 } // namespace CodeGen

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Tue Mar 19 09:41:16 2019
@@ -4746,6 +4746,9 @@ Address CGOpenMPRuntimeNVPTX::getAddress
         return VDI->second.PrivateAddr;
     }
   }
+  // TODO: replace it with return
+  // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp
+  // supports __kmpc_alloc|__kmpc_free.
   return Address::invalid();
 }
 

Modified: cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp?rev=356472&r1=356471&r2=356472&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp (original)
+++ cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp Tue Mar 19 09:41:16 2019
@@ -1,16 +1,16 @@
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -o - | FileCheck %s
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -o - | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -82,6 +82,10 @@ int main () {
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
 // CHECK-NEXT: #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
+  int b = 3;
+// CHECK: int b = 3;
+#pragma omp allocate(b)
+// CHECK-NEXT: #pragma omp allocate(b)
   return (foo<int>());
 }
 

Copied: cfe/trunk/test/OpenMP/allocate_codegen.cpp (from r356458, cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp)
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/allocate_codegen.cpp?p2=cfe/trunk/test/OpenMP/allocate_codegen.cpp&p1=cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp&r1=356458&r2=356472&rev=356472&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/allocate_allocator_ast_print.cpp (original)
+++ cfe/trunk/test/OpenMP/allocate_codegen.cpp Tue Mar 19 09:41:16 2019
@@ -1,16 +1,17 @@
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
-// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10.6.0 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print
+// RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -triple x86_64-unknown-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -33,20 +34,13 @@ struct St{
 struct St1{
  int a;
  static int b;
-// CHECK: static int b;
 #pragma omp allocate(b) allocator(omp_default_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(St1::b) allocator(omp_default_mem_alloc){{$}}
 } d;
 
 int a, b;
-// CHECK: int a;
-// CHECK: int b;
 #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
 #pragma omp allocate(a) allocator(omp_const_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_const_mem_alloc)
 #pragma omp allocate(d, b) allocator(omp_high_bw_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(d,b) allocator(omp_high_bw_mem_alloc)
 
 template <class T>
 struct ST {
@@ -60,30 +54,49 @@ template <class T> T foo() {
   v = ST<T>::m;
   return v;
 }
-//CHECK: template <class T> T foo() {
-//CHECK-NEXT: T v;
-//CHECK-NEXT: #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
-//CHECK: template<> int foo<int>() {
-//CHECK-NEXT: int v;
-//CHECK-NEXT: #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
 
 namespace ns{
   int a;
 }
-// CHECK: namespace ns {
-// CHECK-NEXT: int a;
-// CHECK-NEXT: }
 #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
 
+// CHECK-NOT:  call {{.+}} {{__kmpc_alloc|__kmpc_free}}
+
+// CHECK-LABEL: @main
 int main () {
   static int a;
-// CHECK: static int a;
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
-// CHECK-NEXT: #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
   a=2;
+  // CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
+  // CHECK-NEXT: [[B_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8** null)
+  // CHECK-NEXT: [[B_ADDR:%.+]] = bitcast i8* [[B_VOID_ADDR]] to double*
+  // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
+  // CHECK:      store double 3.000000e+00, double* [[B_ADDR]],
+  // CHECK:      [[RES:%.+]] = call i32 [[FOO:@.+]]()
+  // CHECK:      store i32 [[RES]], i32* [[RET:%.+]],
+  // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[B_VOID_ADDR]], i8** null)
+  // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
+  double b = 3;
+#pragma omp allocate(b)
+  // CHECK:      [[RETVAL:%.+]] = load i32, i32* [[RET]],
+  // CHECK:      ret i32 [[RETVAL]]
   return (foo<int>());
 }
 
+// CHECK-NOT:  call {{.+}} {{__kmpc_alloc|__kmpc_free}}
+
+// CHECK: define {{.*}}i32 [[FOO]]()
+// CHECK:      [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
+// CHECK-NEXT: [[OMP_CGROUP_MEM_ALLOC:%.+]] = load i8**, i8*** @omp_cgroup_mem_alloc,
+// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8** [[OMP_CGROUP_MEM_ALLOC]])
+// CHECK-NEXT: [[V_ADDR:%.+]] = bitcast i8* [[V_VOID_ADDR]] to i32*
+// CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
+// CHECK:      store i32 %{{.+}}, i32* [[V_ADDR]],
+// CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]],
+// CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8** [[OMP_CGROUP_MEM_ALLOC]])
+// CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
+// CHECK:      ret i32 [[V_VAL]]
+
+// CHECK-NOT:  call {{.+}} {{__kmpc_alloc|__kmpc_free}}
 extern template int ST<int>::m;
 #endif




More information about the cfe-commits mailing list