[clang] 7df2371 - Add codegen for allocate directive's 'align' clause
Aaron Ballman via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 5 09:41:42 PST 2022
Author: David Pagan
Date: 2022-01-05T12:40:58-05:00
New Revision: 7df2371bc6518a63bdbe5f3c44bd064940808e35
URL: https://github.com/llvm/llvm-project/commit/7df2371bc6518a63bdbe5f3c44bd064940808e35
DIFF: https://github.com/llvm/llvm-project/commit/7df2371bc6518a63bdbe5f3c44bd064940808e35.diff
LOG: Add codegen for allocate directive's 'align' clause
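For illustration, a minimal sketch of the new behavior, distilled from the
added test (x86_64 values; IR names abbreviated). When an 'align' clause is
present, codegen now calls __kmpc_aligned_alloc instead of __kmpc_alloc,
passing the alignment as an extra size_t argument ahead of the size:

    int foo4[40];
    #pragma omp allocate(foo4) align(16) allocator(omp_high_bw_mem_alloc)

    ; emitted IR (abridged): 40 * 4 = 160 bytes, 16-byte alignment,
    ; allocator handle 4 == omp_high_bw_mem_alloc
    %.foo4.void.addr = call i8* @__kmpc_aligned_alloc(i32 %tid, i64 16,
                                i64 160, i8* inttoptr (i64 4 to i8*))

When no allocator clause is given, a null allocator (i8* null) is passed;
without an 'align' clause the existing __kmpc_alloc path is unchanged.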
Added:
clang/test/OpenMP/align_clause_codegen.cpp
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/test/OpenMP/allocate_codegen.cpp
clang/test/OpenMP/allocate_codegen_attr.cpp
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 40e2094ea4ce7..9c93181b0395d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3466,8 +3466,7 @@ static bool isAllocatableDecl(const VarDecl *VD) {
return false;
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
- return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
- AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+ return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
!AA->getAllocator());
}
@@ -12217,6 +12216,26 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
return CGF.GetAddrOfLocalVar(NativeParam);
}
+/// Return allocator value from expression, or return a null allocator (default
+/// when no allocator specified).
+static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
+ const Expr *Allocator) {
+ llvm::Value *AllocVal;
+ if (Allocator) {
+ AllocVal = CGF.EmitScalarExpr(Allocator);
+ // According to the standard, the original allocator type is an enum
+ // (integer). Convert to pointer type, if required.
+ AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
+ CGF.getContext().VoidPtrTy,
+ Allocator->getExprLoc());
+ } else {
+ // If no allocator specified, it defaults to the null allocator.
+ AllocVal = llvm::Constant::getNullValue(
+ CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
+ }
+ return AllocVal;
+}
+
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
if (!VD)
@@ -12253,20 +12272,24 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
}
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
- assert(AA->getAllocator() &&
- "Expected allocator expression for non-default allocator.");
- llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
- // According to the standard, the original allocator type is a enum
- // (integer). Convert to pointer type, if required.
- Allocator = CGF.EmitScalarConversion(
- Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
- AA->getAllocator()->getExprLoc());
- llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
- llvm::Value *Addr =
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_alloc),
- Args, getName({CVD->getName(), ".void.addr"}));
+ const Expr *Allocator = AA->getAllocator();
+ llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
+ llvm::Value *Alignment =
+ AA->getAlignment()
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
+ CGM.SizeTy, /*isSigned=*/false)
+ : nullptr;
+ SmallVector<llvm::Value *, 4> Args;
+ Args.push_back(ThreadID);
+ if (Alignment)
+ Args.push_back(Alignment);
+ Args.push_back(Size);
+ Args.push_back(AllocVal);
+ llvm::omp::RuntimeFunction FnID =
+ Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
+ llvm::Value *Addr = CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
+ getName({CVD->getName(), ".void.addr"}));
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);
QualType Ty = CGM.getContext().getPointerType(CVD->getType());
@@ -12280,14 +12303,14 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
llvm::FunctionCallee RTLFn;
SourceLocation::UIntTy LocEncoding;
Address Addr;
- const Expr *Allocator;
+ const Expr *AllocExpr;
public:
OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
SourceLocation::UIntTy LocEncoding, Address Addr,
- const Expr *Allocator)
+ const Expr *AllocExpr)
: RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
- Allocator(Allocator) {}
+ AllocExpr(AllocExpr) {}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
@@ -12296,14 +12319,8 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
CGF, SourceLocation::getFromRawEncoding(LocEncoding));
Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr.getPointer(), CGF.VoidPtrTy);
- llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
- // According to the standard, the original allocator type is a enum
- // (integer). Convert to pointer type, if required.
- AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
- CGF.getContext().VoidPtrTy,
- Allocator->getExprLoc());
+ llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
Args[2] = AllocVal;
-
CGF.EmitRuntimeCall(RTLFn, Args);
}
};
@@ -12311,7 +12328,7 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
- VDAddr, AA->getAllocator());
+ VDAddr, Allocator);
if (UntiedRealAddr.isValid())
if (auto *Region =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
diff --git a/clang/test/OpenMP/align_clause_codegen.cpp b/clang/test/OpenMP/align_clause_codegen.cpp
new file mode 100644
index 0000000000000..5e0773687dc26
--- /dev/null
+++ b/clang/test/OpenMP/align_clause_codegen.cpp
@@ -0,0 +1,303 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \
+// RUN: -triple i386-unknown-unknown -fopenmp-version=51 %s | \
+// RUN: FileCheck %s --check-prefix=CHECK-32
+// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \
+// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp \
+// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
+// RUN: -emit-pch %s -o %t
+// RUN: %clang_cc1 -fopenmp \
+// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
+// RUN: -include-pch %t -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef enum omp_allocator_handle_t {
+ omp_null_allocator = 0,
+ omp_default_mem_alloc = 1,
+ omp_large_cap_mem_alloc = 2,
+ omp_const_mem_alloc = 3,
+ omp_high_bw_mem_alloc = 4,
+ omp_low_lat_mem_alloc = 5,
+ omp_cgroup_mem_alloc = 6,
+ omp_pteam_mem_alloc = 7,
+ omp_thread_mem_alloc = 8,
+ KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+} omp_allocator_handle_t;
+
+int main() {
+ int foo0[5];
+ int foo1[10];
+ int foo2[20];
+ int foo3[30];
+ int foo4[40];
+ int foo5[50];
+ int foo6[60];
+ int foo7[70];
+ int foo8[80];
+ omp_allocator_handle_t MyAlloc = omp_large_cap_mem_alloc;
+
+#pragma omp allocate(foo0) align(1)
+#pragma omp allocate(foo1) allocator(omp_pteam_mem_alloc) align(2)
+#pragma omp allocate(foo2) align(4) allocator(omp_cgroup_mem_alloc)
+#pragma omp allocate(foo3) align(8) allocator(omp_low_lat_mem_alloc)
+#pragma omp allocate(foo4) align(16) allocator(omp_high_bw_mem_alloc)
+#pragma omp allocate(foo5) align(32) allocator(omp_const_mem_alloc)
+#pragma omp allocate(foo6) align(64) allocator(omp_large_cap_mem_alloc)
+#pragma omp allocate(foo7) align(32) allocator(omp_thread_mem_alloc)
+#pragma omp allocate(foo8) align(16) allocator(omp_null_allocator)
+ {
+ double foo9[80];
+ double foo10[90];
+#pragma omp allocate(foo9) align(8) allocator(omp_thread_mem_alloc)
+#pragma omp allocate(foo10) align(128)
+ }
+ {
+ int bar1;
+ int bar2[10];
+ int bar3[20];
+ int *bar4;
+ float bar5;
+ double bar6[30];
+#pragma omp allocate(bar1, bar2, bar3) align(2) allocator(MyAlloc)
+#pragma omp allocate(bar4, bar5, bar6) align(16)
+ }
+}
+
+// Verify align clause in template with non-type template parameter.
+template <typename T, unsigned size, unsigned align>
+T run() {
+ T foo[size];
+#pragma omp allocate(foo) align(align) allocator(omp_cgroup_mem_alloc)
+ return foo[0];
+}
+
+int template_test() {
+ double result;
+ result = run<double, 1000, 16>();
+ return 0;
+}
+#endif
+// CHECK-32-LABEL: define {{[^@]+}}@main
+// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[MYALLOC:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT: [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
+// CHECK-32-NEXT: [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT: [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
+// CHECK-32-NEXT: [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
+// CHECK-32-NEXT: [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 120, i8* inttoptr (i32 5 to i8*))
+// CHECK-32-NEXT: [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
+// CHECK-32-NEXT: [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 160, i8* inttoptr (i32 4 to i8*))
+// CHECK-32-NEXT: [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
+// CHECK-32-NEXT: [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 200, i8* inttoptr (i32 3 to i8*))
+// CHECK-32-NEXT: [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
+// CHECK-32-NEXT: [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 64, i32 240, i8* inttoptr (i32 2 to i8*))
+// CHECK-32-NEXT: [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
+// CHECK-32-NEXT: [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 280, i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
+// CHECK-32-NEXT: [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 320, i8* null)
+// CHECK-32-NEXT: [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
+// CHECK-32-NEXT: store i32 2, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 640, i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
+// CHECK-32-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 128, i32 720, i8* null)
+// CHECK-32-NEXT: [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
+// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
+// CHECK-32-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT: [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
+// CHECK-32-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT: [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
+// CHECK-32-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT: [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
+// CHECK-32-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
+// CHECK-32-NEXT: [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
+// CHECK-32-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
+// CHECK-32-NEXT: [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
+// CHECK-32-NEXT: [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 240, i8* null)
+// CHECK-32-NEXT: [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
+// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
+// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
+// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
+// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV3:%.*]] = inttoptr i32 [[TMP10]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
+// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV4:%.*]] = inttoptr i32 [[TMP12]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
+// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV5:%.*]] = inttoptr i32 [[TMP14]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
+// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
+// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i32 2 to i8*))
+// CHECK-32-NEXT: [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i32 3 to i8*))
+// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i32 4 to i8*))
+// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i32 5 to i8*))
+// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
+// CHECK-32-NEXT: ret i32 0
+//
+//
+// CHECK-32-LABEL: define {{[^@]+}}@_Z13template_testv
+// CHECK-32-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[RESULT:%.*]] = alloca double, align 8
+// CHECK-32-NEXT: [[CALL:%.*]] = call double @_Z3runIdLj1000ELj16EET_v()
+// CHECK-32-NEXT: store double [[CALL]], double* [[RESULT]], align 8
+// CHECK-32-NEXT: ret i32 0
+//
+//
+// CHECK-32-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
+// CHECK-32-SAME: () #[[ATTR2]] comdat {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-32-NEXT: [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 8000, i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
+// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 8
+// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: ret double [[TMP1]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[MYALLOC:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT: [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
+// CHECK-NEXT: [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT: [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
+// CHECK-NEXT: [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
+// CHECK-NEXT: [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 128, i8* inttoptr (i64 5 to i8*))
+// CHECK-NEXT: [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
+// CHECK-NEXT: [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 160, i8* inttoptr (i64 4 to i8*))
+// CHECK-NEXT: [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
+// CHECK-NEXT: [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 208, i8* inttoptr (i64 3 to i8*))
+// CHECK-NEXT: [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
+// CHECK-NEXT: [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 64, i64 240, i8* inttoptr (i64 2 to i8*))
+// CHECK-NEXT: [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
+// CHECK-NEXT: [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 288, i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
+// CHECK-NEXT: [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 320, i8* null)
+// CHECK-NEXT: [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
+// CHECK-NEXT: store i64 2, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 640, i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
+// CHECK-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 128, i64 720, i8* null)
+// CHECK-NEXT: [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
+// CHECK-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT: [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
+// CHECK-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT: [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
+// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
+// CHECK-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT: [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
+// CHECK-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
+// CHECK-NEXT: [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
+// CHECK-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 4, i8* null)
+// CHECK-NEXT: [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
+// CHECK-NEXT: [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 240, i8* null)
+// CHECK-NEXT: [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV3:%.*]] = inttoptr i64 [[TMP10]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV4:%.*]] = inttoptr i64 [[TMP12]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
+// CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP14]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i64 2 to i8*))
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i64 3 to i8*))
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i64 4 to i8*))
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i64 5 to i8*))
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT: [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_Z13template_testv
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RESULT:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[CALL:%.*]] = call double @_Z3runIdLj1000ELj16EET_v()
+// CHECK-NEXT: store double [[CALL]], double* [[RESULT]], align 8
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
+// CHECK-SAME: () #[[ATTR2]] comdat {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT: [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8000, i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i64 0, i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: ret double [[TMP1]]
+//
diff --git a/clang/test/OpenMP/allocate_codegen.cpp b/clang/test/OpenMP/allocate_codegen.cpp
index eb70bf3c29399..47314a4beca9c 100644
--- a/clang/test/OpenMP/allocate_codegen.cpp
+++ b/clang/test/OpenMP/allocate_codegen.cpp
@@ -30,13 +30,13 @@ enum omp_allocator_handle_t {
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
};
-struct St{
- int a;
+struct St {
+ int a;
};
-struct St1{
- int a;
- static int b;
+struct St1 {
+ int a;
+ static int b;
#pragma omp allocate(b) allocator(omp_default_mem_alloc)
} d;
@@ -48,36 +48,49 @@ int a, b, c;
template <class T>
struct ST {
static T m;
- #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
+#pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
};
template <class T> T foo() {
T v;
- #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
+#pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
v = ST<T>::m;
return v;
}
-namespace ns{
- int a;
+namespace ns {
+int a;
}
#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
-// CHECK-LABEL: @main
-int main () {
+int main() {
static int a;
#pragma omp allocate(a) allocator(omp_thread_mem_alloc)
- a=2;
- // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
- // CHECK: alloca double,
- // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+ a = 2;
double b = 3;
#pragma omp allocate(b)
return (foo<int>());
}
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// CHECK-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4
+// CHECK-NEXT: [[DOTB__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP0]], i64 8, i8* null)
+// CHECK-NEXT: [[DOTB__ADDR:%.*]] = bitcast i8* [[DOTB__VOID_ADDR]] to double*
+// CHECK-NEXT: store double 3.000000e+00, double* [[DOTB__ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v()
+// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[DOTB__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 6 to i8*))
@@ -101,11 +114,11 @@ void bar(int a, float &z) {
// CHECK: [[Z_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8* inttoptr (i64 1 to i8*))
// CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
// CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
-#pragma omp allocate(a,z) allocator(omp_default_mem_alloc)
-// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK: ret void
+#pragma omp allocate(a, z) allocator(omp_default_mem_alloc)
+ // CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
+ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+ // CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+ // CHECK: ret void
}
#endif
diff --git a/clang/test/OpenMP/allocate_codegen_attr.cpp b/clang/test/OpenMP/allocate_codegen_attr.cpp
index 0ef5c77a2738f..87180f2a529f7 100644
--- a/clang/test/OpenMP/allocate_codegen_attr.cpp
+++ b/clang/test/OpenMP/allocate_codegen_attr.cpp
@@ -30,20 +30,20 @@ enum omp_allocator_handle_t {
KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
};
-struct St{
- int a;
+struct St {
+ int a;
};
-struct St1{
- int a;
- static int b;
- [[omp::directive(allocate(b) allocator(omp_default_mem_alloc))]];
+struct St1 {
+ int a;
+ static int b;
+ [[omp::directive(allocate(b) allocator(omp_default_mem_alloc))]];
} d;
int a, b, c;
-[[omp::directive(allocate(a) allocator(omp_large_cap_mem_alloc)),
- directive(allocate(b) allocator(omp_const_mem_alloc)),
- directive(allocate(d, c) allocator(omp_high_bw_mem_alloc))]];
+[[ omp::directive(allocate(a) allocator(omp_large_cap_mem_alloc)),
+ directive(allocate(b) allocator(omp_const_mem_alloc)),
+ directive(allocate(d, c) allocator(omp_high_bw_mem_alloc)) ]];
template <class T>
struct ST {
@@ -58,26 +58,39 @@ template <class T> T foo() {
return v;
}
-namespace ns{
- int a;
+namespace ns {
+int a;
}
[[omp::directive(allocate(ns::a) allocator(omp_pteam_mem_alloc))]];
// CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
-// CHECK-LABEL: @main
-int main () {
+int main() {
static int a;
[[omp::directive(allocate(a) allocator(omp_thread_mem_alloc))]];
- a=2;
- // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
- // CHECK: alloca double,
- // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+ a = 2;
double b = 3;
[[omp::directive(allocate(b))]];
return (foo<int>());
}
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// CHECK-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4
+// CHECK-NEXT: [[DOTB__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP0]], i64 8, i8* null)
+// CHECK-NEXT: [[DOTB__ADDR:%.*]] = bitcast i8* [[DOTB__VOID_ADDR]] to double*
+// CHECK-NEXT: store double 3.000000e+00, double* [[DOTB__ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v()
+// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[DOTB__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+
// CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 6 to i8*))
@@ -95,18 +108,17 @@ extern template int ST<int>::m;
// CHECK: define{{.*}} void @{{.+}}bar{{.+}}(i32 %{{.+}}, float* {{.+}})
void bar(int a, float &z) {
-// CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID:%.+]], i64 4, i8* inttoptr (i64 1 to i8*))
-// CHECK: [[A_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
-// CHECK: store i32 %{{.+}}, i32* [[A_ADDR]],
-// CHECK: [[Z_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8* inttoptr (i64 1 to i8*))
-// CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
-// CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
-[[omp::directive(allocate(a,z) allocator(omp_default_mem_alloc))]];
-// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK: ret void
+ // CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID:%.+]], i64 4, i8* inttoptr (i64 1 to i8*))
+ // CHECK: [[A_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
+ // CHECK: store i32 %{{.+}}, i32* [[A_ADDR]],
+ // CHECK: [[Z_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8* inttoptr (i64 1 to i8*))
+ // CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
+ // CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
+ [[omp::directive(allocate(a, z) allocator(omp_default_mem_alloc))]];
+ // CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
+ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+ // CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+ // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+ // CHECK: ret void
}
#endif
-
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 5e82925f1b83c..d2b70edd4d87f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -382,6 +382,8 @@ __OMP_RTL(__kmpc_doacross_wait, false, Void, IdentPtr, Int32, Int64Ptr)
__OMP_RTL(__kmpc_doacross_fini, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, VoidPtr)
+__OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy,
+ VoidPtr)
__OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
__OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
@@ -905,6 +907,8 @@ __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(),
ParamAttrs(NoCaptureAttrs))
__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_aligned_alloc, DefaultAttrs, ReturnPtrAttrs,
+ ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs,
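For reference, the new OMPKinds.def entry corresponds to a runtime entry
point with roughly this C prototype (a sketch inferred from the type tokens
above; the authoritative declaration lives in the OpenMP runtime sources,
where the allocator handle is an omp_allocator_handle_t rather than void *):

    // Allocate `size` bytes with `align` alignment from `allocator` on
    // behalf of thread `gtid`; the result is later released via __kmpc_free.
    void *__kmpc_aligned_alloc(int gtid, size_t align, size_t size,
                               void *allocator);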