r330154 - [OPENMP] General code improvements.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 16 13:16:22 PDT 2018
Author: abataev
Date: Mon Apr 16 13:16:21 2018
New Revision: 330154
URL: http://llvm.org/viewvc/llvm-project?rev=330154&view=rev
Log:
[OPENMP] General code improvements.

Cleanups in the NVPTX OpenMP codegen: spell out types previously hidden behind
auto, make WorkerFunctionState::CGFI a reference bound in the constructor, mark
the execution-mode flag and the data-transfer buffer as compiler-used globals so
they are not stripped, and switch flag tests from explicit icmp-against-constant
comparisons to CreateIsNull/CreateIsNotNull, with matching FileCheck updates.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=330154&r1=330153&r2=330154&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Apr 16 13:16:21 2018
@@ -91,11 +91,11 @@ enum OpenMPRTLFunctionNVPTX {
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
class NVPTXActionTy final : public PrePostActionTy {
- llvm::Value *EnterCallee;
+ llvm::Value *EnterCallee = nullptr;
ArrayRef<llvm::Value *> EnterArgs;
- llvm::Value *ExitCallee;
+ llvm::Value *ExitCallee = nullptr;
ArrayRef<llvm::Value *> ExitArgs;
- bool Conditional;
+ bool Conditional = false;
llvm::BasicBlock *ContBlock = nullptr;
public:
@@ -179,7 +179,7 @@ class CheckVarsEscapingDeclContext final
static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
isDeclareTargetDeclaration(const ValueDecl *VD) {
- for (const auto *D : VD->redecls()) {
+ for (const Decl *D : VD->redecls()) {
if (!D->hasAttrs())
continue;
if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
@@ -233,7 +233,7 @@ class CheckVarsEscapingDeclContext final
void VisitOpenMPCapturedStmt(const CapturedStmt *S) {
if (!S)
return;
- for (const auto &C : S->captures()) {
+ for (const CapturedStmt::Capture &C : S->captures()) {
if (C.capturesVariable() && !C.capturesVariableByCopy()) {
const ValueDecl *VD = C.getCapturedVar();
markAsEscaped(VD);
@@ -255,7 +255,7 @@ class CheckVarsEscapingDeclContext final
return;
ASTContext &C = CGF.getContext();
SmallVector<VarsDataTy, 4> GlobalizedVars;
- for (const auto *D : EscapedDecls)
+ for (const ValueDecl *D : EscapedDecls)
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
stable_sort_comparator);
@@ -296,7 +296,7 @@ public:
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
return;
- for (const auto *D : S->decls())
+ for (const Decl *D : S->decls())
if (const auto *VD = dyn_cast_or_null<ValueDecl>(D))
VisitValueDecl(VD);
}
@@ -312,7 +312,7 @@ public:
void VisitCapturedStmt(const CapturedStmt *S) {
if (!S)
return;
- for (const auto &C : S->captures()) {
+ for (const CapturedStmt::Capture &C : S->captures()) {
if (C.capturesVariable() && !C.capturesVariableByCopy()) {
const ValueDecl *VD = C.getCapturedVar();
markAsEscaped(VD);
@@ -324,7 +324,7 @@ public:
void VisitLambdaExpr(const LambdaExpr *E) {
if (!E)
return;
- for (const auto &C : E->captures()) {
+ for (const LambdaCapture &C : E->captures()) {
if (C.capturesVariable()) {
if (C.getCaptureKind() == LCK_ByRef) {
const ValueDecl *VD = C.getCapturedVar();
@@ -338,7 +338,7 @@ public:
void VisitBlockExpr(const BlockExpr *E) {
if (!E)
return;
- for (const auto &C : E->getBlockDecl()->captures()) {
+ for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) {
if (C.isByRef()) {
const VarDecl *VD = C.getVariable();
markAsEscaped(VD);
@@ -358,8 +358,9 @@ public:
AllEscaped = true;
Visit(Arg);
AllEscaped = SavedAllEscaped;
- } else
+ } else {
Visit(Arg);
+ }
}
Visit(E->getCallee());
}
@@ -383,8 +384,9 @@ public:
AllEscaped = true;
Visit(E->getSubExpr());
AllEscaped = SavedAllEscaped;
- } else
+ } else {
Visit(E->getSubExpr());
+ }
}
void VisitImplicitCastExpr(const ImplicitCastExpr *E) {
if (!E)
@@ -394,8 +396,9 @@ public:
AllEscaped = true;
Visit(E->getSubExpr());
AllEscaped = SavedAllEscaped;
- } else
+ } else {
Visit(E->getSubExpr());
+ }
}
void VisitExpr(const Expr *E) {
if (!E)
@@ -403,7 +406,7 @@ public:
bool SavedAllEscaped = AllEscaped;
if (!E->isLValue())
AllEscaped = false;
- for (const auto *Child : E->children())
+ for (const Stmt *Child : E->children())
if (Child)
Visit(Child);
AllEscaped = SavedAllEscaped;
@@ -411,7 +414,7 @@ public:
void VisitStmt(const Stmt *S) {
if (!S)
return;
- for (const auto *Child : S->children())
+ for (const Stmt *Child : S->children())
if (Child)
Visit(Child);
}
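
The escaping-variable analysis in this file is a ConstStmtVisitor whose hooks
recurse over children explicitly, as the hunks above show. A reduced
illustration of that traversal shape (ChildWalker is an illustrative name, not
the class in the patch):

  #include "clang/AST/Stmt.h"
  #include "clang/AST/StmtVisitor.h"

  // Fallback hook: walk every non-null child, dispatching back through Visit()
  // so more specific Visit* overrides (as in CheckVarsEscapingDeclContext) run.
  class ChildWalker final : public clang::ConstStmtVisitor<ChildWalker> {
  public:
    void VisitStmt(const clang::Stmt *S) {
      if (!S)
        return;
      for (const clang::Stmt *Child : S->children())
        if (Child)
          Visit(Child);
    }
  };
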
@@ -553,19 +556,19 @@ static llvm::Value *getMasterThreadID(Co
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
CodeGenModule &CGM, SourceLocation Loc)
- : WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) {
+ : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()),
+ Loc(Loc) {
createWorkerFunction(CGM);
}
void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
CodeGenModule &CGM) {
// Create a worker function with no arguments.
- CGFI = &CGM.getTypes().arrangeNullaryFunction();
WorkerFn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
/*placeholder=*/"_worker", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, *CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI);
WorkerFn->setDoesNotRecurse();
}
@@ -617,7 +620,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericKe
// Now change the name of the worker function to correspond to this target
// region's entry function.
- WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+ WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker"));
// Create the worker function
emitWorkerFunction(WST);
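
Function::setName() takes an llvm::Twine, so the concatenation above is only
materialized once inside setName; the patch simply spells the two-argument
Twine constructor instead of relying on operator+. A minimal sketch of the same
idiom (renameWorker and its parameters are illustrative names, not from the
patch):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/IR/Function.h"

  // Rename a worker function to "<entry>_worker" without an intermediate
  // std::string: the Twine only records its two operands, and setName()
  // writes the concatenation directly into the value's name storage.
  void renameWorker(llvm::Function &WorkerFn, llvm::StringRef EntryName) {
    WorkerFn.setName(llvm::Twine(EntryName, "_worker"));
  }
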
@@ -634,7 +637,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn
llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
EST.ExitBB = CGF.createBasicBlock(".exit");
- auto *IsWorker =
+ llvm::Value *IsWorker =
Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
@@ -643,7 +646,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
- auto *IsMaster =
+ llvm::Value *IsMaster =
Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
@@ -728,7 +731,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKerne
void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
CodeGenFunction &CGF, EntryFunctionState &EST,
const OMPExecutableDirective &D) {
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// Setup BBs in entry function.
llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
@@ -773,17 +776,18 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntry
// warps participate in parallel work.
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
CGOpenMPRuntimeNVPTX::ExecutionMode Mode) {
- (void)new llvm::GlobalVariable(
+ auto *GVMode = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
llvm::GlobalValue::WeakAnyLinkage,
- llvm::ConstantInt::get(CGM.Int8Ty, Mode), Name + Twine("_exec_mode"));
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode), Twine(Name, "_exec_mode"));
+ CGM.addCompilerUsedGlobal(GVMode);
}
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
ASTContext &Ctx = CGM.getContext();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
- CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {},
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {},
WST.Loc, WST.Loc);
emitWorkerLoop(CGF, WST);
CGF.FinishFunction();
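
The new CGM.addCompilerUsedGlobal(GVMode) call keeps the weak "_exec_mode" flag
alive even though nothing in the module references it. Outside of Clang's
CodeGenModule the same effect is obtained with llvm::appendToCompilerUsed; a
minimal sketch of that pattern (emitExecModeFlag and its parameters are
illustrative names, not part of the patch):

  #include <cstdint>
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/Utils/ModuleUtils.h"

  void emitExecModeFlag(llvm::Module &M, llvm::StringRef Name, uint8_t Mode) {
    llvm::Type *Int8Ty = llvm::Type::getInt8Ty(M.getContext());
    auto *GV = new llvm::GlobalVariable(
        M, Int8Ty, /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage,
        llvm::ConstantInt::get(Int8Ty, Mode), llvm::Twine(Name, "_exec_mode"));
    // Listing the flag in llvm.compiler.used stops GlobalDCE/internalization
    // from dropping an otherwise unreferenced definition.
    llvm::appendToCompilerUsed(M, {GV});
  }
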
@@ -850,9 +854,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoo
CGF.EmitBlock(ExecuteBB);
// Process work items: outlined parallel functions.
- for (auto *W : Work) {
+ for (llvm::Function *W : Work) {
// Try to match this outlined function.
- auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
+ llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
llvm::Value *WorkFnMatch =
Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");
@@ -906,7 +910,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
// RequiresOMPRuntime);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
break;
@@ -914,7 +918,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
case OMPRTL_NVPTX__kmpc_kernel_deinit: {
// Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
break;
@@ -923,14 +927,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
// Build void __kmpc_spmd_kernel_deinit();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
break;
@@ -939,7 +943,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
/// Build void __kmpc_kernel_prepare_parallel(
/// void *outlined_function, int16_t IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
break;
@@ -949,14 +953,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
/// int16_t IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
/// Build void __kmpc_kernel_end_parallel();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
break;
@@ -965,7 +969,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
break;
@@ -974,7 +978,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
@@ -983,7 +987,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build int32_t __kmpc_shuffle_int32(int32_t element,
// int16_t lane_offset, int16_t warp_size);
llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
break;
@@ -992,7 +996,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build int64_t __kmpc_shuffle_int64(int64_t element,
// int16_t lane_offset, int16_t warp_size);
llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
break;
@@ -1018,7 +1022,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
CGM.VoidPtrTy,
ShuffleReduceFnTy->getPointerTo(),
InterWarpCopyFnTy->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
@@ -1061,7 +1065,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
InterWarpCopyFnTy->getPointerTo(),
CopyToScratchpadFnTy->getPointerTo(),
LoadReduceFnTy->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
@@ -1070,7 +1074,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
// Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
llvm::Type *TypeParams[] = {CGM.Int32Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
@@ -1078,7 +1082,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
}
case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
/// Build void __kmpc_data_sharing_init_stack();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
break;
@@ -1087,7 +1091,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
// Build void *__kmpc_data_sharing_push_stack(size_t size,
// int16_t UseSharedMemory);
llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
@@ -1096,7 +1100,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
// Build void __kmpc_data_sharing_pop_stack(void *a);
llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy,
/*Name=*/"__kmpc_data_sharing_pop_stack");
@@ -1106,14 +1110,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
/// Build void __kmpc_begin_sharing_variables(void ***args,
/// size_t n_args);
llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
break;
}
case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
/// Build void __kmpc_end_sharing_variables();
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
break;
@@ -1121,7 +1125,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime
case OMPRTL_NVPTX__kmpc_get_shared_variables: {
/// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
- llvm::FunctionType *FnTy =
+ auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
break;
@@ -1134,19 +1138,18 @@ void CGOpenMPRuntimeNVPTX::createOffload
llvm::Constant *Addr,
uint64_t Size, int32_t,
llvm::GlobalValue::LinkageTypes) {
- auto *F = dyn_cast<llvm::Function>(Addr);
// TODO: Add support for global variables on the device after declare target
// support.
- if (!F)
+ if (!isa<llvm::Function>(Addr))
return;
- llvm::Module *M = F->getParent();
- llvm::LLVMContext &Ctx = M->getContext();
+ llvm::Module &M = CGM.getModule();
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
// Get "nvvm.annotations" metadata node
- llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+ llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
- llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
+ llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations
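
Restated outside the offload-entry callback, the metadata appended here is the
standard NVPTX kernel annotation; a self-contained sketch of the same calls
(markAsNVPTXKernel is an illustrative name):

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/IR/Module.h"

  void markAsNVPTXKernel(llvm::Module &M, llvm::Function *F) {
    llvm::LLVMContext &Ctx = M.getContext();
    llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
    llvm::Metadata *MDVals[] = {
        llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
        llvm::ConstantAsMetadata::get(
            llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
    // Adds the entry {F, !"kernel", i32 1}, which the NVPTX backend reads to
    // emit F as a kernel (.entry) rather than a device function.
    MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
  }
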
@@ -1336,7 +1339,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVa
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
llvm::Value *Size = CGF.getTypeSize(VD->getType());
CharUnits Align = CGM.getContext().getDeclAlign(VD);
Size = Bld.CreateNUWAdd(
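
The comment above says the allocation size includes padding for alignment;
written out as plain C++, the usual round-up it refers to looks like the
following (a restatement of the intent, not a quote of the elided builder
calls in this hunk):

  #include <cstdint>

  // Round Size up to the next multiple of Align (Align > 0); for power-of-two
  // alignments this equals (Size + Align - 1) & ~(Align - 1).
  constexpr uint64_t roundUpToAlignment(uint64_t Size, uint64_t Align) {
    return (Size + Align - 1) / Align * Align;
  }
  static_assert(roundUpToAlignment(13, 8) == 16,
                "13 bytes with 8-byte alignment pad to 16");
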
@@ -1496,8 +1499,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
Work.emplace_back(WFn);
};
- auto *RTLoc = emitUpdateLocation(CGF, Loc);
- auto *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *Args[] = {RTLoc, ThreadID};
auto &&SeqGen = [this, Fn, CapturedVars, &Args, Loc](CodeGenFunction &CGF,
@@ -1528,9 +1531,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
RCG(CGF);
};
- if (IfCond)
+ if (IfCond) {
emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen);
- else {
+ } else {
CodeGenFunction::RunCleanupsScope Scope(CGF);
RegionCodeGenTy ThenRCG(L0ParallelGen);
ThenRCG(CGF);
@@ -1588,8 +1591,8 @@ static llvm::Value *createRuntimeShuffle
QualType ElemType,
llvm::Value *Offset,
SourceLocation Loc) {
- auto &CGM = CGF.CGM;
- auto &Bld = CGF.Builder;
+ CodeGenModule &CGM = CGF.CGM;
+ CGBuilderTy &Bld = CGF.Builder;
CGOpenMPRuntimeNVPTX &RT =
*(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
@@ -1605,12 +1608,11 @@ static llvm::Value *createRuntimeShuffle
QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
Size.getQuantity() <= 4 ? 32 : 64, /*Signed=*/1);
llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc);
- auto *WarpSize =
+ llvm::Value *WarpSize =
Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
- auto *ShuffledVal =
- CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
- {ElemCast, Offset, WarpSize});
+ llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
+ RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
}
@@ -1643,19 +1645,19 @@ static void emitReductionListCopy(
ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase,
CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) {
- auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
- auto &Bld = CGF.Builder;
-
- auto *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
- auto *ScratchpadIndex = CopyOptions.ScratchpadIndex;
- auto *ScratchpadWidth = CopyOptions.ScratchpadWidth;
+ CodeGenModule &CGM = CGF.CGM;
+ ASTContext &C = CGM.getContext();
+ CGBuilderTy &Bld = CGF.Builder;
+
+ llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
+ llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
+ llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
// Iterates, element-by-element, through the source Reduce list and
// makes a copy.
unsigned Idx = 0;
unsigned Size = Privates.size();
- for (auto &Private : Privates) {
+ for (const Expr *Private : Privates) {
Address SrcElementAddr = Address::invalid();
Address DestElementAddr = Address::invalid();
Address DestElementPtrAddr = Address::invalid();
@@ -1716,9 +1718,9 @@ static void emitReductionListCopy(
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
- auto *CurrentOffset =
+ llvm::Value *CurrentOffset =
Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
- auto *ScratchPadElemAbsolutePtrVal =
+ llvm::Value *ScratchPadElemAbsolutePtrVal =
Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
@@ -1731,9 +1733,9 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the scratchpad.
// address = base + index * ElementSizeInChars.
llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
- auto *CurrentOffset =
+ llvm::Value *CurrentOffset =
Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
- auto *ScratchPadElemAbsolutePtrVal =
+ llvm::Value *ScratchPadElemAbsolutePtrVal =
Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
@@ -1816,7 +1818,7 @@ static void emitReductionListCopy(
SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign());
}
- Idx++;
+ ++Idx;
}
}
@@ -1834,8 +1836,8 @@ static void emitReductionListCopy(
static llvm::Value *emitReduceScratchpadFunction(
CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
- auto &C = CGM.getContext();
- auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+ ASTContext &C = CGM.getContext();
+ QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
// Destination of the copy.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
@@ -1864,7 +1866,8 @@ static llvm::Value *emitReduceScratchpad
Args.push_back(&WidthArg);
Args.push_back(&ShouldReduceArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_load_and_reduce", &CGM.getModule());
@@ -1873,7 +1876,7 @@ static llvm::Value *emitReduceScratchpad
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// Get local Reduce list pointer.
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
@@ -1923,7 +1926,7 @@ static llvm::Value *emitReduceScratchpad
llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
- auto CondReduce = Bld.CreateICmpEQ(ShouldReduceVal, Bld.getInt32(1));
+ llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal);
Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
CGF.EmitBlock(ThenBB);
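
CreateIsNotNull(V) is shorthand for an icmp ne against the zero value of V's
type, so the ShouldReduce test now checks "flag != 0" rather than "flag == 1";
the FileCheck updates at the bottom of this patch match that. A standalone
sketch of the helper (emitShouldReduceCheck is an illustrative name):

  #include "llvm/IR/IRBuilder.h"

  // For an i32 flag this emits "icmp ne i32 %flag, 0", which is what the
  // updated CHECK lines in nvptx_teams_reduction_codegen.cpp now expect.
  llvm::Value *emitShouldReduceCheck(llvm::IRBuilder<> &Bld, llvm::Value *Flag) {
    return Bld.CreateIsNotNull(Flag, "should_reduce");
  }
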
@@ -1961,8 +1964,8 @@ static llvm::Value *emitCopyToScratchpad
QualType ReductionArrayTy,
SourceLocation Loc) {
- auto &C = CGM.getContext();
- auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true);
+ ASTContext &C = CGM.getContext();
+ QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
// Source of the copy.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
@@ -1986,7 +1989,8 @@ static llvm::Value *emitCopyToScratchpad
Args.push_back(&IndexArg);
Args.push_back(&WidthArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_copy_to_scratchpad", &CGM.getModule());
@@ -1995,7 +1999,7 @@ static llvm::Value *emitCopyToScratchpad
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address SrcDataAddr(
@@ -2051,8 +2055,8 @@ static llvm::Value *emitInterWarpCopyFun
ArrayRef<const Expr *> Privates,
QualType ReductionArrayTy,
SourceLocation Loc) {
- auto &C = CGM.getContext();
- auto &M = CGM.getModule();
+ ASTContext &C = CGM.getContext();
+ llvm::Module &M = CGM.getModule();
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
@@ -2068,7 +2072,8 @@ static llvm::Value *emitInterWarpCopyFun
Args.push_back(&ReduceListArg);
Args.push_back(&NumWarpsArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_inter_warp_copy_func", &CGM.getModule());
@@ -2077,7 +2082,7 @@ static llvm::Value *emitInterWarpCopyFun
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
// This array is used as a medium to transfer, one reduce element at a time,
// the data from the first lane of every warp to lanes in the first warp
@@ -2086,7 +2091,7 @@ static llvm::Value *emitInterWarpCopyFun
// for reduced latency, as well as to have a distinct copy for concurrently
// executing target regions. The array is declared with common linkage so
// as to be shared across compilation units.
- const char *TransferMediumName =
+ StringRef TransferMediumName =
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
@@ -2099,14 +2104,15 @@ static llvm::Value *emitInterWarpCopyFun
llvm::Constant::getNullValue(Ty), TransferMediumName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
SharedAddressSpace);
+ CGM.addCompilerUsedGlobal(TransferMedium);
}
// Get the CUDA thread id of the current OpenMP thread on the GPU.
- auto *ThreadID = getNVPTXThreadID(CGF);
+ llvm::Value *ThreadID = getNVPTXThreadID(CGF);
// nvptx_lane_id = nvptx_id % warpsize
- auto *LaneID = getNVPTXLaneID(CGF);
+ llvm::Value *LaneID = getNVPTXLaneID(CGF);
// nvptx_warp_id = nvptx_id / warpsize
- auto *WarpID = getNVPTXWarpID(CGF);
+ llvm::Value *WarpID = getNVPTXWarpID(CGF);
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address LocalReduceList(
@@ -2117,7 +2123,7 @@ static llvm::Value *emitInterWarpCopyFun
CGF.getPointerAlign());
unsigned Idx = 0;
- for (auto &Private : Privates) {
+ for (const Expr *Private : Privates) {
//
// Warp master copies reduce element to transfer medium in __shared__
// memory.
@@ -2127,8 +2133,7 @@ static llvm::Value *emitInterWarpCopyFun
llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
// if (lane_id == 0)
- auto IsWarpMaster =
- Bld.CreateICmpEQ(LaneID, Bld.getInt32(0), "warp_master");
+ llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
CGF.EmitBlock(ThenBB);
@@ -2170,7 +2175,7 @@ static llvm::Value *emitInterWarpCopyFun
llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
- auto *NumActiveThreads = Bld.CreateNSWMul(
+ llvm::Value *NumActiveThreads = Bld.CreateNSWMul(
NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
// named_barrier_sync(ParallelBarrierID, num_active_threads)
syncParallelThreads(CGF, NumActiveThreads);
@@ -2183,7 +2188,7 @@ static llvm::Value *emitInterWarpCopyFun
llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
// Up to 32 threads in warp 0 are active.
- auto IsActiveThread =
+ llvm::Value *IsActiveThread =
Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
@@ -2223,7 +2228,7 @@ static llvm::Value *emitInterWarpCopyFun
// While warp 0 copies values from transfer medium, all other warps must
// wait.
syncParallelThreads(CGF, NumActiveThreads);
- Idx++;
+ ++Idx;
}
CGF.FinishFunction();
@@ -2299,7 +2304,7 @@ static llvm::Value *emitInterWarpCopyFun
static llvm::Value *emitShuffleAndReduceFunction(
CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// Thread local Reduce list used to host the values of data to be reduced.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
@@ -2319,7 +2324,8 @@ static llvm::Value *emitShuffleAndReduce
Args.push_back(&RemoteLaneOffsetArg);
Args.push_back(&AlgoVerArg);
- auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
@@ -2328,7 +2334,7 @@ static llvm::Value *emitShuffleAndReduce
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
- auto &Bld = CGF.Builder;
+ CGBuilderTy &Bld = CGF.Builder;
Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
Address LocalReduceList(
@@ -2385,21 +2391,19 @@ static llvm::Value *emitShuffleAndReduce
// When AlgoVer==2, the third conjunction has only the second part to be
// evaluated during runtime. Other conjunctions evaluate to false
// during compile time.
- auto CondAlgo0 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(0));
+ llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal);
- auto Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
- auto CondAlgo1 = Bld.CreateAnd(
+ llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
+ llvm::Value *CondAlgo1 = Bld.CreateAnd(
Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal));
- auto Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
- auto CondAlgo2 = Bld.CreateAnd(
- Algo2,
- Bld.CreateICmpEQ(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)),
- Bld.getInt16(0)));
+ llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2));
+ llvm::Value *CondAlgo2 = Bld.CreateAnd(
+ Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1))));
CondAlgo2 = Bld.CreateAnd(
CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0)));
- auto CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
+ llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1);
CondReduce = Bld.CreateOr(CondReduce, CondAlgo2);
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
@@ -2425,7 +2429,7 @@ static llvm::Value *emitShuffleAndReduce
// if (AlgoVer==1 && (LaneId >= Offset)) copy Remote Reduce list to local
// Reduce list.
Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1));
- auto CondCopy = Bld.CreateAnd(
+ llvm::Value *CondCopy = Bld.CreateAnd(
Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal));
llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then");
@@ -2702,12 +2706,12 @@ void CGOpenMPRuntimeNVPTX::emitReduction
assert((TeamsReduction || ParallelReduction) &&
"Invalid reduction selection in emitReduction.");
- auto &C = CGM.getContext();
+ ASTContext &C = CGM.getContext();
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = RHSExprs.size();
- for (auto *E : Privates) {
+ for (const Expr *E : Privates) {
if (E->getType()->isVariablyModifiedType())
// Reserve place for array size.
++Size;
@@ -2743,20 +2747,20 @@ void CGOpenMPRuntimeNVPTX::emitReduction
}
// 2. Emit reduce_func().
- auto *ReductionFn = emitReductionFunction(
+ llvm::Value *ReductionFn = emitReductionFunction(
CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
Privates, LHSExprs, RHSExprs, ReductionOps);
// 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
- auto *ThreadId = getThreadID(CGF, Loc);
- auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
- auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
ReductionList.getPointer(), CGF.VoidPtrTy);
- auto *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
+ llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
- auto *InterWarpCopyFn =
+ llvm::Value *InterWarpCopyFn =
emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
llvm::Value *Res = nullptr;
@@ -2774,9 +2778,9 @@ void CGOpenMPRuntimeNVPTX::emitReduction
}
if (TeamsReduction) {
- auto *ScratchPadCopyFn =
+ llvm::Value *ScratchPadCopyFn =
emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
- auto *LoadAndReduceFn = emitReduceScratchpadFunction(
+ llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction(
CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
llvm::Value *Args[] = {ThreadId,
@@ -2793,25 +2797,26 @@ void CGOpenMPRuntimeNVPTX::emitReduction
}
// 5. Build switch(res)
- auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
- auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+ llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ llvm::SwitchInst *SwInst =
+ CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
// 6. Build case 1: where we have reduced values in the master
// thread in each team.
// __kmpc_end_reduce{_nowait}(<gtid>);
// break;
- auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
CGF.EmitBlock(Case1BB);
// Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
llvm::Value *EndArgs[] = {ThreadId};
- auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps,
+ auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
this](CodeGenFunction &CGF, PrePostActionTy &Action) {
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
- for (auto *E : ReductionOps) {
+ for (const Expr *E : ReductionOps) {
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
cast<DeclRefExpr>(*IRHS));
++IPriv;
@@ -2850,11 +2855,10 @@ CGOpenMPRuntimeNVPTX::translateParameter
enum { NVPTX_local_addr = 5 };
QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr));
ArgType = QC.apply(CGM.getContext(), ArgType);
- if (isa<ImplicitParamDecl>(NativeParam)) {
+ if (isa<ImplicitParamDecl>(NativeParam))
return ImplicitParamDecl::Create(
CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
- }
return ParmVarDecl::Create(
CGM.getContext(),
const_cast<DeclContext *>(NativeParam->getDeclContext()),
@@ -2945,12 +2949,12 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr
WrapperArgs.emplace_back(&ParallelLevelArg);
WrapperArgs.emplace_back(&WrapperArg);
- auto &CGFI =
+ const CGFunctionInfo &CGFI =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
+ Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
Fn->setDoesNotRecurse();
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=330154&r1=330153&r2=330154&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Mon Apr 16 13:16:21 2018
@@ -35,7 +35,7 @@ private:
class WorkerFunctionState {
public:
llvm::Function *WorkerFn;
- const CGFunctionInfo *CGFI;
+ const CGFunctionInfo &CGFI;
SourceLocation Loc;
WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
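
The header change from a const CGFunctionInfo pointer to a reference is what
forces the constructor change earlier in this patch: a reference member must be
bound in the member-initializer list and can never be reseated or null. A
reduced illustration (FnInfo and Types are stand-in names, not Clang classes):

  struct FnInfo {};  // stands in for clang::CodeGen::CGFunctionInfo

  struct Types {     // stands in for CodeGenTypes
    FnInfo Nullary;
    const FnInfo &arrangeNullaryFunction() const { return Nullary; }
  };

  struct WorkerFunctionState {   // reduced shape of the modified class
    const FnInfo &CGFI;          // reference: no null state, no later assignment
    explicit WorkerFunctionState(const Types &T)
        : CGFI(T.arrangeNullaryFunction()) {}  // must be bound right here
  };
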
Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=330154&r1=330153&r2=330154&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Mon Apr 16 13:16:21 2018
@@ -283,7 +283,7 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast double* [[REMOTE_ELT]] to i8*
// CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1
+ // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0
// CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]]
//
// CHECK: [[DO_REDUCE]]
@@ -658,7 +658,7 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast float* [[REMOTE_ELT2]] to i8*
// CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1
+ // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0
// CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]]
//
// CHECK: [[DO_REDUCE]]
@@ -1087,7 +1087,7 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_PTR:%.+]] = bitcast i16* [[REMOTE_ELT2]] to i8*
// CHECK: store i8* [[REMOTE_ELT_PTR]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[REDUCE:%.+]] = icmp eq i32 [[SHOULD_REDUCE]], 1
+ // CHECK: [[REDUCE:%.+]] = icmp ne i32 [[SHOULD_REDUCE]], 0
// CHECK: br i1 [[REDUCE]], label {{%?}}[[DO_REDUCE:.+]], label {{%?}}[[REDUCE_ELSE:.+]]
//
// CHECK: [[DO_REDUCE]]