[PATCH] D17877: [OpenMP] Base support for target directive codegen on NVPTX device.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 3 21:11:00 PST 2016
ABataev added inline comments.
================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3790
@@ -3923,3 +3789,3 @@
// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
- // mangled name of the function that encloses the target region and BB is the
+ // mangled name of the function that encloses the target region, BB is the
// line number of the target region.
----------------
Revert back
================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3796-3808
@@ -3929,13 +3795,15 @@
unsigned Line;
+ SmallString<256> OutlinedFnName;
getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
Line);
- SmallString<64> EntryFnName;
{
- llvm::raw_svector_ostream OS(EntryFnName);
+ llvm::raw_svector_ostream OS(OutlinedFnName);
OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
<< llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
}
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
CodeGenFunction CGF(CGM, true);
- CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
+ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, OutlinedFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
----------------
What is the purpose of these massive changes?
================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3918-3919
@@ -3917,4 +3786,1 @@
-
- // Create a unique name for the entry function using the source location
- // information of the current target region. The name will be something like:
//
----------------
Restore it back, please.
================
Comment at: lib/CodeGen/CGOpenMPRuntime.h:50-54
@@ -49,2 +49,7 @@
class CGOpenMPRuntime {
+protected:
CodeGenModule &CGM;
+
+ enum OpenMPRTLFunction {
+ /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+ /// kmpc_micro microtask, ...);
----------------
No way!!! Revert it back, please. No need to expose all these stuff in header file
================
Comment at: lib/CodeGen/CGOpenMPRuntime.h:383
@@ -253,2 +382,3 @@
+protected:
/// \brief Creates offloading entry for the provided entry ID \a ID,
----------------
Please, gather all 'protected' members in a single 'protected' section
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:30
@@ +29,3 @@
+void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
+ llvm::Constant *Addr,
+ uint64_t Size) {
----------------
If you expect 'llvm::Function*' here, why you can't change it to 'llvm::Function*'?
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:47
@@ +46,3 @@
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+ return;
+}
----------------
Remove, not required
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:133
@@ +132,3 @@
+
+ CodeGenFunction CGF(CGM, true);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
----------------
Comment for 'true' value
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:242
@@ +241,3 @@
+
+void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF,
+ EntryFunctionState &EST) {
----------------
What about exceptions? Do you plan to support them? If yes, add tests for classes with constructors/destructors and exceptions
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:30
@@ +29,3 @@
+
+private:
+ /// \brief Creates offloading entry for the provided entry ID \a ID,
----------------
Please, gather all private members in single 'private' section, protected to single 'protected' and public to single 'public'
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:36-41
@@ +35,8 @@
+
+ enum OpenMPRTLFunctionNVPTX {
+ /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+ /// kmp_int32 thread_limit);
+ OMPRTL_NVPTX__kmpc_kernel_init = OMPRTL_last + 1,
+ };
+
+ /// \brief Returns specified OpenMP runtime function for the current OpenMP
----------------
No way!!! This must go to .cpp file!
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:68-102
@@ +67,37 @@
+
+ /// \brief Get the GPU warp size.
+ llvm::Value *GetNVPTXWarpSize(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+ {}, "nvptx_warp_size");
+ }
+
+ /// \brief Get the id of the current thread on the GPU.
+ llvm::Value *GetNVPTXThreadID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+ {}, "nvptx_tid");
+ }
+
+ // \brief Get the maximum number of threads in a block of the GPU.
+ llvm::Value *GetNVPTXNumThreads(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+ {}, "nvptx_num_threads");
+ }
+
+ /// \brief Get barrier to synchronize all threads in a block.
+ void GetNVPTXCTABarrier(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ Bld.CreateCall(llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0),
+ {});
+ }
+
+ // \brief Synchronize all GPU threads in a block.
----------------
Move all implementations to .cpp file
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:117-126
@@ +116,12 @@
+ /// If NumThreads is 1024, master id is 992.
+ llvm::Value *GetMasterThreadID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Value *NumThreads = GetNVPTXNumThreads(CGF);
+
+ // We assume that the warp size is a power of 2.
+ llvm::Value *Mask = Bld.CreateSub(GetNVPTXWarpSize(CGF), Bld.getInt32(1));
+
+ return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ Bld.CreateNot(Mask), "master_tid");
+ }
+
----------------
Implementation must be in .cpp
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:129-131
@@ +128,5 @@
+private:
+ // NVPTX Address space
+ enum ADDRESS_SPACE { GLOBAL_ADDRESS_SPACE = 1, SHARED_ADDRESS_SPACE = 3 };
+
+ // Master-worker control state.
----------------
Again, move it to .cpp
================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:138-172
@@ +137,37 @@
+
+ class EntryFunctionState {
+ public:
+ llvm::BasicBlock *ExitBB;
+
+ EntryFunctionState() : ExitBB(nullptr){};
+ };
+
+ class WorkerFunctionState {
+ public:
+ llvm::Function *WorkerFn;
+ const CGFunctionInfo *CGFI;
+
+ WorkerFunctionState(CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) {
+ createWorkerFunction(CGM);
+ };
+
+ private:
+ void createWorkerFunction(CodeGenModule &CGM) {
+ auto &Ctx = CGM.getContext();
+
+ // Create an worker function with no arguments.
+ FunctionType::ExtInfo EI;
+ CGFI = &CGM.getTypes().arrangeFreeFunctionDeclaration(
+ Ctx.VoidTy, {}, EI, /*isVariadic=*/false);
+
+ WorkerFn =
+ llvm::Function::Create(CGM.getTypes().GetFunctionType(*CGFI),
+ llvm::GlobalValue::InternalLinkage,
+ /* placeholder */ "_worker", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
+ WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ WorkerFn->addFnAttr(llvm::Attribute::NoInline);
+ }
+ };
+
+ /// \brief Initialize master-worker control state.
----------------
You can leave just declaration here, as this class is used by reference always. Definition must be moved to .cpp
http://reviews.llvm.org/D17877
More information about the cfe-commits
mailing list