[PATCH] D17877: [OpenMP] Base support for target directive codegen on NVPTX device.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 3 21:11:00 PST 2016


ABataev added inline comments.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3790
@@ -3923,3 +3789,3 @@
   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
-  // mangled name of the function that encloses the target region and BB is the
+  // mangled name of the function that encloses the target region, BB is the
   // line number of the target region.
----------------
Revert back

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3796-3808
@@ -3929,13 +3795,15 @@
   unsigned Line;
+  SmallString<256> OutlinedFnName;
   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
                            Line);
-  SmallString<64> EntryFnName;
   {
-    llvm::raw_svector_ostream OS(EntryFnName);
+    llvm::raw_svector_ostream OS(OutlinedFnName);
     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
   }
 
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
+  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, OutlinedFnName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
----------------
What is the purpose of these massive changes?

================
Comment at: lib/CodeGen/CGOpenMPRuntime.cpp:3918-3919
@@ -3917,4 +3786,1 @@
-
-  // Create a unique name for the entry function using the source location
-  // information of the current target region. The name will be something like:
   //
----------------
Restore it back, please.

================
Comment at: lib/CodeGen/CGOpenMPRuntime.h:50-54
@@ -49,2 +49,7 @@
 class CGOpenMPRuntime {
+protected:
   CodeGenModule &CGM;
+
+  enum OpenMPRTLFunction {
+    /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+    /// kmpc_micro microtask, ...);
----------------
No way!!! Revert it back, please. No need to expose all these stuff in header file

================
Comment at: lib/CodeGen/CGOpenMPRuntime.h:383
@@ -253,2 +382,3 @@
 
+protected:
   /// \brief Creates offloading entry for the provided entry ID \a ID,
----------------
Please, gather all 'protected' members in a single 'protected' section

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:30
@@ +29,3 @@
+void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
+                                              llvm::Constant *Addr,
+                                              uint64_t Size) {
----------------
If you expect 'llvm::Function*' here, why you can't change it to 'llvm::Function*'?

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:47
@@ +46,3 @@
+  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+  return;
+}
----------------
Remove, not required

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:133
@@ +132,3 @@
+
+  CodeGenFunction CGF(CGM, true);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
----------------
Comment for 'true' value

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp:242
@@ +241,3 @@
+
+void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF,
+                                           EntryFunctionState &EST) {
----------------
What about exceptions? Do you plan to support them? If yes, add tests for classes with constructors/destructors and exceptions

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:30
@@ +29,3 @@
+
+private:
+  /// \brief Creates offloading entry for the provided entry ID \a ID,
----------------
Please, gather all private members in single 'private' section, protected to single 'protected' and public to single 'public'

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:36-41
@@ +35,8 @@
+
+  enum OpenMPRTLFunctionNVPTX {
+    /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+    /// kmp_int32 thread_limit);
+    OMPRTL_NVPTX__kmpc_kernel_init = OMPRTL_last + 1,
+  };
+
+  /// \brief Returns specified OpenMP runtime function for the current OpenMP
----------------
No way!!! This must go to .cpp file!

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:68-102
@@ +67,37 @@
+
+  /// \brief Get the GPU warp size.
+  llvm::Value *GetNVPTXWarpSize(CodeGenFunction &CGF) {
+    CGBuilderTy &Bld = CGF.Builder;
+    return Bld.CreateCall(
+        llvm::Intrinsic::getDeclaration(
+            &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+        {}, "nvptx_warp_size");
+  }
+
+  /// \brief Get the id of the current thread on the GPU.
+  llvm::Value *GetNVPTXThreadID(CodeGenFunction &CGF) {
+    CGBuilderTy &Bld = CGF.Builder;
+    return Bld.CreateCall(
+        llvm::Intrinsic::getDeclaration(
+            &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+        {}, "nvptx_tid");
+  }
+
+  // \brief Get the maximum number of threads in a block of the GPU.
+  llvm::Value *GetNVPTXNumThreads(CodeGenFunction &CGF) {
+    CGBuilderTy &Bld = CGF.Builder;
+    return Bld.CreateCall(
+        llvm::Intrinsic::getDeclaration(
+            &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+        {}, "nvptx_num_threads");
+  }
+
+  /// \brief Get barrier to synchronize all threads in a block.
+  void GetNVPTXCTABarrier(CodeGenFunction &CGF) {
+    CGBuilderTy &Bld = CGF.Builder;
+    Bld.CreateCall(llvm::Intrinsic::getDeclaration(
+                       &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0),
+                   {});
+  }
+
+  // \brief Synchronize all GPU threads in a block.
----------------
Move all implementations to .cpp file

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:117-126
@@ +116,12 @@
+  ///      If NumThreads is 1024, master id is 992.
+  llvm::Value *GetMasterThreadID(CodeGenFunction &CGF) {
+    CGBuilderTy &Bld = CGF.Builder;
+    llvm::Value *NumThreads = GetNVPTXNumThreads(CGF);
+
+    // We assume that the warp size is a power of 2.
+    llvm::Value *Mask = Bld.CreateSub(GetNVPTXWarpSize(CGF), Bld.getInt32(1));
+
+    return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+                         Bld.CreateNot(Mask), "master_tid");
+  }
+
----------------
Implementation must be in .cpp

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:129-131
@@ +128,5 @@
+private:
+  // NVPTX Address space
+  enum ADDRESS_SPACE { GLOBAL_ADDRESS_SPACE = 1, SHARED_ADDRESS_SPACE = 3 };
+
+  // Master-worker control state.
----------------
Again, move it to .cpp

================
Comment at: lib/CodeGen/CGOpenMPRuntimeNVPTX.h:138-172
@@ +137,37 @@
+
+  class EntryFunctionState {
+  public:
+    llvm::BasicBlock *ExitBB;
+
+    EntryFunctionState() : ExitBB(nullptr){};
+  };
+
+  class WorkerFunctionState {
+  public:
+    llvm::Function *WorkerFn;
+    const CGFunctionInfo *CGFI;
+
+    WorkerFunctionState(CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) {
+      createWorkerFunction(CGM);
+    };
+
+  private:
+    void createWorkerFunction(CodeGenModule &CGM) {
+      auto &Ctx = CGM.getContext();
+
+      // Create an worker function with no arguments.
+      FunctionType::ExtInfo EI;
+      CGFI = &CGM.getTypes().arrangeFreeFunctionDeclaration(
+          Ctx.VoidTy, {}, EI, /*isVariadic=*/false);
+
+      WorkerFn =
+          llvm::Function::Create(CGM.getTypes().GetFunctionType(*CGFI),
+                                 llvm::GlobalValue::InternalLinkage,
+                                 /* placeholder */ "_worker", &CGM.getModule());
+      CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
+      WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage);
+      WorkerFn->addFnAttr(llvm::Attribute::NoInline);
+    }
+  };
+
+  /// \brief Initialize master-worker control state.
----------------
You can leave just declaration here, as this class is used by reference always. Definition must be moved to .cpp


http://reviews.llvm.org/D17877





More information about the cfe-commits mailing list