[cfe-commits] r141304 - in /cfe/trunk: lib/CodeGen/CGCUDANV.cpp lib/CodeGen/CGCUDARuntime.h lib/CodeGen/CodeGenFunction.cpp test/CodeGenCUDA/device-stub.cu

Peter Collingbourne peter at pcc.me.uk
Thu Oct 6 11:51:56 PDT 2011


Author: pcc
Date: Thu Oct  6 13:51:56 2011
New Revision: 141304

URL: http://llvm.org/viewvc/llvm-project?rev=141304&view=rev
Log:
CUDA: IR generation support for device stubs

Added:
    cfe/trunk/test/CodeGenCUDA/device-stub.cu
Modified:
    cfe/trunk/lib/CodeGen/CGCUDANV.cpp
    cfe/trunk/lib/CodeGen/CGCUDARuntime.h
    cfe/trunk/lib/CodeGen/CodeGenFunction.cpp

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=141304&r1=141303&r2=141304&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Oct  6 13:51:56 2011
@@ -13,6 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGCUDARuntime.h"
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/Decl.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/CallSite.h"
+
+#include <vector>
 
 using namespace clang;
 using namespace CodeGen;
@@ -20,13 +29,96 @@
 namespace {
 
 class CGNVCUDARuntime : public CGCUDARuntime {
+
+private:
+  llvm::Type *IntTy, *SizeTy;
+  llvm::PointerType *CharPtrTy, *VoidPtrTy;
+
+  llvm::Constant *getSetupArgumentFn() const;
+  llvm::Constant *getLaunchFn() const;
+
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
+
+  void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
 };
 
 }
 
 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
+  CodeGen::CodeGenTypes &Types = CGM.getTypes();
+  ASTContext &Ctx = CGM.getContext();
+
+  IntTy = Types.ConvertType(Ctx.IntTy);
+  SizeTy = Types.ConvertType(Ctx.getSizeType());
+
+  CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
+  VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
+}
+
+llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
+  // cudaError_t cudaSetupArgument(void *, size_t, size_t)
+  std::vector<llvm::Type*> Params;
+  Params.push_back(VoidPtrTy);
+  Params.push_back(SizeTy);
+  Params.push_back(SizeTy);
+  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
+                                                           Params, false),
+                                   "cudaSetupArgument");
+}
+
+llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
+  // cudaError_t cudaLaunch(char *)
+  std::vector<llvm::Type*> Params;
+  Params.push_back(CharPtrTy);
+  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
+                                                           Params, false),
+                                   "cudaLaunch");
+}
+
+void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
+                                         FunctionArgList &Args) {
+  // Build the argument value list and the argument stack struct type.
+  llvm::SmallVector<llvm::Value *, 16> ArgValues;
+  std::vector<llvm::Type *> ArgTypes;
+  for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
+       I != E; ++I) {
+    llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
+    ArgValues.push_back(V);
+    assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
+    ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
+  }
+  llvm::StructType *ArgStackTy = llvm::StructType::get(
+      CGF.getLLVMContext(), ArgTypes);
+
+  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
+
+  // Emit the calls to cudaSetupArgument
+  llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
+  for (unsigned I = 0, E = Args.size(); I != E; ++I) {
+    llvm::Value *Args[3];
+    llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
+    Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
+    Args[1] = CGF.Builder.CreateIntCast(
+        llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
+        SizeTy, false);
+    Args[2] = CGF.Builder.CreateIntCast(
+        llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
+        SizeTy, false);
+    llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
+    llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
+    llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
+    CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
+    CGF.EmitBlock(NextBlock);
+  }
+
+  // Emit the call to cudaLaunch
+  llvm::Constant *cudaLaunchFn = getLaunchFn();
+  llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
+  CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
+  CGF.EmitBranch(EndBlock);
+
+  CGF.EmitBlock(EndBlock);
 }
 
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {

Modified: cfe/trunk/lib/CodeGen/CGCUDARuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDARuntime.h?rev=141304&r1=141303&r2=141304&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDARuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGCUDARuntime.h Thu Oct  6 13:51:56 2011
@@ -24,6 +24,7 @@
 
 class CodeGenFunction;
 class CodeGenModule;
+class FunctionArgList;
 class ReturnValueSlot;
 class RValue;
 
@@ -39,6 +40,9 @@
                                         const CUDAKernelCallExpr *E,
                                         ReturnValueSlot ReturnValue);
   
+  virtual void EmitDeviceStubBody(CodeGenFunction &CGF,
+                                  FunctionArgList &Args) = 0;
+
 };
 
 /// Creates an instance of a CUDA runtime class.

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.cpp?rev=141304&r1=141303&r2=141304&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp Thu Oct  6 13:51:56 2011
@@ -13,6 +13,7 @@
 
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
 #include "CGException.h"
@@ -404,6 +405,10 @@
     EmitDestructorBody(Args);
   else if (isa<CXXConstructorDecl>(FD))
     EmitConstructorBody(Args);
+  else if (getContext().getLangOptions().CUDA &&
+           !CGM.getCodeGenOpts().CUDAIsDevice &&
+           FD->hasAttr<CUDAGlobalAttr>())
+    CGM.getCUDARuntime().EmitDeviceStubBody(*this, Args);
   else
     EmitFunctionBody(Args);
 

Added: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=141304&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Oct  6 13:51:56 2011
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+
+#include "../SemaCUDA/cuda.h"
+
+// Test that we build the correct number of calls to cudaSetupArgument followed
+// by a call to cudaLaunch.
+
+// CHECK: define{{.*}}kernelfunc
+// CHECK: call{{.*}}cudaSetupArgument
+// CHECK: call{{.*}}cudaSetupArgument
+// CHECK: call{{.*}}cudaSetupArgument
+// CHECK: call{{.*}}cudaLaunch
+__global__ void kernelfunc(int i, int j, int k) {}





More information about the cfe-commits mailing list