[PATCH] D16664: [CUDA] Generate CUDA's printf alloca in its function's entry block.

Justin Lebar via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 28 10:25:41 PST 2016


jlebar updated this revision to Diff 46293.
jlebar added a comment.

Address echristo's review comments.


http://reviews.llvm.org/D16664

Files:
  lib/CodeGen/CGCUDABuiltin.cpp
  test/CodeGenCUDA/printf.cu

Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -10,9 +10,9 @@
 
 // Check a simple call to printf end-to-end.
 __device__ int CheckSimple() {
+  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
   const char* fmt = "%d";
-  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[PTR:%[0-9]+]] = getelementptr i8, i8* [[BUF]], i32 0
   // CHECK: [[CAST:%[0-9]+]] = bitcast i8* [[PTR]] to i32*
   // CHECK: store i32 42, i32* [[CAST]], align 4
@@ -51,3 +51,14 @@
   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
   printf("hello, world!");
 }
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+__device__ bool foo();
+__device__ void CheckAllocaIsInEntryBlock() {
+  // CHECK: alloca i8, i32 4, align 4
+  // CHECK: call {{.*}} @_Z3foov()
+  if (foo()) {
+    printf("%d", 42);
+  }
+}
Index: lib/CodeGen/CGCUDABuiltin.cpp
===================================================================
--- lib/CodeGen/CGCUDABuiltin.cpp
+++ lib/CodeGen/CGCUDABuiltin.cpp
@@ -102,9 +102,15 @@
     // If there are no args, pass a null pointer to vprintf.
     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
   } else {
-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
+    // Insert our alloca not into the current BB, but into the function's entry
+    // block.  This is important because nvvm doesn't support alloca -- if we
+    // put the alloca anywhere else, llvm may eventually output
+    // stacksave/stackrestore intrinsics, which cause our nvvm backend to choke.
+    auto *Alloca = new llvm::AllocaInst(
         llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
-        BufAlign, "printf_arg_buf"));
+        BufAlign, "printf_arg_buf");
+    Alloca->insertAfter(AllocaInsertPt);
+    BufferPtr = Alloca;
 
     unsigned Offset = 0;
     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16664.46293.patch
Type: text/x-patch
Size: 2103 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160128/7d3d19d2/attachment.bin>


More information about the cfe-commits mailing list