PATCH: fix clang to emit correct addrspacecast for CUDA

Jingyue Wu jingyue at google.com
Fri Mar 21 11:20:55 PDT 2014


Hi,

Static local variables in CUDA can be declared with address space
qualifiers, such as __shared__. Therefore, the codegen needs to potentially
addrspacecast a static local variable to the type expected by its
declaration. Peter did something similar for global variables in r157167.

All clang tests passed.

Justin: The NVPTX backend support for addrspacecast seems not complete. We
can send you follow-up patches once this one gets in.

Jingyue
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20140321/fc737cb2/attachment.html>
-------------- next part --------------
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index e16845c..430b56c 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -183,7 +183,7 @@ static std::string GetStaticDeclName(CodeGenFunction &CGF, const VarDecl &D,
   return ContextName + Separator + D.getNameAsString();
 }
 
-llvm::GlobalVariable *
+llvm::Constant *
 CodeGenFunction::CreateStaticVarDecl(const VarDecl &D,
                                      const char *Separator,
                                      llvm::GlobalValue::LinkageTypes Linkage) {
@@ -212,6 +212,13 @@ CodeGenFunction::CreateStaticVarDecl(const VarDecl &D,
   if (D.getTLSKind())
     CGM.setTLSMode(GV, D);
 
+  // Make sure the result is of the correct type.
+  unsigned ExpectedAddrSpace = CGM.getContext().getTargetAddressSpace(Ty);
+  if (AddrSpace != ExpectedAddrSpace) {
+    llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
+    return llvm::ConstantExpr::getAddrSpaceCast(GV, PTy);
+  }
+
   return GV;
 }
 
@@ -298,12 +305,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
   llvm::Constant *addr =
     CGM.getStaticLocalDeclAddress(&D);
 
-  llvm::GlobalVariable *var;
-  if (addr) {
-    var = cast<llvm::GlobalVariable>(addr->stripPointerCasts());
-  } else {
-    addr = var = CreateStaticVarDecl(D, ".", Linkage);
-  }
+  if (!addr)
+    addr = CreateStaticVarDecl(D, ".", Linkage);
 
   // Store into LocalDeclMap before generating initializer to handle
   // circular references.
@@ -319,6 +322,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
   // Save the type in case adding the initializer forces a type change.
   llvm::Type *expectedType = addr->getType();
 
+  llvm::GlobalVariable *var =
+    cast<llvm::GlobalVariable>(addr->stripPointerCasts());
   // If this value has an initializer, emit it.
   if (D.getInit())
     var = AddInitializerToStaticVarDecl(D, var);
@@ -339,7 +344,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
   //
   // FIXME: It is really dangerous to store this in the map; if anyone
   // RAUW's the GV uses of this constant will be invalid.
-  llvm::Constant *castedAddr = llvm::ConstantExpr::getBitCast(var, expectedType);
+  llvm::Constant *castedAddr =
+    llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(var, expectedType);
   DMEntry = castedAddr;
   CGM.setStaticLocalDeclAddress(&D, castedAddr);
 
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index bbc4fae..c583a30 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2336,9 +2336,9 @@ public:
 
   /// CreateStaticVarDecl - Create a zero-initialized LLVM global for
   /// a static local variable.
-  llvm::GlobalVariable *CreateStaticVarDecl(const VarDecl &D,
-                                            const char *Separator,
-                                       llvm::GlobalValue::LinkageTypes Linkage);
+  llvm::Constant *CreateStaticVarDecl(const VarDecl &D,
+                                      const char *Separator,
+                                      llvm::GlobalValue::LinkageTypes Linkage);
 
   /// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
   /// global variable that has already been created for it.  If the initializer
diff --git a/test/CodeGenCUDA/address-space-conversion.cu b/test/CodeGenCUDA/address-space-conversion.cu
new file mode 100644
index 0000000..9b9ae43
--- /dev/null
+++ b/test/CodeGenCUDA/address-space-conversion.cu
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s
+
+// Verifies LLVM generates correct addrspacecast instructions for CUDA code.
+
+#include "../SemaCUDA/cuda.h"
+
+struct MyStruct {
+  int data1;
+  int data2;
+};
+
+// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct zeroinitializer
+// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
+// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float 0.000000e+00
+
+__device__ void func0() {
+  __shared__ MyStruct a;
+  MyStruct *ap = &a; // composite type
+  ap->data1 = 1;
+  ap->data2 = 2;
+}
+// CHECK: define void @_Z5func0v()
+// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+
+__device__ void callee(float *ap) {
+  *ap = 1.0f;
+}
+
+__device__ void func1() {
+  __shared__ float a;
+  callee(&a); // implicit cast from parameters
+}
+// CHECK: define void @_Z5func1v()
+// CHECK: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
+
+__device__ void func2() {
+  __shared__ float a[256];
+  float *ap = &a[128]; // implicit cast from a decayed array
+  *ap = 1.0f;
+}
+// CHECK: define void @_Z5func2v()
+// CHECK: store float* getelementptr inbounds ([256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i64 128), float** %ap
+
+__device__ void func3() {
+  __shared__ float a;
+  float *ap = reinterpret_cast<float *>(&a); // explicit cast
+  *ap = 1.0f;
+}
+// CHECK: define void @_Z5func3v()
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+
+__device__ void func4() {
+  __shared__ float a;
+  float *ap = (float *)&a; // explicit c-style cast
+  *ap = 1.0f;
+}
+// CHECK: define void @_Z5func4v()
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+
+__shared__ float b;
+
+__device__ float *func5() {
+  return &b; // implicit cast from a return value
+}
+// CHECK: define float* @_Z5func5v()
+// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*)
diff --git a/test/CodeGenCUDA/address-spaces.cu b/test/CodeGenCUDA/address-spaces.cu
index 0434452..a560965 100644
--- a/test/CodeGenCUDA/address-spaces.cu
+++ b/test/CodeGenCUDA/address-spaces.cu
@@ -22,15 +22,15 @@ __device__ void foo() {
   k++;
 
   static int li;
-  // CHECK: load i32 addrspace(1)* @_ZZ3foovE2li
+  // CHECK: load i32* addrspacecast (i32 addrspace(1)* @_ZZ3foovE2li to i32*)
   li++;
 
   __constant__ int lj;
-  // CHECK: load i32 addrspace(4)* @_ZZ3foovE2lj
+  // CHECK: load i32* addrspacecast (i32 addrspace(4)* @_ZZ3foovE2lj to i32*)
   lj++;
 
   __shared__ int lk;
-  // CHECK: load i32 addrspace(3)* @_ZZ3foovE2lk
+  // CHECK: load i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
   lk++;
 }
 


More information about the cfe-commits mailing list