PATCH: fix clang to emit correct addrspacecast for CUDA
Jingyue Wu
jingyue at google.com
Fri Mar 21 11:20:55 PDT 2014
Hi,
Static local variables in CUDA can be declared with address space
qualifiers, such as __shared__. Therefore, the codegen needs to potentially
addrspacecast a static local variable to the type expected by its
declaration. Peter did something similar for global variables in r157167.
All clang tests passed.
Justin: The NVPTX backend support for addrspacecast seems not complete. We
can send you follow-up patches once this one gets in.
Jingyue
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20140321/fc737cb2/attachment.html>
-------------- next part --------------
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index e16845c..430b56c 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -183,7 +183,7 @@ static std::string GetStaticDeclName(CodeGenFunction &CGF, const VarDecl &D,
return ContextName + Separator + D.getNameAsString();
}
-llvm::GlobalVariable *
+llvm::Constant *
CodeGenFunction::CreateStaticVarDecl(const VarDecl &D,
const char *Separator,
llvm::GlobalValue::LinkageTypes Linkage) {
@@ -212,6 +212,13 @@ CodeGenFunction::CreateStaticVarDecl(const VarDecl &D,
if (D.getTLSKind())
CGM.setTLSMode(GV, D);
+ // Make sure the result is of the correct type.
+ unsigned ExpectedAddrSpace = CGM.getContext().getTargetAddressSpace(Ty);
+ if (AddrSpace != ExpectedAddrSpace) {
+ llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
+ return llvm::ConstantExpr::getAddrSpaceCast(GV, PTy);
+ }
+
return GV;
}
@@ -298,12 +305,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
llvm::Constant *addr =
CGM.getStaticLocalDeclAddress(&D);
- llvm::GlobalVariable *var;
- if (addr) {
- var = cast<llvm::GlobalVariable>(addr->stripPointerCasts());
- } else {
- addr = var = CreateStaticVarDecl(D, ".", Linkage);
- }
+ if (!addr)
+ addr = CreateStaticVarDecl(D, ".", Linkage);
// Store into LocalDeclMap before generating initializer to handle
// circular references.
@@ -319,6 +322,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
// Save the type in case adding the initializer forces a type change.
llvm::Type *expectedType = addr->getType();
+ llvm::GlobalVariable *var =
+ cast<llvm::GlobalVariable>(addr->stripPointerCasts());
// If this value has an initializer, emit it.
if (D.getInit())
var = AddInitializerToStaticVarDecl(D, var);
@@ -339,7 +344,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
//
// FIXME: It is really dangerous to store this in the map; if anyone
// RAUW's the GV uses of this constant will be invalid.
- llvm::Constant *castedAddr = llvm::ConstantExpr::getBitCast(var, expectedType);
+ llvm::Constant *castedAddr =
+ llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(var, expectedType);
DMEntry = castedAddr;
CGM.setStaticLocalDeclAddress(&D, castedAddr);
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index bbc4fae..c583a30 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2336,9 +2336,9 @@ public:
/// CreateStaticVarDecl - Create a zero-initialized LLVM global for
/// a static local variable.
- llvm::GlobalVariable *CreateStaticVarDecl(const VarDecl &D,
- const char *Separator,
- llvm::GlobalValue::LinkageTypes Linkage);
+ llvm::Constant *CreateStaticVarDecl(const VarDecl &D,
+ const char *Separator,
+ llvm::GlobalValue::LinkageTypes Linkage);
/// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
/// global variable that has already been created for it. If the initializer
diff --git a/test/CodeGenCUDA/address-space-conversion.cu b/test/CodeGenCUDA/address-space-conversion.cu
new file mode 100644
index 0000000..9b9ae43
--- /dev/null
+++ b/test/CodeGenCUDA/address-space-conversion.cu
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s
+
+// Verifies LLVM generates correct addrspacecast instructions for CUDA code.
+
+#include "../SemaCUDA/cuda.h"
+
+struct MyStruct {
+ int data1;
+ int data2;
+};
+
+// CHECK: @_ZZ5func0vE1a = internal addrspace(3) global %struct.MyStruct zeroinitializer
+// CHECK: @_ZZ5func1vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
+// CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float 0.000000e+00
+
+__device__ void func0() {
+ __shared__ MyStruct a;
+ MyStruct *ap = &a; // composite type
+ ap->data1 = 1;
+ ap->data2 = 2;
+}
+// CHECK: define void @_Z5func0v()
+// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+
+__device__ void callee(float *ap) {
+ *ap = 1.0f;
+}
+
+__device__ void func1() {
+ __shared__ float a;
+ callee(&a); // implicit cast from parameters
+}
+// CHECK: define void @_Z5func1v()
+// CHECK: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
+
+__device__ void func2() {
+ __shared__ float a[256];
+ float *ap = &a[128]; // implicit cast from a decayed array
+ *ap = 1.0f;
+}
+// CHECK: define void @_Z5func2v()
+// CHECK: store float* getelementptr inbounds ([256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i64 128), float** %ap
+
+__device__ void func3() {
+ __shared__ float a;
+ float *ap = reinterpret_cast<float *>(&a); // explicit cast
+ *ap = 1.0f;
+}
+// CHECK: define void @_Z5func3v()
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+
+__device__ void func4() {
+ __shared__ float a;
+ float *ap = (float *)&a; // explicit c-style cast
+ *ap = 1.0f;
+}
+// CHECK: define void @_Z5func4v()
+// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+
+__shared__ float b;
+
+__device__ float *func5() {
+ return &b; // implicit cast from a return value
+}
+// CHECK: define float* @_Z5func5v()
+// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*)
diff --git a/test/CodeGenCUDA/address-spaces.cu b/test/CodeGenCUDA/address-spaces.cu
index 0434452..a560965 100644
--- a/test/CodeGenCUDA/address-spaces.cu
+++ b/test/CodeGenCUDA/address-spaces.cu
@@ -22,15 +22,15 @@ __device__ void foo() {
k++;
static int li;
- // CHECK: load i32 addrspace(1)* @_ZZ3foovE2li
+ // CHECK: load i32* addrspacecast (i32 addrspace(1)* @_ZZ3foovE2li to i32*)
li++;
__constant__ int lj;
- // CHECK: load i32 addrspace(4)* @_ZZ3foovE2lj
+ // CHECK: load i32* addrspacecast (i32 addrspace(4)* @_ZZ3foovE2lj to i32*)
lj++;
__shared__ int lk;
- // CHECK: load i32 addrspace(3)* @_ZZ3foovE2lk
+ // CHECK: load i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
lk++;
}
More information about the cfe-commits
mailing list