[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu
Yaxun Liu via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 4 12:50:25 PST 2019
yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.
Herald added subscribers: t-tye, tpr, dstuttard, nhaehnle, wdng, jvesely, kzhuravl.
amdgpu target prefers pointer type kernel arg in default address space
to be coerced to device address space for better performance.
This patch fixes that.
https://reviews.llvm.org/D69818
Files:
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+ x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+ x++;
+}
+
+// CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+ __attribute__((address_space(1))) int *y) {
+ y[0] = x[0];
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7816,6 +7816,27 @@
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+ // Coerce pointer type kernel arguments in default address space to
+ // device address space for HIP.
+ QualType PointeeTy;
+ if (getContext().getLangOpts().HIP) {
+ if (auto *PT = Ty->getAs<PointerType>()) {
+ if (PT->getPointeeType().getAddressSpace() == LangAS::Default) {
+ PointeeTy = PT->getPointeeType();
+ }
+ } else if (auto *RT = Ty->getAs<ReferenceType>()) {
+ if (RT->getPointeeType().getAddressSpace() == LangAS::Default) {
+ PointeeTy = RT->getPointeeType();
+ }
+ }
+
+ if (PointeeTy != QualType()) {
+ return ABIArgInfo::getDirect(
+ CGT.ConvertType(PointeeTy)
+ ->getPointerTo(
+ getContext().getTargetAddressSpace(LangAS::cuda_device)));
+ }
+ }
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1169,7 +1169,7 @@
if (isa<llvm::PointerType>(Val->getType())) {
// If this is Pointer->Pointer avoid conversion to and from int.
if (isa<llvm::PointerType>(Ty))
- return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
+ return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val");
// Convert the pointer to an integer so we can play with its width.
Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D69818.227761.patch
Type: text/x-patch
Size: 2891 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20191104/8a972b10/attachment-0001.bin>
More information about the cfe-commits
mailing list