[clang] 15140e4 - [hip] Enable pointer argument lowering through coercing type.

Michael Liao via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 5 10:05:40 PST 2019


Author: Michael Liao
Date: 2019-11-05T13:05:05-05:00
New Revision: 15140e4bacf94fbc509e5a139909aefcd1cc3363

URL: https://github.com/llvm/llvm-project/commit/15140e4bacf94fbc509e5a139909aefcd1cc3363
DIFF: https://github.com/llvm/llvm-project/commit/15140e4bacf94fbc509e5a139909aefcd1cc3363.diff

LOG: [hip] Enable pointer argument lowering through coercing type.

Reviewers: tra, rjmccall, yaxunl

Subscribers: jvesely, nhaehnle, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69826

Added: 
    clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/TargetInfo.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 62e8fa037013..e832e4c28334 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1305,6 +1305,15 @@ static void CreateCoercedStore(llvm::Value *Src,
     DstTy = Dst.getType()->getElementType();
   }
 
+  llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
+  llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
+  if (SrcPtrTy && DstPtrTy &&
+      SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
+    Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy);
+    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+    return;
+  }
+
   // If the source and destination are integer or pointer types, just do an
   // extension or truncation to the desired type.
   if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index e33d69c86b3c..26c527d7c983 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7685,6 +7685,42 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
   bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                          uint64_t Members) const override;
 
+  // Coerce HIP pointer arguments from generic pointers to global ones.
+  llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+                                       unsigned ToAS) const {
+    // Structure types.
+    if (auto STy = dyn_cast<llvm::StructType>(Ty)) {
+      SmallVector<llvm::Type *, 8> EltTys;
+      bool Changed = false;
+      for (auto T : STy->elements()) {
+        auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+        EltTys.push_back(NT);
+        Changed |= (NT != T);
+      }
+      // Skip if there is no change in element types.
+      if (!Changed)
+        return STy;
+      if (STy->hasName())
+        return llvm::StructType::create(
+            EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
+      return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
+    }
+    // Arrary types.
+    if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
+      auto T = ATy->getElementType();
+      auto NT = coerceKernelArgumentType(T, FromAS, ToAS);
+      // Skip if there is no change in that element type.
+      if (NT == T)
+        return ATy;
+      return llvm::ArrayType::get(NT, ATy->getNumElements());
+    }
+    // Single value types.
+    if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
+      return llvm::PointerType::get(
+          cast<llvm::PointerType>(Ty)->getElementType(), ToAS);
+    return Ty;
+  }
+
 public:
   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
     DefaultABIInfo(CGT) {}
@@ -7812,14 +7848,22 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
 
   // TODO: Can we omit empty structs?
 
-  // Coerce single element structs to its element.
+  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-    return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+    LTy = CGT.ConvertType(QualType(SeltTy, 0));
+
+  if (getContext().getLangOpts().HIP) {
+    if (!LTy)
+      LTy = CGT.ConvertType(Ty);
+    LTy = coerceKernelArgumentType(
+        LTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+        /*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
+  }
 
   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
   // individual elements, which confuses the Clover OpenCL backend; therefore we
   // have to set it to false here. Other args of getDirect() are just defaults.
-  return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
 ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,

diff  --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
new file mode 100644
index 000000000000..cb8a75882d4d
--- /dev/null
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
+
+#include "Inputs/cuda.h"
+
+// Coerced struct from `struct S` without all generic pointers lowered into
+// global ones.
+// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+
+// On the host-side compilation, generic pointer won't be coerced.
+// HOST-NOT: %struct.S.coerce
+// HOST-NOT: %struct.T.coerce
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+// HOST: define void @_Z7kernel1Pi.stub(i32* %x)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+// HOST: define void @_Z7kernel2Ri.stub(i32* dereferenceable(4) %x)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// HOST: define void @_Z7kernel3PU3AS2iPU3AS1i.stub(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+                        __attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
+
+// CHECK: define void @_Z4funcPi(i32* %x)
+__device__ void func(int *x) {
+  x[0]++;
+}
+
+struct S {
+  int *x;
+  float *y;
+};
+// `by-val` struct will be coerced into a similar struct with all generic
+// pointers lowerd into global ones.
+// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+// HOST: define void @_Z7kernel41S.stub(i32* %s.coerce0, float* %s.coerce1)
+__global__ void kernel4(struct S s) {
+  s.x[0]++;
+  s.y[0] += 1.f;
+}
+
+// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
+// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
+// HOST: define void @_Z7kernel5P1S.stub(%struct.S* %s)
+__global__ void kernel5(struct S *s) {
+  s->x[0]++;
+  s->y[0] += 1.f;
+}
+
+struct T {
+  float *x[2];
+};
+// `by-val` array is also coerced.
+// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+// HOST: define void @_Z7kernel61T.stub(float* %t.coerce0, float* %t.coerce1)
+__global__ void kernel6(struct T t) {
+  t.x[0][0] += 1.f;
+  t.x[1][0] += 2.f;
+}


        


More information about the cfe-commits mailing list