[clang] 83c431f - [amdgpu] Add amdgpu_kernel calling conv attribute to clang

Jon Chesterfield via cfe-commits cfe-commits at lists.llvm.org
Fri May 20 00:51:04 PDT 2022


Author: Jon Chesterfield
Date: 2022-05-20T08:50:37+01:00
New Revision: 83c431fb9e72abbd2eddf26388245eb4963370e2

URL: https://github.com/llvm/llvm-project/commit/83c431fb9e72abbd2eddf26388245eb4963370e2
DIFF: https://github.com/llvm/llvm-project/commit/83c431fb9e72abbd2eddf26388245eb4963370e2.diff

LOG: [amdgpu] Add amdgpu_kernel calling conv attribute to clang

Allows emitting `define amdgpu_kernel void @func()` IR from C or C++.

This replaces the current workflow, which is to write a stub in OpenCL that
calls an external C function implemented in C++, the two combined through llvm-link.

Calling the resulting function still requires a manual implementation of the
ABI from the host side. The primary application is for more rapid debugging
of the amdgpu backend by permuting a C or C++ test file instead of manually
updating an IR file.

The implementation closely follows D54425; the non-AMD reviewers were taken from that review.

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D125970

Added: 
    clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp

Modified: 
    clang/include/clang/Basic/Attr.td
    clang/include/clang/Basic/Specifiers.h
    clang/lib/AST/ItaniumMangle.cpp
    clang/lib/AST/Type.cpp
    clang/lib/AST/TypePrinter.cpp
    clang/lib/Basic/Targets/AMDGPU.h
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/CGDebugInfo.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/lib/Sema/SemaType.cpp
    clang/test/Sema/callingconv.c
    clang/tools/libclang/CXType.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 0c95adfa237d7..fed29b03a8b14 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1857,6 +1857,11 @@ def AMDGPUNumVGPR : InheritableAttr {
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
+def AMDGPUKernelCall : DeclOrTypeAttr {
+  let Spellings = [Clang<"amdgpu_kernel">];
+  let Documentation = [Undocumented];
+}
+
 def BPFPreserveAccessIndex : InheritableAttr,
                              TargetSpecificAttr<TargetBPF>  {
   let Spellings = [Clang<"preserve_access_index">];

diff  --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 7a727e7088deb..7657ae36d21bb 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -281,6 +281,7 @@ namespace clang {
     CC_PreserveAll,  // __attribute__((preserve_all))
     CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
     CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
+    CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
   };
 
   /// Checks whether the given calling convention supports variadic

diff  --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 1be70487c1b4e..b380e02fc8f7d 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3150,6 +3150,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
   case CC_AAPCS_VFP:
   case CC_AArch64VectorCall:
   case CC_AArch64SVEPCS:
+  case CC_AMDGPUKernelCall:
   case CC_IntelOclBicc:
   case CC_SpirFunction:
   case CC_OpenCLKernel:

diff  --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 200a129437ed5..ece4165c51f53 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3186,6 +3186,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
   case CC_AAPCS_VFP: return "aapcs-vfp";
   case CC_AArch64VectorCall: return "aarch64_vector_pcs";
   case CC_AArch64SVEPCS: return "aarch64_sve_pcs";
+  case CC_AMDGPUKernelCall: return "amdgpu_kernel";
   case CC_IntelOclBicc: return "intel_ocl_bicc";
   case CC_SpirFunction: return "spir_function";
   case CC_OpenCLKernel: return "opencl_kernel";
@@ -3622,6 +3623,7 @@ bool AttributedType::isCallingConv() const {
   case attr::VectorCall:
   case attr::AArch64VectorPcs:
   case attr::AArch64SVEPcs:
+  case attr::AMDGPUKernelCall:
   case attr::Pascal:
   case attr::MSABI:
   case attr::SysVABI:

diff  --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 7bca45b5f5601..b5286de5b1cab 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -964,6 +964,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
     case CC_AArch64SVEPCS:
       OS << "__attribute__((aarch64_sve_pcs))";
       break;
+    case CC_AMDGPUKernelCall:
+      OS << "__attribute__((amdgpu_kernel))";
+      break;
     case CC_IntelOclBicc:
       OS << " __attribute__((intel_ocl_bicc))";
       break;
@@ -1754,6 +1757,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   }
   case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
   case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
+  case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
   case attr::IntelOclBicc: OS << "inteloclbicc"; break;
   case attr::PreserveMost:
     OS << "preserve_most";

diff  --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 9749221914888..4e3db7a181f58 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -411,6 +411,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
       return CCCR_Warning;
     case CC_C:
     case CC_OpenCLKernel:
+    case CC_AMDGPUKernelCall:
       return CCCR_OK;
     }
   }

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a35e3d811862d..0d838b9aeb529 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -63,6 +63,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
   case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
   case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
   case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall;
+  case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL;
   case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
   case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
   case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
@@ -232,6 +233,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
   if (D->hasAttr<AArch64SVEPcsAttr>())
     return CC_AArch64SVEPCS;
 
+  if (D->hasAttr<AMDGPUKernelCallAttr>())
+    return CC_AMDGPUKernelCall;
+
   if (D->hasAttr<IntelOclBiccAttr>())
     return CC_IntelOclBicc;
 

diff  --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 7534270294415..1f528fe4065c2 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1335,6 +1335,7 @@ static unsigned getDwarfCC(CallingConv CC) {
   case CC_SpirFunction:
     return llvm::dwarf::DW_CC_LLVM_SpirFunction;
   case CC_OpenCLKernel:
+  case CC_AMDGPUKernelCall:
     return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
   case CC_Swift:
     return llvm::dwarf::DW_CC_LLVM_Swift;

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 1b055543dc485..f39e1af10e3d7 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5011,6 +5011,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   case ParsedAttr::AT_AArch64SVEPcs:
     D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL));
     return;
+  case ParsedAttr::AT_AMDGPUKernelCall:
+    D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL));
+    return;
   case ParsedAttr::AT_IntelOclBicc:
     D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL));
     return;
@@ -5171,6 +5174,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
   case ParsedAttr::AT_AArch64SVEPcs:
     CC = CC_AArch64SVEPCS;
     break;
+  case ParsedAttr::AT_AMDGPUKernelCall:
+    CC = CC_AMDGPUKernelCall;
+    break;
   case ParsedAttr::AT_RegCall:
     CC = CC_X86RegCall;
     break;
@@ -8784,6 +8790,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case ParsedAttr::AT_PreserveAll:
   case ParsedAttr::AT_AArch64VectorPcs:
   case ParsedAttr::AT_AArch64SVEPcs:
+  case ParsedAttr::AT_AMDGPUKernelCall:
     handleCallConvAttr(S, D, AL);
     break;
   case ParsedAttr::AT_Suppress:

diff  --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 314110c926828..5c4d3b9c2a293 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -122,6 +122,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_VectorCall:                                              \
   case ParsedAttr::AT_AArch64VectorPcs:                                        \
   case ParsedAttr::AT_AArch64SVEPcs:                                           \
+  case ParsedAttr::AT_AMDGPUKernelCall:                                        \
   case ParsedAttr::AT_MSABI:                                                   \
   case ParsedAttr::AT_SysVABI:                                                 \
   case ParsedAttr::AT_Pcs:                                                     \
@@ -7482,6 +7483,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
     return createSimpleAttr<AArch64VectorPcsAttr>(Ctx, Attr);
   case ParsedAttr::AT_AArch64SVEPcs:
     return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
+  case ParsedAttr::AT_AMDGPUKernelCall:
+    return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
   case ParsedAttr::AT_Pcs: {
     // The attribute may have had a fixit applied where we treated an
     // identifier as a string literal.  The contents of the string are valid,

diff  --git a/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
new file mode 100644
index 0000000000000..90a9366ac9a20
--- /dev/null
+++ b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
@@ -0,0 +1,83 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
+
+// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST
+// The original test passes the result through opt O2, but that seems to introduce invalid
+// addrspace casts which are not being fixed as part of the present change.
+
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x)
+// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+__attribute__((amdgpu_kernel)) void kernel1(int *x) {
+  x[0]++;
+}
+
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x)
+// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+__attribute__((amdgpu_kernel)) void kernel2(int &x) {
+  x++;
+}
+
+// CHECK-LABEL: define{{.*}} amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
+// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x,
+                                            __attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
+
+// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
+// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+__attribute__((amdgpu_kernel)) void func(int *x) {
+  x[0]++;
+}
+
+struct S {
+  int *x;
+  float *y;
+};
+// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
+// by-val). However, the enhanced address inferring pass should be able to
+// assume they are global pointers.
+//
+
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
+__attribute__((amdgpu_kernel)) void kernel4(struct S s) {
+  s.x[0]++;
+  s.y[0] += 1.f;
+}
+
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s)
+__attribute__((amdgpu_kernel)) void kernel5(struct S *s) {
+  s->x[0]++;
+  s->y[0] += 1.f;
+}
+
+struct T {
+  float *x[2];
+};
+// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
+// by-val). However, the enhanced address inferring pass should be able to
+// assume they are global pointers.
+//
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
+__attribute__((amdgpu_kernel)) void kernel6(struct T t) {
+  t.x[0][0] += 1.f;
+  t.x[1][0] += 2.f;
+}
+
+// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x)
+__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) {
+  x[0]++;
+}
+
+// Single element struct.
+struct SS {
+  float *x;
+};
+// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce)
+// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+__attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
+  *a.x += 3.f;
+}
+

diff  --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c
index 675200cc9cfc7..1555a993218f7 100644
--- a/clang/test/Sema/callingconv.c
+++ b/clang/test/Sema/callingconv.c
@@ -54,6 +54,8 @@ int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type
 int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}}
 int __attribute__((aarch64_sve_pcs)) aasvepcs(void);  // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}
 
+int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}
+
 // PR6361
 void ctest3();
 void __attribute__((cdecl)) ctest3() {}

diff  --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index 4aee32210df1f..cac5d3d4fa90d 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -677,6 +677,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
       TCALLINGCONV(PreserveMost);
       TCALLINGCONV(PreserveAll);
     case CC_SpirFunction: return CXCallingConv_Unexposed;
+    case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed;
     case CC_OpenCLKernel: return CXCallingConv_Unexposed;
       break;
     }


        


More information about the cfe-commits mailing list