[clang] [llvm] [clang] Add spir_kernel attribute (PR #137882)

Thu May 8 12:54:21 PDT 2025

https://github.com/sarnex updated https://github.com/llvm/llvm-project/pull/137882

>From 080a9d43ba6544d46c2b36c5dc6a5af421264580 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Wed, 7 May 2025 12:17:30 -0700
Subject: [PATCH 1/2] [clang] Simplify device kernel attributes

Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
 clang/include/clang/AST/GlobalDecl.h          |  2 +-
 clang/include/clang/Basic/Attr.td             | 66 +++++++++++--------
 clang/include/clang/Basic/Specifiers.h        |  5 +-
 clang/lib/AST/Decl.cpp                        |  4 +-
 clang/lib/AST/ItaniumMangle.cpp               |  6 +-
 clang/lib/AST/MicrosoftMangle.cpp             |  3 +-
 clang/lib/AST/Type.cpp                        |  8 +--
 clang/lib/AST/TypePrinter.cpp                 |  9 +--
 clang/lib/Basic/Targets/AArch64.cpp           |  4 +-
 clang/lib/Basic/Targets/AMDGPU.h              |  3 +-
 clang/lib/Basic/Targets/ARM.cpp               |  4 +-
 clang/lib/Basic/Targets/BPF.h                 |  2 +-
 clang/lib/Basic/Targets/Mips.cpp              |  2 +-
 clang/lib/Basic/Targets/SPIR.h                |  2 +-
 clang/lib/Basic/Targets/SystemZ.h             |  2 +-
 clang/lib/Basic/Targets/X86.h                 | 23 +++++--
 clang/lib/CodeGen/CGCall.cpp                  | 37 +++++++----
 clang/lib/CodeGen/CGDebugInfo.cpp             |  5 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  4 +-
 clang/lib/CodeGen/CodeGenFunction.cpp         |  4 +-
 clang/lib/CodeGen/CodeGenModule.cpp           |  9 +--
 clang/lib/CodeGen/TargetInfo.cpp              |  2 +-
 clang/lib/CodeGen/Targets/AMDGPU.cpp          |  6 +-
 clang/lib/CodeGen/Targets/NVPTX.cpp           |  4 +-
 clang/lib/CodeGen/Targets/SPIR.cpp            |  2 +-
 clang/lib/CodeGen/Targets/TCE.cpp             |  2 +-
 clang/lib/Sema/SemaDecl.cpp                   | 14 ++--
 clang/lib/Sema/SemaDeclAttr.cpp               | 61 ++++++++++++-----
 clang/lib/Sema/SemaSYCL.cpp                   |  2 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  8 +--
 clang/lib/Sema/SemaType.cpp                   | 27 ++++++--
 ...a-attribute-supported-attributes-list.test |  1 -
 clang/tools/libclang/CXType.cpp               |  4 +-
 llvm/include/llvm/BinaryFormat/Dwarf.def      |  2 +-
 .../llvm/DebugInfo/DWARF/DWARFTypePrinter.h   |  8 ++-
 .../preload-implicit-kernargs-debug-info.ll   |  2 +-
 36 files changed, 213 insertions(+), 136 deletions(-)

diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index baf5371d2682d..97caff0198cb0 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -164,7 +164,7 @@ class GlobalDecl {
   }
 
   static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) {
-    return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice)
+    return (D->hasAttr<DeviceKernelAttr>() || D->getLangOpts().CUDAIsDevice)
                ? KernelReferenceKind::Kernel
                : KernelReferenceKind::Stub;
   }
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index df7bba094fce6..1ce7f0c1835ac 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -190,8 +190,9 @@ def FunctionPointer : SubsetSubject<DeclBase,
                                     "functions pointers">;
 
 def OpenCLKernelFunction
-    : SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}],
-                    "kernel functions">;
+    : SubsetSubject<Function, [{S->getASTContext().getLangOpts().OpenCL &&
+                               S->hasAttr<DeviceKernelAttr>()}],
+                               "kernel functions">;
 
 // HasFunctionProto is a more strict version of FunctionLike, so it should
 // never be specified in a Subjects list along with FunctionLike (due to the
@@ -1498,12 +1499,6 @@ def CUDAGridConstant : InheritableAttr {
   let Documentation = [CUDAGridConstantAttrDocs];
 }
 
-def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
-  let Spellings = [Clang<"nvptx_kernel">];
-  let Subjects = SubjectList<[Function]>;
-  let Documentation = [Undocumented];
-}
-
 def HIPManaged : InheritableAttr {
   let Spellings = [GNU<"managed">, Declspec<"__managed__">];
   let Subjects = SubjectList<[Var]>;
@@ -1538,11 +1533,44 @@ def CUDAShared : InheritableAttr {
 }
 def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
 
-def SYCLKernel : InheritableAttr {
-  let Spellings = [Clang<"sycl_kernel">];
-  let Subjects = SubjectList<[FunctionTmpl]>;
-  let LangOpts = [SYCLDevice];
+def DeviceKernel : DeclOrTypeAttr {
+  let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
+                   Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
+                   CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
+  let LangOpts = [];
   let Documentation = [SYCLKernelDocs];
+  let AdditionalMembers =
+  [{
+   inline bool isAMDGPUSpelling() const {
+     return isAMDGPUSpelling(*this);
+   }
+   template<typename T>
+   static inline bool isAMDGPUSpelling(const T& Attr) {
+     return Attr.getAttrName()->getName() == "amdgpu_kernel";
+   }
+   inline bool isNVPTXSpelling() const {
+     return isNVPTXSpelling(*this);
+   }
+   template<typename T>
+   static inline bool isNVPTXSpelling(const T& Attr) {
+     return Attr.getAttrName()->getName() == "nvptx_kernel";
+   }
+   inline bool isOpenCLSpelling() const {
+     return isOpenCLSpelling(*this);
+   }
+   template<typename T> // T must provide getAttrName()->getName().
+   static inline bool isOpenCLSpelling(const T& Attr) {
+     return Attr.getAttrName()->getName() == "kernel" ||
+            Attr.getAttrName()->getName() == "__kernel";
+   }
+   inline bool isSYCLSpelling() const {
+     return isSYCLSpelling(*this);
+   }
+   template<typename T>
+   static inline bool isSYCLSpelling(const T& Attr) {
+     return Attr.getAttrName()->getName() == "sycl_kernel";
+   }
+}];
 }
 
 def SYCLKernelEntryPoint : InheritableAttr {
@@ -1608,15 +1636,6 @@ def Allocating : TypeAttr {
   let Documentation = [AllocatingDocs];
 }
 
-// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because
-// the specification does not expose them with one currently.
-def OpenCLKernel : InheritableAttr {
-  let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
-  let Subjects = SubjectList<[Function], ErrorDiag>;
-  let Documentation = [Undocumented];
-  let SimpleHandler = 1;
-}
-
 def OpenCLUnrollHint : StmtAttr {
   let Spellings = [GNU<"opencl_unroll_hint">];
   let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
@@ -2351,11 +2370,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr {
   let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
 }
 
-def AMDGPUKernelCall : DeclOrTypeAttr {
-  let Spellings = [Clang<"amdgpu_kernel">];
-  let Documentation = [Undocumented];
-}
-
 def BPFPreserveAccessIndex : InheritableAttr,
                              TargetSpecificAttr<TargetBPF>  {
   let Spellings = [Clang<"preserve_access_index">];
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 491badcc804e7..698fd9da5ced1 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -289,14 +289,13 @@ namespace clang {
     CC_AAPCS_VFP,          // __attribute__((pcs("aapcs-vfp")))
     CC_IntelOclBicc,       // __attribute__((intel_ocl_bicc))
     CC_SpirFunction,       // default for OpenCL functions on SPIR target
-    CC_OpenCLKernel,       // inferred for OpenCL kernels
+    CC_DeviceKernel,       // __attribute__((device_kernel))
     CC_Swift,              // __attribute__((swiftcall))
     CC_SwiftAsync,         // __attribute__((swiftasynccall))
     CC_PreserveMost,       // __attribute__((preserve_most))
     CC_PreserveAll,        // __attribute__((preserve_all))
     CC_AArch64VectorCall,  // __attribute__((aarch64_vector_pcs))
     CC_AArch64SVEPCS,      // __attribute__((aarch64_sve_pcs))
-    CC_AMDGPUKernelCall,   // __attribute__((amdgpu_kernel))
     CC_M68kRTD,            // __attribute__((m68k_rtd))
     CC_PreserveNone,       // __attribute__((preserve_none))
     CC_RISCVVectorCall,    // __attribute__((riscv_vector_cc))
@@ -326,7 +325,7 @@ namespace clang {
     case CC_X86Pascal:
     case CC_X86VectorCall:
     case CC_SpirFunction:
-    case CC_OpenCLKernel:
+    case CC_DeviceKernel:
     case CC_Swift:
     case CC_SwiftAsync:
     case CC_M68kRTD:
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 07b4d77bd2ab7..c07e1c4d14ddf 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const {
 }
 
 bool FunctionDecl::isInExternCContext() const {
-  if (hasAttr<OpenCLKernelAttr>())
+  if (hasAttr<DeviceKernelAttr>() && getASTContext().getLangOpts().OpenCL)
     return true;
   return getLexicalDeclContext()->isExternCContext();
 }
@@ -5512,7 +5512,7 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
 }
 
 bool FunctionDecl::isReferenceableKernel() const {
-  return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
+  return hasAttr<CUDAGlobalAttr>() || hasAttr<DeviceKernelAttr>();
 }
 
 BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 33a8728728574..9e9d5d946e788 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -1556,7 +1556,8 @@ void CXXNameMangler::mangleUnqualifiedName(
           FD && FD->hasAttr<CUDAGlobalAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       bool IsOCLDeviceStub =
-          FD && FD->hasAttr<OpenCLKernelAttr>() &&
+          getASTContext().getLangOpts().OpenCL && FD &&
+          FD->hasAttr<DeviceKernelAttr>() &&
           GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
       if (IsDeviceStub)
         mangleDeviceStubName(II);
@@ -3529,10 +3530,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
   case CC_AAPCS_VFP:
   case CC_AArch64VectorCall:
   case CC_AArch64SVEPCS:
-  case CC_AMDGPUKernelCall:
   case CC_IntelOclBicc:
   case CC_SpirFunction:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
   case CC_M68kRTD:
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index add737b762ccc..36dd3ecd17626 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1164,7 +1164,8 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
                   ->hasAttr<CUDAGlobalAttr>())) &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         bool IsOCLDeviceStub =
-            ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
+            getASTContext().getLangOpts().OpenCL && ND &&
+            isa<FunctionDecl>(ND) && ND->hasAttr<DeviceKernelAttr>() &&
             GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
         if (IsDeviceStub)
           mangleSourceName(
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 392a95d042353..61b4febf6159c 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3594,14 +3594,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
     return "aarch64_vector_pcs";
   case CC_AArch64SVEPCS:
     return "aarch64_sve_pcs";
-  case CC_AMDGPUKernelCall:
-    return "amdgpu_kernel";
   case CC_IntelOclBicc:
     return "intel_ocl_bicc";
   case CC_SpirFunction:
     return "spir_function";
-  case CC_OpenCLKernel:
-    return "opencl_kernel";
+  case CC_DeviceKernel:
+    return "device_kernel";
   case CC_Swift:
     return "swiftcall";
   case CC_SwiftAsync:
@@ -4302,7 +4300,7 @@ bool AttributedType::isCallingConv() const {
   case attr::VectorCall:
   case attr::AArch64VectorPcs:
   case attr::AArch64SVEPcs:
-  case attr::AMDGPUKernelCall:
+  case attr::DeviceKernel:
   case attr::Pascal:
   case attr::MSABI:
   case attr::SysVABI:
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index cba1a2d98d660..1c92abba73905 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1096,8 +1096,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
     case CC_AArch64SVEPCS:
       OS << "__attribute__((aarch64_sve_pcs))";
       break;
-    case CC_AMDGPUKernelCall:
-      OS << "__attribute__((amdgpu_kernel))";
+    case CC_DeviceKernel:
+      OS << "__attribute__((device_kernel))";
       break;
     case CC_IntelOclBicc:
       OS << " __attribute__((intel_ocl_bicc))";
@@ -1112,7 +1112,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
       OS << " __attribute__((regcall))";
       break;
     case CC_SpirFunction:
-    case CC_OpenCLKernel:
       // Do nothing. These CCs are not available as attributes.
       break;
     case CC_Swift:
@@ -2065,7 +2064,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
   }
   case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
   case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
-  case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
+  case attr::DeviceKernel:
+    OS << T->getAttr()->getSpelling();
+    break;
   case attr::IntelOclBicc: OS << "inteloclbicc"; break;
   case attr::PreserveMost:
     OS << "preserve_most";
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 3633bab6e0df9..ad3c64fa68049 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1341,7 +1341,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_PreserveMost:
   case CC_PreserveAll:
   case CC_PreserveNone:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
   case CC_AArch64VectorCall:
   case CC_AArch64SVEPCS:
   case CC_Win64:
@@ -1699,7 +1699,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_X86FastCall:
     return CCCR_Ignore;
   case CC_C:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
   case CC_PreserveNone:
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 8ea544ba28b10..509128f3cf070 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -415,8 +415,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
     default:
       return CCCR_Warning;
     case CC_C:
-    case CC_OpenCLKernel:
-    case CC_AMDGPUKernelCall:
+    case CC_DeviceKernel:
       return CCCR_OK;
     }
   }
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index ca2c1ffbb0eb7..acf28b7a71454 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1405,7 +1405,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_AAPCS_VFP:
   case CC_Swift:
   case CC_SwiftAsync:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
     return CCCR_OK;
   default:
     return CCCR_Warning;
@@ -1480,7 +1480,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_X86VectorCall:
     return CCCR_Ignore;
   case CC_C:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
   case CC_Swift:
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index d1f68b842348e..d9e5cf4d8a92f 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -94,7 +94,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
     default:
       return CCCR_Warning;
     case CC_C:
-    case CC_OpenCLKernel:
+    case CC_DeviceKernel:
       return CCCR_OK;
     }
   }
diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp
index 0bf5a062d3192..971a62b6c4037 100644
--- a/clang/lib/Basic/Targets/Mips.cpp
+++ b/clang/lib/Basic/Targets/Mips.cpp
@@ -337,7 +337,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const {
   case CC_X86VectorCall:
     return CCCR_Ignore;
   case CC_C:
-  case CC_OpenCLKernel:
+  case CC_DeviceKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
   case CC_Swift:
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index bf249e271a870..1521b3e9eada3 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -191,7 +191,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
   }
 
   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
-    return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK
+    return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK
                                                             : CCCR_Warning;
   }
 
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index 1f69530c4757f..8a54ca4b75d7b 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -244,7 +244,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
     switch (CC) {
     case CC_C:
     case CC_Swift:
-    case CC_OpenCLKernel:
+    case CC_DeviceKernel:
       return CCCR_OK;
     case CC_SwiftAsync:
       return CCCR_Error;
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 2f6fb33a7b597..711c5acb5d8b8 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -408,10 +408,11 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
     case CC_Swift:
     case CC_X86Pascal:
     case CC_IntelOclBicc:
-    case CC_OpenCLKernel:
       return CCCR_OK;
     case CC_SwiftAsync:
       return CCCR_Error;
+    case CC_DeviceKernel:
+      return IsOpenCL ? CCCR_OK : CCCR_Warning;
     default:
       return CCCR_Warning;
     }
@@ -439,7 +440,13 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   uint64_t getPointerAlignV(LangAS AddrSpace) const override {
     return getPointerWidthV(AddrSpace);
   }
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+    TargetInfo::adjust(Diags, Opts);
+    IsOpenCL = Opts.OpenCL;
+  }
 
+private:
+  bool IsOpenCL = false;
 };
 
 // X86-32 generic target
@@ -785,8 +792,9 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
     case CC_PreserveAll:
     case CC_PreserveNone:
     case CC_X86RegCall:
-    case CC_OpenCLKernel:
       return CCCR_OK;
+    case CC_DeviceKernel:
+      return IsOpenCL ? CCCR_OK : CCCR_Warning;
     default:
       return CCCR_Warning;
     }
@@ -817,7 +825,6 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
     return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize,
                                                          HasSizeMismatch);
   }
-
   void setMaxAtomicWidth() override {
     if (hasFeature("cx16"))
       MaxAtomicInlineWidth = 128;
@@ -829,6 +836,14 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
   size_t getMaxBitIntWidth() const override {
     return llvm::IntegerType::MAX_INT_BITS;
   }
+
+  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
+    X86TargetInfo::adjust(Diags, Opts); // Chain to the direct base override.
+    IsOpenCL = Opts.OpenCL;
+  }
+
+private:
+  bool IsOpenCL = false;
 };
 
 // x86-64 UEFI target
@@ -913,7 +928,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64TargetInfo
     case CC_Swift:
     case CC_SwiftAsync:
     case CC_X86RegCall:
-    case CC_OpenCLKernel:
+    case CC_DeviceKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f1c7699d27c3..6c64193a3777d 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -80,12 +80,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
     return llvm::CallingConv::AArch64_VectorCall;
   case CC_AArch64SVEPCS:
     return llvm::CallingConv::AArch64_SVE_VectorCall;
-  case CC_AMDGPUKernelCall:
-    return llvm::CallingConv::AMDGPU_KERNEL;
   case CC_SpirFunction:
     return llvm::CallingConv::SPIR_FUNC;
-  case CC_OpenCLKernel:
-    return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
+  case CC_DeviceKernel: {
+    if (CGM.getLangOpts().OpenCL)
+      return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
+    if (CGM.getTriple().isSPIROrSPIRV())
+      return llvm::CallingConv::SPIR_KERNEL;
+    if (CGM.getTriple().isAMDGPU())
+      return llvm::CallingConv::AMDGPU_KERNEL;
+    if (CGM.getTriple().isNVPTX())
+      return llvm::CallingConv::PTX_Kernel;
+    llvm_unreachable("Unknown kernel calling convention");
+  }
   case CC_PreserveMost:
     return llvm::CallingConv::PreserveMost;
   case CC_PreserveAll:
@@ -253,7 +260,8 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
                                    FTP);
 }
 
-static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
+static CallingConv getCallingConventionForDecl(const CodeGenModule &CGM,
+                                               const ObjCMethodDecl *D,
                                                bool IsWindows) {
   // Set the appropriate calling convention for the Function.
   if (D->hasAttr<StdCallAttr>())
@@ -283,8 +291,8 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
   if (D->hasAttr<AArch64SVEPcsAttr>())
     return CC_AArch64SVEPCS;
 
-  if (D->hasAttr<AMDGPUKernelCallAttr>())
-    return CC_AMDGPUKernelCall;
+  if (D->hasAttr<DeviceKernelAttr>())
+    return CC_DeviceKernel;
 
   if (D->hasAttr<IntelOclBiccAttr>())
     return CC_IntelOclBicc;
@@ -532,7 +540,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const GlobalDecl GD) {
   assert(isa<FunctionType>(FTy));
   setCUDAKernelCallingConvention(FTy, CGM, FD);
 
-  if (FD->hasAttr<OpenCLKernelAttr>() &&
+  if (getContext().getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>() &&
       GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
     const FunctionType *FT = FTy->getAs<FunctionType>();
     CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
@@ -582,7 +590,8 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
 
   FunctionType::ExtInfo einfo;
   bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
-  einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
+  einfo =
+      einfo.withCallingConv(getCallingConventionForDecl(CGM, MD, IsWindows));
 
   if (getContext().getLangOpts().ObjCAutoRefCount &&
       MD->hasAttr<NSReturnsRetainedAttr>())
@@ -757,7 +766,7 @@ CodeGenTypes::arrangeSYCLKernelCallerDeclaration(QualType resultType,
 
   return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
                                  argTypes,
-                                 FunctionType::ExtInfo(CC_OpenCLKernel),
+                                 FunctionType::ExtInfo(CC_DeviceKernel),
                                  /*paramInfos=*/{}, RequiredArgs::All);
 }
 
@@ -2510,7 +2519,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
                                  NumElemsParam);
     }
 
-    if (TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+    if (getLangOpts().OpenCL && TargetDecl->hasAttr<DeviceKernelAttr>() &&
         CallingConv != CallingConv::CC_C &&
         CallingConv != CallingConv::CC_SpirFunction) {
       // Check CallingConv to avoid adding uniform-work-group-size attribute to
@@ -2889,8 +2898,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     // > For arguments to a __kernel function declared to be a pointer to a
     // > data type, the OpenCL compiler can assume that the pointee is always
     // > appropriately aligned as required by the data type.
-    if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
-        ParamType->isPointerType()) {
+    if (getLangOpts().OpenCL && TargetDecl &&
+        TargetDecl->hasAttr<DeviceKernelAttr>() && ParamType->isPointerType()) {
       QualType PTy = ParamType->getPointeeType();
       if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
         llvm::Align Alignment =
@@ -4598,7 +4607,7 @@ void CodeGenFunction::EmitCallArgs(
     if (MD) {
       IsVariadic = MD->isVariadic();
       ExplicitCC = getCallingConventionForDecl(
-          MD, CGM.getTarget().getTriple().isOSWindows());
+          CGM, MD, CGM.getTarget().getTriple().isOSWindows());
       ArgTypes.assign(MD->param_type_begin() + ParamsToSkip,
                       MD->param_type_end());
     } else {
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index f3ec498d4064b..e6f133bc5c68e 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1589,9 +1589,8 @@ static unsigned getDwarfCC(CallingConv CC) {
     return llvm::dwarf::DW_CC_LLVM_IntelOclBicc;
   case CC_SpirFunction:
     return llvm::dwarf::DW_CC_LLVM_SpirFunction;
-  case CC_OpenCLKernel:
-  case CC_AMDGPUKernelCall:
-    return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
+  case CC_DeviceKernel:
+    return llvm::dwarf::DW_CC_LLVM_DeviceKernel;
   case CC_Swift:
     return llvm::dwarf::DW_CC_LLVM_Swift;
   case CC_SwiftAsync:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 2e01adc51fdf0..7f3c3984b9a9d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5813,7 +5813,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
 }
 
 static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) {
-  if (FD->hasAttr<OpenCLKernelAttr>())
+  if (FD->hasAttr<DeviceKernelAttr>())
     return GlobalDecl(FD, KernelReferenceKind::Stub);
   return GlobalDecl(FD);
 }
@@ -6235,7 +6235,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
   const auto *FnType = cast<FunctionType>(PointeeType);
 
   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
-      FD && FD->hasAttr<OpenCLKernelAttr>())
+      FD && FD->hasAttr<DeviceKernelAttr>())
     CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FnType);
 
   // If we are checking indirect calls and this call is indirect, check that the
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index d773cdd505ff4..9a5824002e8d9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -621,7 +621,7 @@ CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const {
 
 void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD,
                                          llvm::Function *Fn) {
-  if (!FD->hasAttr<OpenCLKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
+  if (!FD->hasAttr<DeviceKernelAttr>() && !FD->hasAttr<CUDAGlobalAttr>())
     return;
 
   llvm::LLVMContext &Context = getLLVMContext();
@@ -1595,7 +1595,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
     // Implicit copy-assignment gets the same special treatment as implicit
     // copy-constructors.
     emitImplicitAssignmentOperatorBody(Args);
-  } else if (FD->hasAttr<OpenCLKernelAttr>() &&
+  } else if (getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>() &&
              GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) {
     CallArgList CallArgs;
     for (unsigned i = 0; i < Args.size(); ++i) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c27817604f6ca..7d93911ce92ff 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1908,7 +1908,8 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
     } else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__device_stub__" << II->getName();
-    } else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
+    } else if (FD && FD->hasAttr<DeviceKernelAttr>() &&
+               CGM.getLangOpts().OpenCL &&
                GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       Out << "__clang_ocl_kern_imp_" << II->getName();
     } else {
@@ -3923,7 +3924,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
-    if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
+    if (FD->hasAttr<DeviceKernelAttr>() && FD->doesThisDeclarationHaveABody())
       addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
 
     // Update deferred annotations with the latest declaration if the function
@@ -4893,7 +4894,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
   if (!Ty) {
     const auto *FD = cast<FunctionDecl>(GD.getDecl());
     Ty = getTypes().ConvertType(FD->getType());
-    if (FD->hasAttr<OpenCLKernelAttr>() &&
+    if (FD->hasAttr<DeviceKernelAttr>() &&
         GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
       const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
       Ty = getTypes().GetFunctionType(FI);
@@ -6177,7 +6178,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
                           (CodeGenOpts.OptimizationLevel == 0) &&
                           !D->hasAttr<MinSizeAttr>();
 
-  if (D->hasAttr<OpenCLKernelAttr>()) {
+  if (getLangOpts().OpenCL && D->hasAttr<DeviceKernelAttr>()) {
     if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
         !D->hasAttr<NoInlineAttr>() &&
         !Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 981488eb4dc37..d0c8e40ee5a17 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -191,7 +191,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
   auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
                                    &CGF.CGM.getModule());
   llvm::CallingConv::ID KernelCC =
-      CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel);
+      CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_DeviceKernel);
   F->setCallingConv(KernelCC);
 
   llvm::AttrBuilder KernelAttrs(C);
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index bcf039d9f268a..f5d9e30ec7fbd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -336,7 +336,7 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
     return false;
 
   return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
-         (D->hasAttr<OpenCLKernelAttr>() ||
+         (D->hasAttr<DeviceKernelAttr>() ||
           (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
           (isa<VarDecl>(D) &&
            (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
@@ -349,7 +349,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
   const auto *ReqdWGS =
       M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
   const bool IsOpenCLKernel =
-      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+      M.getLangOpts().OpenCL && FD->hasAttr<DeviceKernelAttr>();
   const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
 
   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
@@ -571,7 +571,7 @@ bool AMDGPUTargetCodeGenInfo::shouldEmitDWARFBitFieldSeparators() const {
 void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
     const FunctionType *&FT) const {
   FT = getABIInfo().getContext().adjustFunctionType(
-      FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+      FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
 }
 
 /// Return IR struct type for rtinfo struct in rocm-device-libs used for device
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 25ab28c54b659..139a0094b16fc 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -263,7 +263,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
   if (M.getLangOpts().OpenCL) {
     // Use OpenCL function attributes to check for kernel functions
     // By default, all functions are device functions
-    if (FD->hasAttr<OpenCLKernelAttr>()) {
+    if (FD->hasAttr<DeviceKernelAttr>()) {
       // OpenCL __kernel functions get kernel metadata
       // Create !{<func-ref>, metadata !"kernel", i32 1} node
       F->setCallingConv(llvm::CallingConv::PTX_Kernel);
@@ -292,7 +292,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
   }
 
   // Attach kernel metadata directly if compiling for NVPTX.
-  if (FD->hasAttr<NVPTXKernelAttr>()) {
+  if (FD->hasAttr<DeviceKernelAttr>()) {
     F->setCallingConv(llvm::CallingConv::PTX_Kernel);
   }
 }
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index f35c124f50aa0..9c186a16e787a 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -227,7 +227,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
   // Convert HIP kernels to SPIR-V kernels.
   if (getABIInfo().getContext().getLangOpts().HIP) {
     FT = getABIInfo().getContext().adjustFunctionType(
-        FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
+        FT, FT->getExtInfo().withCallingConv(CC_DeviceKernel));
     return;
   }
 }
diff --git a/clang/lib/CodeGen/Targets/TCE.cpp b/clang/lib/CodeGen/Targets/TCE.cpp
index f3685ccd9825a..df49aea49a1e3 100644
--- a/clang/lib/CodeGen/Targets/TCE.cpp
+++ b/clang/lib/CodeGen/Targets/TCE.cpp
@@ -39,7 +39,7 @@ void TCETargetCodeGenInfo::setTargetAttributes(
   llvm::Function *F = cast<llvm::Function>(GV);
 
   if (M.getLangOpts().OpenCL) {
-    if (FD->hasAttr<OpenCLKernelAttr>()) {
+    if (FD->hasAttr<DeviceKernelAttr>()) {
       // OpenCL C Kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
       const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6b561d7bfc6e7..2a11c212d090c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8782,7 +8782,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
         FunctionDecl *FD = getCurFunctionDecl();
         // OpenCL v1.1 s6.5.2 and s6.5.3: no local or constant variables
         // in functions.
-        if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
+        if (FD && !FD->hasAttr<DeviceKernelAttr>()) {
           if (T.getAddressSpace() == LangAS::opencl_constant)
             Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
                 << 0 /*non-kernel only*/ << "constant";
@@ -8794,7 +8794,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
         }
         // OpenCL v2.0 s6.5.2 and s6.5.3: local and constant variables must be
         // in the outermost scope of a kernel function.
-        if (FD && FD->hasAttr<OpenCLKernelAttr>()) {
+        if (FD && FD->hasAttr<DeviceKernelAttr>()) {
           if (!getCurScope()->isFunctionScope()) {
             if (T.getAddressSpace() == LangAS::opencl_constant)
               Diag(NewVD->getLocation(), diag::err_opencl_addrspace_scope)
@@ -10927,9 +10927,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
 
   MarkUnusedFileScopedDecl(NewFD);
 
-
-
-  if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) {
+  if (getLangOpts().OpenCL && NewFD->hasAttr<DeviceKernelAttr>()) {
     // OpenCL v1.2 s6.8 static is invalid for kernel functions.
     if (SC == SC_Static) {
       Diag(D.getIdentifierLoc(), diag::err_static_kernel);
@@ -12434,7 +12432,7 @@ void Sema::CheckMain(FunctionDecl *FD, const DeclSpec &DS) {
 
   if (getLangOpts().OpenCL) {
     Diag(FD->getLocation(), diag::err_opencl_no_main)
-        << FD->hasAttr<OpenCLKernelAttr>();
+        << FD->hasAttr<DeviceKernelAttr>();
     FD->setInvalidDecl();
     return;
   }
@@ -15696,7 +15694,7 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
     return false;
 
   // Don't warn for OpenCL kernels.
-  if (FD->hasAttr<OpenCLKernelAttr>())
+  if (FD->hasAttr<DeviceKernelAttr>())
     return false;
 
   // Don't warn on explicitly deleted functions.
@@ -20580,7 +20578,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
 
   // SYCL functions can be template, so we check if they have appropriate
   // attribute prior to checking if it is a template.
-  if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>())
+  if (LangOpts.SYCLIsDevice && FD->hasAttr<DeviceKernelAttr>())
     return FunctionEmissionStatus::Emitted;
 
   // Templates are emitted when they're instantiated.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index bfb3ee9dcbd16..012017e559324 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5087,8 +5087,8 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
     S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
-    D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_DeviceKernel)
+    D->addAttr(::new (S.Context) DeviceKernelAttr(S.Context, AL));
   else
     D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
@@ -5223,9 +5223,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   case ParsedAttr::AT_AArch64SVEPcs:
     D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL));
     return;
-  case ParsedAttr::AT_AMDGPUKernelCall:
-    D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL));
+  case ParsedAttr::AT_DeviceKernel: {
+    // The attribute should already be applied.
+    assert(D->hasAttr<DeviceKernelAttr>() && "Expected attribute");
     return;
+  }
   case ParsedAttr::AT_IntelOclBicc:
     D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL));
     return;
@@ -5268,6 +5270,33 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   }
 }
 
+static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+  bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
+  if (S.getLangOpts().SYCLIsDevice) {
+    if (!IsFunctionTemplate) {
+      S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
+          << AL << AL.isRegularKeywordAttribute() << "function templates";
+    } else {
+      S.SYCL().handleKernelAttr(D, AL);
+    }
+  } else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
+    S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
+  } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
+    handleGlobalAttr(S, D, AL);
+  } else {
+    // OpenCL C++ will throw a more specific error.
+    if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
+      S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
+          << AL << AL.isRegularKeywordAttribute() << "functions";
+    }
+    handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
+  }
+  // Make sure we validate the CC with the target
+  // and warn/error if necessary.
+  handleCallConvAttr(S, D, AL);
+}
+
 static void handleSuppressAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (AL.getAttributeSpellingListIndex() == SuppressAttr::CXX11_gsl_suppress) {
     // Suppression attribute with GSL spelling requires at least 1 argument.
@@ -5429,9 +5458,6 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
   case ParsedAttr::AT_AArch64SVEPcs:
     CC = CC_AArch64SVEPCS;
     break;
-  case ParsedAttr::AT_AMDGPUKernelCall:
-    CC = CC_AMDGPUKernelCall;
-    break;
   case ParsedAttr::AT_RegCall:
     CC = CC_X86RegCall;
     break;
@@ -5503,6 +5529,11 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
                                   llvm::Log2_64(ABIVLen) - 5);
     break;
   }
+  case ParsedAttr::AT_DeviceKernel: {
+    // Validation was handled in handleDeviceKernelAttr.
+    CC = CC_DeviceKernel;
+    break;
+  }
   default: llvm_unreachable("unexpected attribute kind");
   }
 
@@ -7106,9 +7137,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_EnumExtensibility:
     handleEnumExtensibilityAttr(S, D, AL);
     break;
-  case ParsedAttr::AT_SYCLKernel:
-    S.SYCL().handleKernelAttr(D, AL);
-    break;
   case ParsedAttr::AT_SYCLKernelEntryPoint:
     S.SYCL().handleKernelEntryPointAttr(D, AL);
     break;
@@ -7133,7 +7161,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_CalledOnce:
     handleCalledOnceAttr(S, D, AL);
     break;
-  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
     handleGlobalAttr(S, D, AL);
     break;
@@ -7397,13 +7424,15 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_PreserveAll:
   case ParsedAttr::AT_AArch64VectorPcs:
   case ParsedAttr::AT_AArch64SVEPcs:
-  case ParsedAttr::AT_AMDGPUKernelCall:
   case ParsedAttr::AT_M68kRTD:
   case ParsedAttr::AT_PreserveNone:
   case ParsedAttr::AT_RISCVVectorCC:
   case ParsedAttr::AT_RISCVVLSCC:
     handleCallConvAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_DeviceKernel:
+    handleDeviceKernelAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_Suppress:
     handleSuppressAttr(S, D, AL);
     break;
@@ -7713,9 +7742,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
 
 static bool isKernelDecl(Decl *D) {
   const FunctionType *FnTy = D->getFunctionType();
-  return D->hasAttr<OpenCLKernelAttr>() ||
-         (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) ||
-         D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>();
+  return D->hasAttr<DeviceKernelAttr>() ||
+         (FnTy && FnTy->getCallConv() == CallingConv::CC_DeviceKernel) ||
+         D->hasAttr<CUDAGlobalAttr>();
 }
 
 void Sema::ProcessDeclAttributeList(
@@ -7742,7 +7771,7 @@ void Sema::ProcessDeclAttributeList(
   // good to have a way to specify "these attributes must appear as a group",
   // for these. Additionally, it would be good to have a way to specify "these
   // attribute must never appear as a group" for attributes like cold and hot.
-  if (!(D->hasAttr<OpenCLKernelAttr>() ||
+  if (!(D->hasAttr<DeviceKernelAttr>() ||
         (D->hasAttr<CUDAGlobalAttr>() &&
          Context.getTargetInfo().getTriple().isSPIRV()))) {
     // These attributes cannot be applied to a non-kernel function.
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 1969d7b0ba837..3e03cb4bd5f99 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -199,7 +199,7 @@ void SemaSYCL::handleKernelAttr(Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  handleSimpleAttribute<SYCLKernelAttr>(*this, D, AL);
+  handleSimpleAttribute<DeviceKernelAttr>(*this, D, AL);
 }
 
 void SemaSYCL::handleKernelEntryPointAttr(Decl *D, const ParsedAttr &AL) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 08b3a423d1526..4f21166db34b0 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -668,9 +668,9 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
 // This doesn't take any template parameters, but we have a custom action that
 // needs to happen when the kernel itself is instantiated. We need to run the
 // ItaniumMangler to mark the names required to name this kernel.
-static void instantiateDependentSYCLKernelAttr(
+static void instantiateDependentDeviceKernelAttr(
     Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
-    const SYCLKernelAttr &Attr, Decl *New) {
+    const DeviceKernelAttr &Attr, Decl *New) {
   New->addAttr(Attr.clone(S.getASTContext()));
 }
 
@@ -912,8 +912,8 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
       continue;
     }
 
-    if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) {
-      instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New);
+    if (auto *A = dyn_cast<DeviceKernelAttr>(TmplAttr)) {
+      instantiateDependentDeviceKernelAttr(*this, TemplateArgs, *A, New);
       continue;
     }
 
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 294daef70c339..3c6532e85eee5 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -134,7 +134,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_VectorCall:                                              \
   case ParsedAttr::AT_AArch64VectorPcs:                                        \
   case ParsedAttr::AT_AArch64SVEPcs:                                           \
-  case ParsedAttr::AT_AMDGPUKernelCall:                                        \
+  case ParsedAttr::AT_DeviceKernel:                                            \
   case ParsedAttr::AT_MSABI:                                                   \
   case ParsedAttr::AT_SysVABI:                                                 \
   case ParsedAttr::AT_Pcs:                                                     \
@@ -3754,14 +3754,14 @@ static CallingConv getCCForDeclaratorChunk(
   CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
                                                          IsCXXInstanceMethod);
 
-  // Attribute AT_OpenCLKernel affects the calling convention for SPIR
+  // Attribute AT_DeviceKernel affects the calling convention for SPIR
   // and AMDGPU targets, hence it cannot be treated as a calling
   // convention attribute. This is the simplest place to infer
   // calling convention for OpenCL kernels.
   if (S.getLangOpts().OpenCL) {
     for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
-      if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) {
-        CC = CC_OpenCLKernel;
+      if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
+        CC = CC_DeviceKernel;
         break;
       }
     }
@@ -3774,7 +3774,7 @@ static CallingConv getCCForDeclaratorChunk(
     if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
       for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
         if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
-          CC = CC_OpenCLKernel;
+          CC = CC_DeviceKernel;
           break;
         }
       }
@@ -7530,8 +7530,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
     return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
   case ParsedAttr::AT_ArmStreaming:
     return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
-  case ParsedAttr::AT_AMDGPUKernelCall:
-    return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
+  case ParsedAttr::AT_DeviceKernel:
+    return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
   case ParsedAttr::AT_Pcs: {
     // The attribute may have had a fixit applied where we treated an
     // identifier as a string literal.  The contents of the string are valid,
@@ -8719,6 +8719,16 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
   }
 }
 
+static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
+  // The DeviceKernel attribute is shared for many targets, and
+  // it is only allowed to be a type attribute with the AMDGPU
+  // spelling, so skip processing the attr as a type attr
+  // unless it has that spelling.
+  if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
+    return true;
+  return DeviceKernelAttr::isAMDGPUSpelling(Attr);
+}
+
 static void processTypeAttrs(TypeProcessingState &state, QualType &type,
                              TypeAttrLocation TAL,
                              const ParsedAttributesView &attrs,
@@ -8974,6 +8984,9 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
         break;
       [[fallthrough]];
     FUNCTION_TYPE_ATTRS_CASELIST:
+      if (!isMultiSubjectAttrAllowedOnType(attr))
+        break;
+
       attr.setUsedAsTypeAttr();
 
       // Attributes with standard syntax have strict rules for what they
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 7affacb1a109a..af05deb3a13da 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -109,7 +109,6 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
-// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)
diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index ffa942d10669c..f5907d74b0339 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -731,8 +731,8 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
       TCALLINGCONV(RISCVVLSCall_32768);
       TCALLINGCONV(RISCVVLSCall_65536);
     case CC_SpirFunction: return CXCallingConv_Unexposed;
-    case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed;
-    case CC_OpenCLKernel: return CXCallingConv_Unexposed;
+    case CC_DeviceKernel:
+      return CXCallingConv_Unexposed;
       break;
     }
 #undef TCALLINGCONV
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index e52324a8ebc12..803ed67d534ea 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -1117,7 +1117,7 @@ HANDLE_DW_CC(0xc3, LLVM_AAPCS)
 HANDLE_DW_CC(0xc4, LLVM_AAPCS_VFP)
 HANDLE_DW_CC(0xc5, LLVM_IntelOclBicc)
 HANDLE_DW_CC(0xc6, LLVM_SpirFunction)
-HANDLE_DW_CC(0xc7, LLVM_OpenCLKernel)
+HANDLE_DW_CC(0xc7, LLVM_DeviceKernel)
 HANDLE_DW_CC(0xc8, LLVM_Swift)
 HANDLE_DW_CC(0xc9, LLVM_PreserveMost)
 HANDLE_DW_CC(0xca, LLVM_PreserveAll)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
index bd25f6c30ebf1..a760f773055d2 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
@@ -734,13 +734,15 @@ void DWARFTypePrinter<DieType>::appendSubroutineNameAfter(
       OS << " __attribute__((intel_ocl_bicc))";
       break;
     case dwarf::CallingConvention::DW_CC_LLVM_SpirFunction:
-    case dwarf::CallingConvention::DW_CC_LLVM_OpenCLKernel:
-      // These aren't available as attributes, but maybe we should still
-      // render them somehow? (Clang doesn't render them, but that's an issue
+      // This isn't available as an attribute, but maybe we should still
+      // render it somehow? (Clang doesn't render it, but that's an issue
       // for template names too - since then the DWARF names of templates
       // instantiated with function types with these calling conventions won't
       // have distinct names - so we'd need to fix that too)
       break;
+    case dwarf::CallingConvention::DW_CC_LLVM_DeviceKernel:
+      OS << " __attribute__((device_kernel))";
+      break;
     case dwarf::CallingConvention::DW_CC_LLVM_Swift:
       // SwiftAsync missing
       OS << " __attribute__((swiftcall))";
diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
index 89c9801b5e466..a95f9a0f9c43e 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
@@ -28,6 +28,6 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
 !2 = !{i32 7, !"Dwarf Version", i32 5}
 !3 = !{i32 2, !"Debug Info Version", i32 3}
 !4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
-!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6)
+!5 = !DISubroutineType(cc: DW_CC_LLVM_DeviceKernel, types: !6)
 !6 = !{null}
 !7 = !{i32 1024, i32 1, i32 1}

>From 0c752470566e9f74b1d9d1330ecd977674726412 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Thu, 8 May 2025 12:53:52 -0700
Subject: [PATCH 2/2] apply feedback and rework member functions

Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
 clang/include/clang/Basic/Attr.td | 48 ++++++++++++-------------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 1ce7f0c1835ac..90a7c17e6c07a 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1537,39 +1537,27 @@ def DeviceKernel : DeclOrTypeAttr {
   let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
                    Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
                    CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
-  let LangOpts = [];
   let Documentation = [SYCLKernelDocs];
   let AdditionalMembers =
   [{
-   inline bool isAMDGPUSpelling() const {
-     return isAMDGPUSpelling(*this);
-   }
-   template<typename T>
-   static inline bool isAMDGPUSpelling(const T& Attr) {
-     return Attr.getAttrName()->getName() == "amdgpu_kernel";
-   }
-   inline bool isNVPTXSpelling() const {
-     return isNVPTXSpelling(*this);
-   }
-   template<typename T>
-   static inline bool isNVPTXSpelling(const T& Attr) {
-     return Attr.getAttrName()->getName() == "nvptx_kernel";
-   }
-   inline bool isOpenCLSpelling() const {
-     return isOpenCLSpelling(*this);
-   }
-   template<typename T>
-   static inline bool isOpenCLSpelling(const T& Attr) {
-   return Attr.getAttrName()->getName() == "kernel" ||
-          Attr.getAttrName()->getName() == "__kernel";
-   }
-   inline bool isSYCLSpelling() const {
-     return isSYCLSpelling(*this);
-   }
-   template<typename T>
-   static inline bool isSYCLSpelling(const T& Attr) {
-     return Attr.getAttrName()->getName() == "sycl_kernel";
-   }
+    static inline bool isAMDGPUSpelling(const AttributeCommonInfo& A) {
+      return A.getAttributeSpellingListIndex() == GNU_amdgpu_kernel ||
+             A.getAttributeSpellingListIndex() == CXX11_clang_amdgpu_kernel ||
+             A.getAttributeSpellingListIndex() == C23_clang_amdgpu_kernel;
+    }
+    static inline bool isNVPTXSpelling(const AttributeCommonInfo& A) {
+      return A.getAttributeSpellingListIndex() == GNU_nvptx_kernel ||
+             A.getAttributeSpellingListIndex() == CXX11_clang_nvptx_kernel ||
+             A.getAttributeSpellingListIndex() == C23_clang_nvptx_kernel;
+    }
+    static inline bool isSYCLSpelling(const AttributeCommonInfo& A) {
+      return A.getAttributeSpellingListIndex() == GNU_sycl_kernel ||
+             A.getAttributeSpellingListIndex() == CXX11_clang_sycl_kernel ||
+             A.getAttributeSpellingListIndex() == C23_clang_sycl_kernel;
+    }
+    static inline bool isOpenCLSpelling(const AttributeCommonInfo& A) {
+      return A.getAttributeSpellingListIndex() == Keyword_kernel;
+    }
 }];
 }