[clang] [llvm] [SPIRV] Use AMDGPU ABI for AMDGCN flavoured SPIRV (PR #169865)
Alex Voicu via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 27 19:05:44 PST 2025
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/169865
>From 829faa3884be286fe97ca07a2e3ab8d76b6a91fc Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 28 Nov 2025 01:26:40 +0000
Subject: [PATCH 1/2] Use AMDGPU ABI for AMDGCNSPIRV; add lowering for `byref`.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 292 +++++++++++++---
.../amdgpu-kernel-arg-pointer-type.cu | 144 ++++----
clang/test/CodeGenCUDA/kernel-args.cu | 8 +-
.../amdgcnspirv-uses-amdgpu-abi.cpp | 321 ++++++++++++++++++
llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 5 +-
.../ptr-argument-byref-amdgcnspirv.ll | 24 ++
6 files changed, 669 insertions(+), 125 deletions(-)
create mode 100644 clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp
create mode 100644 llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 1a8c85d8871ec..3540093074bfe 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -9,6 +9,11 @@
#include "ABIInfoImpl.h"
#include "HLSLBufferLayoutBuilder.h"
#include "TargetInfo.h"
+#include "clang/Basic/LangOptions.h"
+#include "llvm/IR/DerivedTypes.h"
+
+#include <stdint.h>
+#include <utility>
using namespace clang;
using namespace clang::CodeGen;
@@ -33,9 +38,41 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
void computeInfo(CGFunctionInfo &FI) const override;
private:
+ ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+};
+
+class AMDGCNSPIRVABIInfo : public SPIRVABIInfo {
+  // TODO: this should be unified / shared with AMDGPU; ideally we'd like to
+  // re-use AMDGPUABIInfo eventually, rather than duplicate it.
+ static constexpr unsigned MaxNumRegsForArgsRet = 16; // 16 32-bit registers
+ mutable unsigned NumRegsLeft = 0;
+
+ unsigned numRegsForType(QualType Ty) const;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override {
+ return true;
+ }
+ bool isHomogeneousAggregateSmallEnough(const Type *Base,
+ uint64_t Members) const override {
+ uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
+
+ // Homogeneous Aggregates may occupy at most 16 registers.
+ return Members * NumRegs <= MaxNumRegsForArgsRet;
+ }
+
+ // Coerce HIP scalar pointer arguments from generic pointers to global ones.
+ llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+ unsigned ToAS) const;
+
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
+public:
+ AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {}
+ void computeInfo(CGFunctionInfo &FI) const override;
+
+ llvm::FixedVectorType *getOptimalVectorMemoryType(
+ llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const override;
};
} // end anonymous namespace
namespace {
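For context, the 16-register budget above mirrors AMDGPU's MaxNumRegsForArgsRet: each homogeneous-aggregate member is rounded up to whole 32-bit registers, and the aggregate only qualifies while Members * NumRegs stays within 16. A minimal sketch of that arithmetic, using hypothetical struct types purely for illustration:

    // Hypothetical types; register math as in isHomogeneousAggregateSmallEnough.
    struct F4  { float  v[4];  }; //  4 members * 1 reg  =  4 <= 16 -> small enough
    struct D8  { double v[8];  }; //  8 members * 2 regs = 16 <= 16 -> small enough
    struct F17 { float  v[17]; }; // 17 members * 1 reg  = 17 >  16 -> handled by the
                                  // indirect (byref) fallback in classifyArgumentType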
@@ -83,7 +120,10 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
+ : CommonSPIRTargetCodeGenInfo(
+ (CGT.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+ ? std::make_unique<AMDGCNSPIRVABIInfo>(CGT)
+ : std::make_unique<SPIRVABIInfo>(CGT)) {}
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
@@ -132,25 +172,6 @@ void CommonSPIRABIInfo::setCCs() {
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
-ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
- if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
- return DefaultABIInfo::classifyReturnType(RetTy);
- if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
- return DefaultABIInfo::classifyReturnType(RetTy);
-
- if (const auto *RD = RetTy->getAsRecordDecl();
- RD && RD->hasFlexibleArrayMember())
- return DefaultABIInfo::classifyReturnType(RetTy);
-
- // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
- // avoid encoding various architecture specific bits here we return everything
- // as direct to retain type info for things like aggregates, for later perusal
- // when translating back to LLVM/lowering in the BE. This is also why we
- // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
- // This will be revisited / optimised in the future.
- return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
-}
-
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
if (getContext().getLangOpts().isTargetDevice()) {
// Coerce pointer arguments with default address space to CrossWorkGroup
@@ -167,18 +188,6 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
}
if (isAggregateTypeForABI(Ty)) {
- if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
- // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
- // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
- // which the AMDGPU kernel ABI does not allow. Passing aggregates as
- // direct works around this impedance mismatch, as it retains type info
- // and can be correctly handled, post reverse-translation, by the AMDGPU
- // BE, which has to support this CC for legacy OpenCL purposes. It can
- // be brittle and does lead to performance degradation in certain
- // pathological cases. This will be revisited / optimised in the future,
- // once a way to deal with the byref/byval impedance mismatch is
- // identified.
- return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
// Force copying aggregate type in kernel arguments by value when
// compiling CUDA targeting SPIR-V. This is required for the object
// copied to be valid on the device.
@@ -193,11 +202,150 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
return classifyArgumentType(Ty);
}
-ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
- if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
- return DefaultABIInfo::classifyArgumentType(Ty);
- if (!isAggregateTypeForABI(Ty))
- return DefaultABIInfo::classifyArgumentType(Ty);
+void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  // The logic is the same as in DefaultABIInfo, with an exception for the
+  // kernel argument handling.
+ llvm::CallingConv::ID CC = FI.getCallingConvention();
+
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+ for (auto &I : FI.arguments()) {
+ if (CC == llvm::CallingConv::SPIR_KERNEL) {
+ I.info = classifyKernelArgumentType(I.type);
+ } else {
+ I.info = classifyArgumentType(I.type);
+ }
+ }
+}
+
+unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
+ // This duplicates the AMDGPUABI computation.
+ unsigned NumRegs = 0;
+
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // Compute from the number of elements. The reported size is based on the
+ // in-memory size, which includes the padding 4th element for 3-vectors.
+ QualType EltTy = VT->getElementType();
+ unsigned EltSize = getContext().getTypeSize(EltTy);
+
+ // 16-bit element vectors should be passed as packed.
+ if (EltSize == 16)
+ return (VT->getNumElements() + 1) / 2;
+
+ unsigned EltNumRegs = (EltSize + 31) / 32;
+ return EltNumRegs * VT->getNumElements();
+ }
+
+ if (const auto *RD = Ty->getAsRecordDecl()) {
+ assert(!RD->hasFlexibleArrayMember());
+
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+ NumRegs += numRegsForType(FieldTy);
+ }
+
+ return NumRegs;
+ }
+
+ return (getContext().getTypeSize(Ty) + 31) / 32;
+}
+
+llvm::Type *
+AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
+ unsigned ToAS) const {
+ // Single value types.
+ auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
+ if (PtrTy && PtrTy->getAddressSpace() == FromAS)
+ return llvm::PointerType::get(Ty->getContext(), ToAS);
+ return Ty;
+}
+
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+ if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+ return DefaultABIInfo::classifyReturnType(RetTy);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just return a regular value.
+ if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const auto *RD = RetTy->getAsRecordDecl();
+ RD && RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyReturnType(RetTy);
+
+  // Pack aggregates <= 8 bytes into a single VGPR or pair.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+  // TODO: This is an oddity carried over from AMDGPU; we retain it to
+  // ensure consistency, but it might be reasonable to return Int64.
+ if (Size <= 64) {
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
+ return ABIArgInfo::getDirect();
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+/// For kernels, all parameters are really passed in a special buffer; it does
+/// not make sense to pass anything byval, so arguments go direct or byref.
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // TODO: Can we omit empty structs?
+
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ Ty = QualType(SeltTy, 0);
+
+ llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+ llvm::Type *LTy = OrigLTy;
+ if (getContext().getLangOpts().isTargetDevice()) {
+ LTy = coerceKernelArgumentType(
+ OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
+ /*ToAS=*/getContext().getTargetAddressSpace(LangAS::opencl_global));
+ }
+
+  // FIXME: This doesn't apply the optimization of coercing pointers in structs
+  // to global address space when using byref. This would require implementing a
+  // new kind of coercion of the in-memory type for indirect arguments.
+ if (LTy == OrigLTy && isAggregateTypeForABI(Ty)) {
+ return ABIArgInfo::getIndirectAliased(
+ getContext().getTypeAlignInChars(Ty),
+ getContext().getTargetAddressSpace(LangAS::opencl_constant),
+ false /*Realign*/, nullptr /*Padding*/);
+ }
+
+ // TODO: inhibiting flattening is an AMDGPU workaround for Clover, which might
+ // be vestigial and should be revisited.
+ return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+}
+
+ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+ assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
+
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // TODO: support for variadics.
+
+ if (!isAggregateTypeForABI(Ty)) {
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
+ if (!ArgInfo.isIndirect()) {
+ unsigned NumRegs = numRegsForType(Ty);
+ NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+ }
+
+ return ArgInfo;
+ }
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
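With the kernel path above, AMDGCN-flavoured SPIR-V follows the AMDGPU kernarg convention: aggregate kernel arguments are passed indirectly (byref) in the constant address space instead of as direct SPIR-V aggregates, and default-address-space pointer arguments are coerced to the global (CrossWorkGroup) address space. A hedged sketch of what this means for a HIP kernel; the kernel and struct names are illustrative only, and the IR shape matches the updated CHECK lines further down:

    // Illustrative HIP-style source, not part of the patch.
    struct Pair { int *p; float *q; };
    __attribute__((global)) void k(Pair s, int *x) { /* ... */ }

    // Expected signature shape for spirv64-amd-amdhsa (cf. the tests below):
    //   define spir_kernel void @k(
    //       ptr addrspace(2) noundef byref(%struct.Pair) align 8 %0,
    //       ptr addrspace(1) %x.coerce)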
@@ -205,37 +353,87 @@ ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
RAA == CGCXXABI::RAA_DirectInMemory);
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just pass a regular value. TODO: We
+ // could do reasonable-size multiple-element structs too, using getExpand(),
+ // though watch out for things like bitfields.
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
if (const auto *RD = Ty->getAsRecordDecl();
RD && RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
- return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ // Pack aggregates <= 8 bytes into single VGPR or pair.
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+    // TODO: This is an AMDGPU oddity and might be vestigial; we retain it to
+    // ensure consistency, but it should be revisited.
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (NumRegsLeft > 0) {
+ unsigned NumRegs = numRegsForType(Ty);
+ if (NumRegsLeft >= NumRegs) {
+ NumRegsLeft -= NumRegs;
+ return ABIArgInfo::getDirect();
+ }
+ }
+
+  // Use pass-by-reference instead of pass-by-value for struct arguments in
+  // the function ABI.
+ return ABIArgInfo::getIndirectAliased(
+ getContext().getTypeAlignInChars(Ty),
+ getContext().getTargetAddressSpace(LangAS::opencl_private));
}
-void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
- // The logic is same as in DefaultABIInfo with an exception on the kernel
- // arguments handling.
+void AMDGCNSPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
llvm::CallingConv::ID CC = FI.getCallingConvention();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ NumRegsLeft = MaxNumRegsForArgsRet;
for (auto &I : FI.arguments()) {
- if (CC == llvm::CallingConv::SPIR_KERNEL) {
+ if (CC == llvm::CallingConv::SPIR_KERNEL)
I.info = classifyKernelArgumentType(I.type);
- } else {
+ else
I.info = classifyArgumentType(I.type);
- }
}
}
+llvm::FixedVectorType *AMDGCNSPIRVABIInfo::getOptimalVectorMemoryType(
+ llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const {
+  // AMDGPU has legal 96-bit instructions, so 3 x i32 can be supported.
+ if (Ty->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(Ty) == 96)
+ return Ty;
+ return DefaultABIInfo::getOptimalVectorMemoryType(Ty, LangOpt);
+}
+
namespace clang {
namespace CodeGen {
void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
- if (CGM.getTarget().getTriple().isSPIRV())
- SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
- else
+ if (CGM.getTarget().getTriple().isSPIRV()) {
+ if (CGM.getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+ AMDGCNSPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
+ else
+ SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
+ } else {
CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
+ }
}
}
}
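For ordinary device functions, the classification above packs small aggregates into registers and only falls back to byref once the 16-register estimate is exhausted, while getOptimalVectorMemoryType keeps 3 x 32-bit vectors at their natural 96-bit width for memory accesses. A rough sketch under those assumptions, with hypothetical names (the new CodeGenHIP test below exercises the same cases):

    // Illustrative device-side sketch, not part of the patch.
    struct Small { int a; float b; };  // 8 bytes  -> coerced to [2 x i32]
    struct Big   { unsigned x[17]; };  // 68 bytes -> needs 17 regs > 16 -> byref
    typedef unsigned V3 __attribute__((ext_vector_type(3)));
    __attribute__((device)) void f(Small s, Big b, V3 v) { }
    // Loads/stores of V3 stay <3 x i32> rather than being widened to <4 x i32>.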
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index a48affaec3c8a..bf45a353851b4 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -95,7 +95,7 @@ __global__ void kernel1(int *x) {
// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
@@ -111,7 +111,7 @@ __global__ void kernel1(int *x) {
// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
-// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8, !align [[META6:![0-9]+]]
// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
@@ -302,28 +302,23 @@ struct S {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// CHECK-SPIRV-SAME: ptr addrspace(2) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
-// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
-// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
-// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
-// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
-// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
-// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
-// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8
-// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
-// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0
-// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
-// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK-SPIRV-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8
+// CHECK-SPIRV-NEXT: [[S:%.*]] = addrspacecast ptr [[COERCE]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p2.i64(ptr addrspace(4) align 8 [[S]], ptr addrspace(2) align 8 [[TMP0]], i64 16, i1 false)
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
-// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
-// CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
-// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0
-// CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
-// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00
-// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S]], i32 0, i32 1
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP3]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX1]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX1]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
@@ -343,16 +338,17 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly byref([[STRUCT_S:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
-// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
-// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
-// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[TMP0]], align 8
+// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[TMP0]], i64 8
+// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[COERCE_SROA_2_0__SROA_IDX]], align 8
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S(
@@ -511,27 +507,25 @@ struct T {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// CHECK-SPIRV-SAME: ptr addrspace(2) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
-// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
-// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
-// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
-// CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
-// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8
+// CHECK-SPIRV-NEXT: [[T:%.*]] = addrspacecast ptr [[COERCE]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p2.i64(ptr addrspace(4) align 8 [[T]], ptr addrspace(2) align 8 [[TMP0]], i64 16, i1 false)
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T]], i32 0, i32 0
// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0
-// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8
-// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0
-// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
-// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
-// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
-// CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1
-// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8
-// CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
-// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4
-// CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00
-// CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP1]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX1]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX1]], align 4
+// CHECK-SPIRV-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X2]], i64 0, i64 1
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX3]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP3]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX4]], align 4
+// CHECK-SPIRV-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD5]], ptr addrspace(4) [[ARRAYIDX4]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
@@ -551,17 +545,17 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly byref([[STRUCT_T:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
-// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
-// OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[TMP0]], align 8
+// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[TMP0]], i64 8
+// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[COERCE_SROA_2_0__SROA_IDX]], align 8
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4
// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00
-// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
-// OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00
-// OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4
+// OPT-SPIRV-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP2]], 2.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD5]], ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T(
@@ -677,18 +671,17 @@ struct SS {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// CHECK-SPIRV-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
-// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
+// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
-// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
-// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
+// CHECK-SPIRV-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr addrspace(4) [[COERCE_DIVE]], align 8
// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
-// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
-// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
-// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00
-// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4
// CHECK-SPIRV-NEXT: ret void
//
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
@@ -700,12 +693,13 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
-// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
-// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[A_COERCE]] to i64
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 3.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS(
@@ -727,7 +721,11 @@ __global__ void kernel8(struct SS a) {
*a.x += 3.f;
}
//.
+// CHECK: [[META4]] = !{}
+// CHECK: [[META5]] = !{i64 4}
+//.
// CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
+// CHECK-SPIRV: [[META6]] = !{i64 4}
//.
// OPT: [[META4]] = !{}
//.
diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu
index 8d17d89b315de..386fb8f2bfd11 100644
--- a/clang/test/CodeGenCUDA/kernel-args.cu
+++ b/clang/test/CodeGenCUDA/kernel-args.cu
@@ -12,7 +12,7 @@ struct A {
};
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
-// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}})
+// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}
@@ -20,7 +20,7 @@ __global__ void kernel(A x) {
class Kernel {
public:
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
- // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}})
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
// NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
static __global__ void memberKernel(A x){}
template<typename T> static __global__ void templateMemberKernel(T x) {}
@@ -35,12 +35,12 @@ void launch(void*);
void test() {
Kernel K;
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
- // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}})
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
// NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)templateKernel<A>);
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
- // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}}
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}}
// NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)Kernel::templateMemberKernel<A>);
}
diff --git a/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp b/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp
new file mode 100644
index 0000000000000..8f92d1fed1f9f
--- /dev/null
+++ b/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp
@@ -0,0 +1,321 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -O3 \
+// RUN: -o - %s | FileCheck --check-prefix=AMDGCNSPIRV %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -target-cpu gfx906 -emit-llvm -fcuda-is-device -O3 \
+// RUN: -o - %s | FileCheck --check-prefix=AMDGPU %s
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+
+union Transparent { unsigned x; };
+using V1 = unsigned __attribute__((ext_vector_type(1)));
+using V2 = unsigned __attribute__((ext_vector_type(2)));
+using V3 = unsigned __attribute__((ext_vector_type(3)));
+using V4 = unsigned __attribute__((ext_vector_type(4)));
+struct SingleElement { unsigned x; };
+struct ByRef { unsigned x[17]; };
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k0s(
+// AMDGCNSPIRV-SAME: i16 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META9:![0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k0s(
+// AMDGPU-SAME: i16 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k0(short) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k1j(
+// AMDGCNSPIRV-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k1j(
+// AMDGPU-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k1(unsigned) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k2d(
+// AMDGCNSPIRV-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k2d(
+// AMDGPU-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k2(double) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k311Transparent(
+// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k311Transparent(
+// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k3(Transparent) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k413SingleElement(
+// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k413SingleElement(
+// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k4(SingleElement) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k55ByRef(
+// AMDGCNSPIRV-SAME: ptr addrspace(2) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k55ByRef(
+// AMDGPU-SAME: ptr addrspace(4) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k5(ByRef) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k6Dv1_jDv2_jDv3_jDv4_j(
+// AMDGCNSPIRV-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k6Dv1_jDv2_jDv3_jDv4_j(
+// AMDGPU-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k6(V1, V2, V3, V4) { }
+
+// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k7Pj(
+// AMDGCNSPIRV-SAME: ptr addrspace(1) noundef readnone captures(none) [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k7Pj(
+// AMDGPU-SAME: ptr addrspace(1) noundef readnone captures(none) [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__global__ void k7(unsigned*) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f0s(
+// AMDGCNSPIRV-SAME: i16 noundef signext [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f0s(
+// AMDGPU-SAME: i16 noundef signext [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f0(short) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f1j(
+// AMDGCNSPIRV-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f1j(
+// AMDGPU-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f1(unsigned) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f2d(
+// AMDGCNSPIRV-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f2d(
+// AMDGPU-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f2(double) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f311Transparent(
+// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f311Transparent(
+// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f3(Transparent) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f413SingleElement(
+// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f413SingleElement(
+// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f4(SingleElement) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f55ByRef(
+// AMDGCNSPIRV-SAME: ptr noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f55ByRef(
+// AMDGPU-SAME: ptr addrspace(5) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f5(ByRef) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f6Dv1_jDv2_jDv3_jDv4_j(
+// AMDGCNSPIRV-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z2f6Dv1_jDv2_jDv3_jDv4_j(
+// AMDGPU-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret void
+//
+__device__ void f6(V1, V2, V3, V4) { }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef signext i16 @_Z2f7v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret i16 0
+//
+// AMDGPU-LABEL: define dso_local noundef signext i16 @_Z2f7v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret i16 0
+//
+__device__ short f7() { return 0; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z2f8v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret i32 0
+//
+// AMDGPU-LABEL: define dso_local noundef i32 @_Z2f8v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret i32 0
+//
+__device__ unsigned f8() { return 0; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef double @_Z2f9v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret double 0.000000e+00
+//
+// AMDGPU-LABEL: define dso_local noundef double @_Z2f9v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret double 0.000000e+00
+//
+__device__ double f9() { return 0.; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z3f10v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret i32 0
+//
+// AMDGPU-LABEL: define dso_local noundef i32 @_Z3f10v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret i32 0
+//
+__device__ Transparent f10() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z3f11v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret i32 0
+//
+// AMDGPU-LABEL: define dso_local noundef i32 @_Z3f11v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret i32 0
+//
+__device__ SingleElement f11() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z3f12v(
+// AMDGCNSPIRV-SAME: ptr dead_on_unwind noalias writable writeonly sret([[STRUCT_BYREF:%.*]]) align 4 captures(none) initializes((0, 68)) [[AGG_RESULT:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: tail call addrspace(4) void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(68) [[AGG_RESULT]], i8 0, i64 68, i1 false)
+// AMDGCNSPIRV-NEXT: ret void
+//
+// AMDGPU-LABEL: define dso_local void @_Z3f12v(
+// AMDGPU-SAME: ptr addrspace(5) dead_on_unwind noalias writable writeonly sret([[STRUCT_BYREF:%.*]]) align 4 captures(none) initializes((0, 68)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef align 4 dereferenceable(68) [[AGG_RESULT]], i8 0, i64 68, i1 false)
+// AMDGPU-NEXT: ret void
+//
+__device__ ByRef f12() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef <1 x i32> @_Z3f13v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret <1 x i32> zeroinitializer
+//
+// AMDGPU-LABEL: define dso_local noundef <1 x i32> @_Z3f13v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret <1 x i32> zeroinitializer
+//
+__device__ V1 f13() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef <2 x i32> @_Z3f14v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret <2 x i32> zeroinitializer
+//
+// AMDGPU-LABEL: define dso_local noundef <2 x i32> @_Z3f14v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret <2 x i32> zeroinitializer
+//
+__device__ V2 f14() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef <3 x i32> @_Z3f15v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret <3 x i32> zeroinitializer
+//
+// AMDGPU-LABEL: define dso_local noundef <3 x i32> @_Z3f15v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret <3 x i32> zeroinitializer
+//
+__device__ V3 f15() { return {}; }
+
+// AMDGCNSPIRV-LABEL: define spir_func noundef <4 x i32> @_Z3f16v(
+// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] {
+// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]]
+// AMDGCNSPIRV-NEXT: ret <4 x i32> zeroinitializer
+//
+// AMDGPU-LABEL: define dso_local noundef <4 x i32> @_Z3f16v(
+// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] {
+// AMDGPU-NEXT: [[ENTRY:.*:]]
+// AMDGPU-NEXT: ret <4 x i32> zeroinitializer
+//
+__device__ V4 f16() { return {}; }
+//.
+// AMDGCNSPIRV: [[META9]] = !{i32 1024, i32 1, i32 1}
+//.
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index dd57b74d79a5e..c4adba3a137c0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -374,7 +374,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
buildOpDecorate(VRegs[i][0], MIRBuilder,
SPIRV::Decoration::FuncParamAttr, {Attr});
}
- if (Arg.hasAttribute(Attribute::ByVal)) {
+ if (Arg.hasAttribute(Attribute::ByVal) ||
+ (Arg.hasAttribute(Attribute::ByRef) &&
+ F.getParent()->getTargetTriple().getVendor() ==
+ Triple::VendorType::AMD)) {
auto Attr =
static_cast<unsigned>(SPIRV::FunctionParameterAttribute::ByVal);
buildOpDecorate(VRegs[i][0], MIRBuilder,
diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll b/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll
new file mode 100644
index 0000000000000..1712ddbb9bda5
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll
@@ -0,0 +1,24 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck --check-prefixes=CHECK,SPIRV %s
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpName %[[#XKER:]] "x"
+; CHECK-DAG: OpName %[[#XFN:]] "x"
+; SPIRV-NOT: OpDecorate %[[#XKER]] FuncParamAttr ByVal
+; AMDGCNSPIRV: OpDecorate %[[#XKER]] FuncParamAttr ByVal
+; SPIRV-NOT: OpDecorate %[[#XFN]] FuncParamAttr ByVal
+; AMDGCNSPIRV: OpDecorate %[[#XFN]] FuncParamAttr ByVal
+
+%struct.S = type { i32 }
+%struct.SS = type { [7 x %struct.S] }
+
+define spir_kernel void @ker(ptr addrspace(2) noundef byref(%struct.SS) %x) {
+entry:
+ ret void
+}
+
+define spir_func void @fn(ptr noundef byref(%struct.SS) %x) {
+entry:
+ ret void
+}
\ No newline at end of file
>From 5b256a5ada23dfbf7e9b17d4ccacc81b334e5de5 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 28 Nov 2025 03:05:35 +0000
Subject: [PATCH 2/2] Fix formatting.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 12 +++++++-----
llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 2 +-
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 3540093074bfe..2134e158a663d 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -67,12 +67,14 @@ class AMDGCNSPIRVABIInfo : public SPIRVABIInfo {
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
+
public:
AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {}
void computeInfo(CGFunctionInfo &FI) const override;
- llvm::FixedVectorType *getOptimalVectorMemoryType(
- llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const override;
+ llvm::FixedVectorType *
+ getOptimalVectorMemoryType(llvm::FixedVectorType *Ty,
+ const LangOptions &LangOpt) const override;
};
} // end anonymous namespace
namespace {
@@ -251,9 +253,9 @@ unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const {
return (getContext().getTypeSize(Ty) + 31) / 32;
}
-llvm::Type *
-AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
- unsigned ToAS) const {
+llvm::Type *AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty,
+ unsigned FromAS,
+ unsigned ToAS) const {
// Single value types.
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
if (PtrTy && PtrTy->getAddressSpace() == FromAS)
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index c4adba3a137c0..bcaa3c4c66d76 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -377,7 +377,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (Arg.hasAttribute(Attribute::ByVal) ||
(Arg.hasAttribute(Attribute::ByRef) &&
F.getParent()->getTargetTriple().getVendor() ==
- Triple::VendorType::AMD)) {
+ Triple::VendorType::AMD)) {
auto Attr =
static_cast<unsigned>(SPIRV::FunctionParameterAttribute::ByVal);
buildOpDecorate(VRegs[i][0], MIRBuilder,