[clang] [clang][CodeGen][SPIRV] Translate `amdgpu_flat_work_group_size` into `reqd_work_group_size`. (PR #116820)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 19 07:33:31 PST 2024
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/116820
>From c5efdd24c0c889e26e3b00865780970ca5ed1f4c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 19 Nov 2024 14:55:25 +0000
Subject: [PATCH 1/2] Translate `amdgpu_flat_work_group_size` into
`reqd_work_group_size`.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 34 +++++++++++++++++++
clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu | 7 ++++
2 files changed, 41 insertions(+)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index a48fe9d5f1ee9c..c35d91b1f49af2 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -64,6 +64,8 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
@@ -245,6 +247,38 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
return DefaultGlobalAS;
}
+void SPIRVTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (!M.getLangOpts().HIP ||
+ M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+ return;
+ if (GV->isDeclaration())
+ return;
+
+ auto F = dyn_cast<llvm::Function>(GV);
+ if (!F)
+ return;
+
+ auto FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD)
+ return;
+ if (!FD->hasAttr<CUDAGlobalAttr>())
+ return;
+
+ unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
+ if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
+ N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();
+
+ auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
+ llvm::Metadata *AttrMDArgs[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};
+
+ F->setMetadata("reqd_work_group_size",
+ llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
+}
+
llvm::SyncScope::ID
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
llvm::AtomicOrdering,
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
index 11a133fd1351d2..3d01ac40259254 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -4,6 +4,9 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=CHECK,MAX1024 %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa --gpu-max-threads-per-block=1024 \
+// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefixes=CHECK-SPIRV,MAX1024-SPIRV %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
// RUN: -check-prefix=NAMD
@@ -21,12 +24,14 @@
__global__ void flat_work_group_size_default() {
// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z28flat_work_group_size_defaultv(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_DEFAULT:![0-9]+]]
// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
}
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z26flat_work_group_size_32_64v(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_64:![0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
__global__ void waves_per_eu_2() {
@@ -82,7 +87,9 @@ template __global__ void template_32_4_a_max_num_work_groups<2>();
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// MAX1024-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
+// CHECK-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_64]] = !{i32 64, i32 1, i32 1}
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
>From 4349defab2d4c15e1f123aa322e386eead8d6590 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 19 Nov 2024 15:33:20 +0000
Subject: [PATCH 2/2] Adapt test.
---
.../amdgpu-kernel-arg-pointer-type.cu | 36 ++++++++++---------
1 file changed, 20 insertions(+), 16 deletions(-)
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index b295bbbdaaf955..f34fee73fceace 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -33,7 +33,7 @@
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
-// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -58,7 +58,7 @@
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -102,7 +102,7 @@ __global__ void kernel1(int *x) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
-// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -126,7 +126,7 @@ __global__ void kernel1(int *x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -171,7 +171,7 @@ __global__ void kernel2(int &x) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
-// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
@@ -195,7 +195,7 @@ __global__ void kernel2(int &x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
-// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
@@ -302,7 +302,7 @@ struct S {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
@@ -343,7 +343,7 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
@@ -406,7 +406,7 @@ __global__ void kernel4(struct S s) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
-// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -446,7 +446,7 @@ __global__ void kernel4(struct S s) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -511,7 +511,7 @@ struct T {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
@@ -551,7 +551,7 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
@@ -606,7 +606,7 @@ __global__ void kernel6(struct T t) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
-// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -631,7 +631,7 @@ __global__ void kernel6(struct T t) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -677,7 +677,7 @@ struct SS {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
@@ -700,7 +700,7 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
@@ -727,5 +727,9 @@ __global__ void kernel8(struct SS a) {
*a.x += 3.f;
}
//.
+// CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
+//.
// OPT: [[META4]] = !{}
//.
+// OPT-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
+//.
More information about the cfe-commits
mailing list