[PATCH] D89582: clang/AMDGPU: Apply workgroup related attributes to all functions
Matt Arsenault via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 16 11:51:45 PDT 2020
arsenm created this revision.
arsenm added reviewers: yaxunl, rampitec.
Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, nhaehnle, jvesely, kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
When the default flat work group size is 256, it should also apply to
callable functions.
https://reviews.llvm.org/D89582
Files:
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
clang/test/CodeGenOpenCL/amdgpu-attrs.cl
Index: clang/test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ clang/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -190,5 +190,5 @@
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="56" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}}
+// CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} "amdgpu-flat-work-group-size"="1,256"
// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="56"
Index: clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -16,6 +16,10 @@
// CHECK: define amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
}
+__device__ void func_flat_work_group_size_default() {
+// CHECK: define void @_Z33func_flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC:#[0-9]+]]
+}
+
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
@@ -40,7 +44,11 @@
// NAMD-NOT: "amdgpu-num-sgpr"
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
+
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT_FUNC]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9031,7 +9031,7 @@
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
- if (IsHIPKernel)
+ if (M.getLangOpts().HIP)
F->addFnAttr("uniform-work-group-size", "true");
@@ -9057,7 +9057,7 @@
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
- } else if (IsOpenCLKernel || IsHIPKernel) {
+ } else {
// By default, restrict the maximum size to a value specified by
// --gpu-max-threads-per-block=n or its default value.
std::string AttrVal =
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D89582.298704.patch
Type: text/x-patch
Size: 3388 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201016/325c4c38/attachment.bin>
More information about the cfe-commits
mailing list