[clang] 62dbb7e - Revert "[HIP] Change default --gpu-max-threads-per-block value to 1024"
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 2 13:13:06 PDT 2020
Author: Yaxun (Sam) Liu
Date: 2020-09-02T16:12:28-04:00
New Revision: 62dbb7e54c65386f3cd73ef761a22b73532158f0
URL: https://github.com/llvm/llvm-project/commit/62dbb7e54c65386f3cd73ef761a22b73532158f0
DIFF: https://github.com/llvm/llvm-project/commit/62dbb7e54c65386f3cd73ef761a22b73532158f0.diff
LOG: Revert "[HIP] Change default --gpu-max-threads-per-block value to 1024"
Temporarily revert commit 04abbb3a78186aa92809866b43217c32cba90b71
due to regressions in some HIP apps caused by backend issues revealed by
this change.
Will re-commit it when backend issues are fixed.
Added:
Modified:
clang/include/clang/Basic/LangOptions.def
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
clang/test/CodeGenCUDA/kernel-amdgcn.cu
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 3132e7635418..9846809763f8 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -240,7 +240,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr function
LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP")
-LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
+LANGOPT(GPUMaxThreadsPerBlock, 32, 256, "default max threads per block for kernel launch bounds for HIP")
LANGOPT(SYCL , 1, 0, "SYCL")
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d6efd5422087..e1ab61f10585 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9066,13 +9066,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
- // --gpu-max-threads-per-block=n or its default value for HIP.
- const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
- const unsigned DefaultMaxWorkGroupSize =
- IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
- : M.getLangOpts().GPUMaxThreadsPerBlock;
+ // --gpu-max-threads-per-block=n or its default value.
std::string AttrVal =
- std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
+ std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
index 7a9fd2527272..5415bddffc89 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -39,7 +39,7 @@ __global__ void num_vgpr_64() {
// NAMD-NOT: "amdgpu-num-vgpr"
// NAMD-NOT: "amdgpu-num-sgpr"
-// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
+// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}}"uniform-work-group-size"="true"
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
index 6066469f7647..135d3030480c 100644
--- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -39,4 +39,4 @@ int main() {
launch((void*)D.Empty());
return 0;
}
-// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"
More information about the cfe-commits
mailing list