[clang] [AMDGPU] fix amdgpu_max_num_work_groups in templates (PR #141633)
via cfe-commits
cfe-commits at lists.llvm.org
Tue May 27 10:06:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Yaxun (Sam) Liu (yxsamliu)
<details>
<summary>Changes</summary>
Clang does not instantiate amdgpu_max_num_work_groups attribute with one template argument, causing
assertion codegen.
Fixes: https://github.com/llvm/llvm-project/issues/139570
---
Full diff: https://github.com/llvm/llvm-project/pull/141633.diff
2 Files Affected:
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+22-13)
- (modified) clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu (+6)
``````````diff
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 44700a446dfac..174c8fc59e4fa 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -648,21 +648,30 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr(
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
- ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs);
- if (!ResultX.isUsable())
- return;
- ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs);
- if (!ResultY.isUsable())
- return;
- ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs);
- if (!ResultZ.isUsable())
- return;
+ Expr *XExpr = nullptr;
+ Expr *YExpr = nullptr;
+ Expr *ZExpr = nullptr;
+
+ if (Attr.getMaxNumWorkGroupsX()) {
+ ExprResult ResultX = S.SubstExpr(Attr.getMaxNumWorkGroupsX(), TemplateArgs);
+ if (ResultX.isUsable())
+ XExpr = ResultX.getAs<Expr>();
+ }
+
+ if (Attr.getMaxNumWorkGroupsY()) {
+ ExprResult ResultY = S.SubstExpr(Attr.getMaxNumWorkGroupsY(), TemplateArgs);
+ if (ResultY.isUsable())
+ YExpr = ResultY.getAs<Expr>();
+ }
- Expr *XExpr = ResultX.getAs<Expr>();
- Expr *YExpr = ResultY.getAs<Expr>();
- Expr *ZExpr = ResultZ.getAs<Expr>();
+ if (Attr.getMaxNumWorkGroupsZ()) {
+ ExprResult ResultZ = S.SubstExpr(Attr.getMaxNumWorkGroupsZ(), TemplateArgs);
+ if (ResultZ.isUsable())
+ ZExpr = ResultZ.getAs<Expr>();
+ }
- S.AMDGPU().addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr);
+ if (XExpr)
+ S.AMDGPU().addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr);
}
// This doesn't take any template parameters, but we have a custom action that
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
index 253ac0898f546..ced0059e69d9b 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@@ -78,6 +78,12 @@ __global__ void template_32_4_a_max_num_work_groups() {}
template __global__ void template_32_4_a_max_num_work_groups<2>();
// CHECK: define{{.*}} amdgpu_kernel void @_Z35template_32_4_a_max_num_work_groupsILj2EEvv() [[MAX_NUM_WORK_GROUPS_32_4_2:#[0-9]+]]
+template<unsigned a>
+__attribute__((amdgpu_max_num_work_groups(a)))
+__global__ void template_a_max_num_work_groups() {}
+template __global__ void template_a_max_num_work_groups<32>();
+// CHECK: define{{.*}} amdgpu_kernel void @_Z30template_a_max_num_work_groupsILj32EEvv() [[MAX_NUM_WORK_GROUPS_32_1_1]]
+
// Make sure this is silently accepted on other targets.
// NAMD-NOT: "amdgpu-flat-work-group-size"
// NAMD-NOT: "amdgpu-waves-per-eu"
``````````
</details>
https://github.com/llvm/llvm-project/pull/141633
More information about the cfe-commits
mailing list