[clang] d840396 - clang: Simplify emission of uniform-work-group-size attribute (#185066)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 9 03:17:35 PDT 2026
Author: Matt Arsenault
Date: 2026-03-09T11:17:30+01:00
New Revision: d840396e20b09244ac3defcc1f2946102e29f030
URL: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030
DIFF: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030.diff
LOG: clang: Simplify emission of uniform-work-group-size attribute (#185066)
Added:
Modified:
clang/include/clang/Options/Options.td
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGenCUDA/convergent.cu
clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
clang/test/CodeGenHIP/default-attributes.hip
clang/test/CodeGenHIP/hip_weak_alias.cpp
clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
clang/test/CodeGenOpenCL/convergent.cl
Removed:
################################################################################
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index cc05fb71c84e4..45902aee92f72 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -1035,7 +1035,7 @@ def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,
Group<Link_Group>;
defm offload_uniform_block : BoolFOption<"offload-uniform-block",
- LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">,
+ LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA || (LangOpts->OpenCL && LangOpts->OpenCLVersion <= 120)">,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Assume">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">,
BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 04b27925bab8e..6dfd0f915190a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2611,28 +2611,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
NumElemsParam);
}
- if (DeviceKernelAttr::isOpenCLSpelling(
- TargetDecl->getAttr<DeviceKernelAttr>()) &&
- CallingConv != CallingConv::CC_C &&
- CallingConv != CallingConv::CC_SpirFunction) {
- // Check CallingConv to avoid adding uniform-work-group-size attribute to
- // OpenCL Kernel Stub
- if (getLangOpts().OpenCLVersion <= 120) {
- // OpenCL v1.2 Work groups are always uniform
- FuncAttrs.addAttribute("uniform-work-group-size");
- } else {
- // OpenCL v2.0 Work groups may be whether uniform or not.
- // '-cl-uniform-work-group-size' compile option gets a hint
- // to the compiler that the global work-size be a multiple of
- // the work-group size specified to clEnqueueNDRangeKernel
- // (i.e. work groups are uniform).
- if (getLangOpts().OffloadUniformBlock)
- FuncAttrs.addAttribute("uniform-work-group-size");
- }
- }
-
- if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
- getLangOpts().OffloadUniformBlock)
+ // OpenCL v2.0: Work groups may be either uniform or non-uniform.
+ // The '-cl-uniform-work-group-size' compile option gives the compiler
+ // a hint that the global work-size is a multiple of the work-group
+ // size specified to clEnqueueNDRangeKernel
+ // (i.e. work groups are uniform).
+ if (getLangOpts().OffloadUniformBlock)
FuncAttrs.addAttribute("uniform-work-group-size");
if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu
index bb034ee4ff442..97a24dc20d841 100644
--- a/clang/test/CodeGenCUDA/convergent.cu
+++ b/clang/test/CodeGenCUDA/convergent.cu
@@ -36,27 +36,27 @@ __host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingels
// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]]
// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META2:![0-9]+]]
// DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
-// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META3:![0-9]+]]
-// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META4:![0-9]+]]
+// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR6:[0-9]+]], !srcloc [[META3:![0-9]+]]
+// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR7:[0-9]+]], !srcloc [[META4:![0-9]+]]
// DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]]
// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]]
+// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR8:[0-9]+]]
// DEVICE-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z3barv(
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X:%.*]] = alloca i32, align 4
-// HOST-NEXT: call void @_Z3bazv()
-// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META1:![0-9]+]]
+// HOST-NEXT: call void @_Z3bazv() #[[ATTR2:[0-9]+]]
+// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META1:![0-9]+]]
// HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
-// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META2:![0-9]+]]
-// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META3:![0-9]+]]
+// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4]], !srcloc [[META3:![0-9]+]]
// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]])
+// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR2]]
// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]])
+// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR2]]
// HOST-NEXT: ret void
//
__host__ __device__ void bar() {
@@ -71,27 +71,30 @@ __host__ __device__ void bar() {
//.
-// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR4]] = { convergent nounwind }
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR4]] = { convergent nounwind "uniform-work-group-size" }
// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) }
-// DEVICE: attributes #[[ATTR6]] = { nounwind }
+// DEVICE: attributes #[[ATTR6]] = { convergent nounwind }
+// DEVICE: attributes #[[ATTR7]] = { nounwind }
+// DEVICE: attributes #[[ATTR8]] = { nounwind "uniform-work-group-size" }
//.
-// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// HOST: attributes #[[ATTR2]] = { nounwind memory(none) }
-// HOST: attributes #[[ATTR3]] = { nounwind }
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
+// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
+// HOST: attributes #[[ATTR2]] = { "uniform-work-group-size" }
+// HOST: attributes #[[ATTR3]] = { nounwind memory(none) }
+// HOST: attributes #[[ATTR4]] = { nounwind }
//.
// DEVICE: [[META0:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
// DEVICE: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// DEVICE: [[META2]] = !{i64 3120}
-// DEVICE: [[META3]] = !{i64 3155}
-// DEVICE: [[META4]] = !{i64 3206}
+// DEVICE: [[META2]] = !{i64 3174}
+// DEVICE: [[META3]] = !{i64 3209}
+// DEVICE: [[META4]] = !{i64 3260}
//.
// HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// HOST: [[META1]] = !{i64 3120}
-// HOST: [[META2]] = !{i64 3155}
-// HOST: [[META3]] = !{i64 3206}
+// HOST: [[META1]] = !{i64 3174}
+// HOST: [[META2]] = !{i64 3209}
+// HOST: [[META3]] = !{i64 3260}
//.
diff --git a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
index bd1da1f05c1eb..f4641fd242b4d 100644
--- a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
+++ b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
@@ -16,7 +16,7 @@ int main(int argc, char ** argv) {
// CHECK: store ptr @_Z4kern7TempValIjE, ptr %fptr, align 8
return 0;
}
-// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #1 {
+// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #{{[0-9]+}} {
// CHECK: %2 = call i32 @hipLaunchByPtr(ptr @_Z4kern7TempValIjE)
// CHECK: define internal void @__hip_register_globals(ptr %0) {
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index faccebbdad7e7..d5b5aa0c84ace 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -33,10 +33,10 @@ __global__ void kernel() {
extern_func();
}
//.
-// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
-// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
+// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind "uniform-work-group-size" }
//.
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp
index 480a278dc55d3..33ff74a1cf995 100644
--- a/clang/test/CodeGenHIP/hip_weak_alias.cpp
+++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp
@@ -119,9 +119,9 @@ __host__ __device__ float __Four(float f) { return 2.0f * f; }
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
//.
-// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
//.
-// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
//.
// HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
index af6cc30fcd329..42630375aeb8f 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
@@ -198,7 +198,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
//.
// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// SPIR32: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
// SPIR32: attributes #[[ATTR4]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size" }
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index 99d9ee74e669b..cefa4d7f8ebe9 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -127,18 +127,18 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
// CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #6
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #8
kernel void assume_convergent_asm()
{
__asm__ volatile("s_barrier");
}
// CHECK: attributes #0 = { nofree noinline norecurse nounwind memory(readwrite, target_mem0: none, target_mem1: none) "
-// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
-// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} }
-// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} }
-// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
+// CHECK: attributes #1 = { convergent norecurse nounwind{{[^}]*}} }
+// CHECK: attributes #2 = { convergent nounwind{{[^}]*}} }
+// CHECK: attributes #3 = { convergent noduplicate nounwind{{[^}]*}} }
+// CHECK: attributes #4 = { alwaysinline convergent norecurse nounwind{{[^}]*}} }
+// CHECK: attributes #5 = { convergent nounwind "uniform-work-group-size" }
+// CHECK: attributes #6 = { nounwind "uniform-work-group-size" }
+// CHECK: attributes #7 = { convergent noduplicate nounwind "uniform-work-group-size" }
+// CHECK: attributes #8 = { convergent nounwind }
More information about the cfe-commits
mailing list