[clang] d840396 - clang: Simplify emission of uniform-work-group-size attribute (#185066)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 9 03:17:35 PDT 2026
Author: Matt Arsenault
Date: 2026-03-09T11:17:30+01:00
New Revision: d840396e20b09244ac3defcc1f2946102e29f030
URL: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030
DIFF: https://github.com/llvm/llvm-project/commit/d840396e20b09244ac3defcc1f2946102e29f030.diff
LOG: clang: Simplify emission of uniform-work-group-size attribute (#185066)
Added:
Modified:
clang/include/clang/Options/Options.td
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGenCUDA/convergent.cu
clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
clang/test/CodeGenHIP/default-attributes.hip
clang/test/CodeGenHIP/hip_weak_alias.cpp
clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
clang/test/CodeGenOpenCL/convergent.cl
Removed:
################################################################################
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index cc05fb71c84e4..45902aee92f72 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -1035,7 +1035,7 @@ def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,
Group<Link_Group>;
defm offload_uniform_block : BoolFOption<"offload-uniform-block",
- LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">,
+ LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA || (LangOpts->OpenCL && LangOpts->OpenCLVersion <= 120)">,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Assume">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">,
BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 04b27925bab8e..6dfd0f915190a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2611,28 +2611,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
NumElemsParam);
}
- if (DeviceKernelAttr::isOpenCLSpelling(
- TargetDecl->getAttr<DeviceKernelAttr>()) &&
- CallingConv != CallingConv::CC_C &&
- CallingConv != CallingConv::CC_SpirFunction) {
- // Check CallingConv to avoid adding uniform-work-group-size attribute to
- // OpenCL Kernel Stub
- if (getLangOpts().OpenCLVersion <= 120) {
- // OpenCL v1.2 Work groups are always uniform
- FuncAttrs.addAttribute("uniform-work-group-size");
- } else {
- // OpenCL v2.0 Work groups may be whether uniform or not.
- // '-cl-uniform-work-group-size' compile option gets a hint
- // to the compiler that the global work-size be a multiple of
- // the work-group size specified to clEnqueueNDRangeKernel
- // (i.e. work groups are uniform).
- if (getLangOpts().OffloadUniformBlock)
- FuncAttrs.addAttribute("uniform-work-group-size");
- }
- }
-
- if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
- getLangOpts().OffloadUniformBlock)
+ // OpenCL v2.0: Work groups may be either uniform or non-uniform.
+ // The '-cl-uniform-work-group-size' compile option gives the compiler
+ // a hint that the global work-size is a multiple of the work-group
+ // size specified to clEnqueueNDRangeKernel
+ // (i.e. work groups are uniform).
+ if (getLangOpts().OffloadUniformBlock)
FuncAttrs.addAttribute("uniform-work-group-size");
if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu
index bb034ee4ff442..97a24dc20d841 100644
--- a/clang/test/CodeGenCUDA/convergent.cu
+++ b/clang/test/CodeGenCUDA/convergent.cu
@@ -36,27 +36,27 @@ __host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingels
// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR4:[0-9]+]]
// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META2:![0-9]+]]
// DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
-// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META3:![0-9]+]]
-// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META4:![0-9]+]]
+// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR6:[0-9]+]], !srcloc [[META3:![0-9]+]]
+// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR7:[0-9]+]], !srcloc [[META4:![0-9]+]]
// DEVICE-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
// DEVICE-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]]
// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]]
+// DEVICE-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR8:[0-9]+]]
// DEVICE-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z3barv(
// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[X:%.*]] = alloca i32, align 4
-// HOST-NEXT: call void @_Z3bazv()
-// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META1:![0-9]+]]
+// HOST-NEXT: call void @_Z3bazv() #[[ATTR2:[0-9]+]]
+// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META1:![0-9]+]]
// HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
-// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META2:![0-9]+]]
-// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META3:![0-9]+]]
+// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4]], !srcloc [[META3:![0-9]+]]
// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[X]], align 4
-// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]])
+// HOST-NEXT: [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR2]]
// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4
-// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]])
+// HOST-NEXT: [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR2]]
// HOST-NEXT: ret void
//
__host__ __device__ void bar() {
@@ -71,27 +71,30 @@ __host__ __device__ void bar() {
//.
-// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// DEVICE: attributes #[[ATTR4]] = { convergent nounwind }
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// DEVICE: attributes #[[ATTR4]] = { convergent nounwind "uniform-work-group-size" }
// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) }
-// DEVICE: attributes #[[ATTR6]] = { nounwind }
+// DEVICE: attributes #[[ATTR6]] = { convergent nounwind }
+// DEVICE: attributes #[[ATTR7]] = { nounwind }
+// DEVICE: attributes #[[ATTR8]] = { nounwind "uniform-work-group-size" }
//.
-// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// HOST: attributes #[[ATTR2]] = { nounwind memory(none) }
-// HOST: attributes #[[ATTR3]] = { nounwind }
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
+// HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
+// HOST: attributes #[[ATTR2]] = { "uniform-work-group-size" }
+// HOST: attributes #[[ATTR3]] = { nounwind memory(none) }
+// HOST: attributes #[[ATTR4]] = { nounwind }
//.
// DEVICE: [[META0:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
// DEVICE: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// DEVICE: [[META2]] = !{i64 3120}
-// DEVICE: [[META3]] = !{i64 3155}
-// DEVICE: [[META4]] = !{i64 3206}
+// DEVICE: [[META2]] = !{i64 3174}
+// DEVICE: [[META3]] = !{i64 3209}
+// DEVICE: [[META4]] = !{i64 3260}
//.
// HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// HOST: [[META1]] = !{i64 3120}
-// HOST: [[META2]] = !{i64 3155}
-// HOST: [[META3]] = !{i64 3206}
+// HOST: [[META1]] = !{i64 3174}
+// HOST: [[META2]] = !{i64 3209}
+// HOST: [[META3]] = !{i64 3260}
//.
diff --git a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
index bd1da1f05c1eb..f4641fd242b4d 100644
--- a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
+++ b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu
@@ -16,7 +16,7 @@ int main(int argc, char ** argv) {
// CHECK: store ptr @_Z4kern7TempValIjE, ptr %fptr, align 8
return 0;
}
-// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #1 {
+// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #{{[0-9]+}} {
// CHECK: %2 = call i32 @hipLaunchByPtr(ptr @_Z4kern7TempValIjE)
// CHECK: define internal void @__hip_register_globals(ptr %0) {
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index faccebbdad7e7..d5b5aa0c84ace 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -33,10 +33,10 @@ __global__ void kernel() {
extern_func();
}
//.
-// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
+// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
-// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
+// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind "uniform-work-group-size" }
//.
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp
index 480a278dc55d3..33ff74a1cf995 100644
--- a/clang/test/CodeGenHIP/hip_weak_alias.cpp
+++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp
@@ -119,9 +119,9 @@ __host__ __device__ float __Four(float f) { return 2.0f * f; }
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
//.
-// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "uniform-work-group-size" }
//.
-// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
//.
// HOST: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
index af6cc30fcd329..42630375aeb8f 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl
@@ -198,7 +198,7 @@ kernel void device_side_enqueue(global float *a, global float *b, int i) {
//.
// SPIR32: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// SPIR32: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// SPIR32: attributes #[[ATTR2]] = { convergent noinline nounwind optnone denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size" }
// SPIR32: attributes #[[ATTR3:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
// SPIR32: attributes #[[ATTR4]] = { convergent nounwind denormal_fpenv(float: preservesign) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// SPIR32: attributes #[[ATTR5]] = { convergent nounwind "uniform-work-group-size" }
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index 99d9ee74e669b..cefa4d7f8ebe9 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -127,18 +127,18 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
// CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #6
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #8
kernel void assume_convergent_asm()
{
__asm__ volatile("s_barrier");
}
// CHECK: attributes #0 = { nofree noinline norecurse nounwind memory(readwrite, target_mem0: none, target_mem1: none) "
-// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
-// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} }
-// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} }
-// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
+// CHECK: attributes #1 = { convergent norecurse nounwind{{[^}]*}} }
+// CHECK: attributes #2 = { convergent nounwind{{[^}]*}} }
+// CHECK: attributes #3 = { convergent noduplicate nounwind{{[^}]*}} }
+// CHECK: attributes #4 = { alwaysinline convergent norecurse nounwind{{[^}]*}} }
+// CHECK: attributes #5 = { convergent nounwind "uniform-work-group-size" }
+// CHECK: attributes #6 = { nounwind "uniform-work-group-size" }
+// CHECK: attributes #7 = { convergent noduplicate nounwind "uniform-work-group-size" }
+// CHECK: attributes #8 = { convergent nounwind }
More information about the cfe-commits
mailing list