[clang] 00f6a7f - clang/OpenCL: Fix not setting convergent on block invoke kernels

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 30 11:03:27 PST 2023


Author: Matt Arsenault
Date: 2023-01-30T15:03:14-04:00
New Revision: 00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88

URL: https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88
DIFF: https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88.diff

LOG: clang/OpenCL: Fix not setting convergent on block invoke kernels

Yet another example of how `convergent` not being the default is
dangerous and backwards.

Added: 
    

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp
    clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
    clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 1b80529e36a72..7e08d42e866ff 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -12451,6 +12451,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
 
   Builder.CreateRetVoid();
   Builder.restoreIP(IP);
@@ -12504,6 +12505,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
 
   // FIXME: Apply default attributes
   F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->addFnAttr(llvm::Attribute::Convergent);
   F->addFnAttr("enqueued-block");
 
   auto IP = CGF.Builder.saveIP();

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 4277dbbc20530..17c5fc6132856 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -182,7 +182,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -216,7 +216,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -255,7 +255,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
 // CHECK-NEXT:  entry:
@@ -282,7 +282,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    ret void
 //
 //
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
 // CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
@@ -297,7 +297,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" }
 // CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 // CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #4 = { nounwind "enqueued-block" }
+// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
 // CHECK: attributes #5 = { convergent nounwind }
 //.
 // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 3cfb5f55e5d21..bce1a922668a1 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -297,7 +297,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   };
 
   // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
-  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
+  // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]]
   block_A();
 
   // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -393,7 +393,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
 // COMMON:  ret void
 // COMMON: }
 // COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
-// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
+// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})  [[INVOKE_ATTR:#[0-9]+]]
 // COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
@@ -412,3 +412,5 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
 // COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
 // COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
 // COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})
+
+// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }


        


More information about the cfe-commits mailing list