[clang] 00f6a7f - clang/OpenCL: Fix not setting convergent on block invoke kernels
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 30 11:03:27 PST 2023
Author: Matt Arsenault
Date: 2023-01-30T15:03:14-04:00
New Revision: 00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88
URL: https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88
DIFF: https://github.com/llvm/llvm-project/commit/00f6a7f02f9c8d542ce8ff1c9c037d9fdb421b88.diff
LOG: clang/OpenCL: Fix not setting convergent on block invoke kernels
Yet another example of how convergent not being the default is dangerous
and backwards.
Added:
Modified:
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 1b80529e36a72..7e08d42e866ff 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -12451,6 +12451,7 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
// FIXME: Apply default attributes
F->addFnAttr(llvm::Attribute::NoUnwind);
+ F->addFnAttr(llvm::Attribute::Convergent);
Builder.CreateRetVoid();
Builder.restoreIP(IP);
@@ -12504,6 +12505,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
// FIXME: Apply default attributes
F->addFnAttr(llvm::Attribute::NoUnwind);
+ F->addFnAttr(llvm::Attribute::Convergent);
F->addFnAttr("enqueued-block");
auto IP = CGF.Builder.saveIP();
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 4277dbbc20530..17c5fc6132856 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -182,7 +182,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
// CHECK-NEXT: ret void
//
//
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
// CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
// CHECK-NEXT: entry:
@@ -216,7 +216,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
// CHECK-NEXT: ret void
//
//
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
// CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
// CHECK-NEXT: entry:
@@ -255,7 +255,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
// CHECK-NEXT: ret void
//
//
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
// CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
// CHECK-NEXT: entry:
@@ -282,7 +282,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
// CHECK-NEXT: ret void
//
//
-// CHECK: Function Attrs: nounwind
+// CHECK: Function Attrs: convergent nounwind
// CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
// CHECK-NEXT: entry:
@@ -297,7 +297,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" }
// CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
// CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #4 = { nounwind "enqueued-block" }
+// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
// CHECK: attributes #5 = { convergent nounwind }
//.
// CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 3cfb5f55e5d21..bce1a922668a1 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -297,7 +297,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
+ // COMMON: call spir_func void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]]
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -393,7 +393,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: ret void
// COMMON: }
// COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
-// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
+// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) [[INVOKE_ATTR:#[0-9]+]]
// COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
@@ -412,3 +412,5 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
// COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})
+
+// COMMON: attributes [[INVOKE_ATTR]] = { convergent nounwind }
More information about the cfe-commits mailing list