[clang] d12ee4b - clang/OpenCL: Extend tests for enqueued block attributes

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 30 11:03:29 PST 2023


Author: Matt Arsenault
Date: 2023-01-30T15:03:15-04:00
New Revision: d12ee4bf7c14a00b14890fc3042edd659dde7fb2

URL: https://github.com/llvm/llvm-project/commit/d12ee4bf7c14a00b14890fc3042edd659dde7fb2
DIFF: https://github.com/llvm/llvm-project/commit/d12ee4bf7c14a00b14890fc3042edd659dde7fb2.diff

LOG: clang/OpenCL: Extend tests for enqueued block attributes

Baseline tests showing that enqueued blocks are not getting the
correct attributes applied.

Added: 
    

Modified: 
    clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 17c5fc613285..c56aee03441d 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs --prefix-filecheck-ir-name VAR
-// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa -target-cpu gfx900 %s | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -disable-llvm-passes -fno-ident -emit-llvm -o - -triple amdgcn-amd-amdhsa %s -fdenormal-fp-math-f32=preserve-sign | FileCheck %s --check-prefix=CHECK
 
 typedef struct {int a;} ndrange_t;
 
@@ -35,9 +35,40 @@ kernel void test(global char *a, char b, global long *c, long d) {
 
   enqueue_kernel(default_queue, flags, ndrange, block);
 }
+
+// Test that target attributes are applied to the functions inserted for the
+// block.
+__attribute__((target("s-memtime-inst")))
+kernel void test_target_features_kernel(global int *i) {
+  queue_t default_queue;
+  unsigned flags = 0;
+  ndrange_t ndrange;
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(void) {
+                   __builtin_amdgcn_s_memtime();
+                 });
+}
+
+__attribute__((target("s-memtime-inst")))
+void test_target_features_func(global int *i) {
+  queue_t default_queue;
+  unsigned flags = 0;
+  ndrange_t ndrange;
+
+  enqueue_kernel(default_queue, flags, ndrange,
+                 ^(void) {
+                   __builtin_amdgcn_s_memtime();
+                 });
+}
+
+//.
+// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
+// CHECK: @__block_literal_global.1 = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_func_block_invoke }, align 8 #0
+//.
 // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@callee
-// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: (i64 noundef [[ID:%.*]], ptr addrspace(1) noundef [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[ID_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -53,7 +84,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent noinline norecurse nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@test
-// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+// CHECK-SAME: (ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
@@ -167,7 +198,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke
-// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -184,7 +215,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_kernel
-// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
+// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 }>, align 8, addrspace(5)
 // CHECK-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
@@ -195,7 +226,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2
-// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -218,7 +249,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_2_kernel
-// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
+// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
 // CHECK-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
@@ -229,7 +260,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3
-// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]], ptr addrspace(3) noundef [[LP:%.*]]) #[[ATTR4]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[LP_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
@@ -257,7 +288,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_3_kernel
-// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
+// CHECK-SAME: (<{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !11 !kernel_arg_access_qual !12 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !14 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
 // CHECK-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
@@ -268,7 +299,7 @@ kernel void test(global char *a, char b, global long *c, long d) {
 //
 // CHECK: Function Attrs: convergent noinline nounwind optnone
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4
-// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR3]] {
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -278,13 +309,13 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[BLOCK_CAPTURE_ADDR]], align 8
 // CHECK-NEXT:    [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[BLOCK_CAPTURE_ADDR1]], align 8
-// CHECK-NEXT:    call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR5:[0-9]+]]
+// CHECK-NEXT:    call void @callee(i64 noundef [[TMP0]], ptr addrspace(1) noundef [[TMP1]]) #[[ATTR9:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 //
 // CHECK: Function Attrs: convergent nounwind
 // CHECK-LABEL: define {{[^@]+}}@__test_block_invoke_4_kernel
-// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR4]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
+// CHECK-SAME: (<{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr addrspace(1) }>, align 8, addrspace(5)
 // CHECK-NEXT:    store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
@@ -292,13 +323,99 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK-NEXT:    call void @__test_block_invoke_4(ptr [[TMP2]])
 // CHECK-NEXT:    ret void
 //
+//
+// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@test_target_features_kernel
+// CHECK-SAME: (ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR6:[0-9]+]] !kernel_arg_addr_space !15 !kernel_arg_access_qual !8 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !10 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK: Function Attrs: convergent nounwind
+// CHECK-LABEL: define {{[^@]+}}@__test_target_features_kernel_block_invoke_kernel
+// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
+// CHECK-NEXT:    store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+// CHECK-NEXT:    call void @__test_target_features_kernel_block_invoke(ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK: Function Attrs: convergent noinline norecurse nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@test_target_features_func
+// CHECK-SAME: (ptr addrspace(1) noundef [[I:%.*]]) #[[ATTR8:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[FLAGS:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(1) [[I]], ptr addrspace(5) [[I_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, ptr addrspace(5) [[FLAGS]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[TMP]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) byval([[STRUCT_NDRANGE_T]]) [[TMP]], ptr @__test_target_features_func_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__block_literal_global.1 to ptr))
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK: Function Attrs: convergent noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke
+// CHECK-SAME: (ptr noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR4]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[BLOCK_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[DOTBLOCK_DESCRIPTOR]], ptr addrspace(5) [[BLOCK_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK: Function Attrs: convergent nounwind
+// CHECK-LABEL: define {{[^@]+}}@__test_target_features_func_block_invoke_kernel
+// CHECK-SAME: ({ i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR5]] !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, addrspace(5)
+// CHECK-NEXT:    store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
+// CHECK-NEXT:    call void @__test_target_features_func_block_invoke(ptr [[TMP2]])
+// CHECK-NEXT:    ret void
+//
 //.
-// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="false" }
-// CHECK: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-// CHECK: attributes #3 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
-// CHECK: attributes #4 = { convergent nounwind "enqueued-block" }
-// CHECK: attributes #5 = { convergent nounwind }
+// CHECK: attributes #0 = { "objc_arc_inert" }
+// CHECK: attributes #1 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #2 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
+// CHECK: attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+// CHECK: attributes #4 = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #5 = { convergent nounwind "enqueued-block" }
+// CHECK: attributes #6 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
+// CHECK: attributes #7 = { nocallback nofree nosync nounwind willreturn }
+// CHECK: attributes #8 = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
+// CHECK: attributes #9 = { convergent nounwind }
 //.
 // CHECK: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // CHECK: !1 = !{i32 1, !"wchar_size", i32 4}
@@ -315,4 +432,6 @@ kernel void test(global char *a, char b, global long *c, long d) {
 // CHECK: !12 = !{!"none", !"none"}
 // CHECK: !13 = !{!"__block_literal", !"void*"}
 // CHECK: !14 = !{!"", !""}
+// CHECK: !15 = !{i32 1}
+// CHECK: !16 = !{!"int*"}
 //.


        


More information about the cfe-commits mailing list