[llvm] a7749c3 - [AMDGPU] Use update_test_checks.py script for annotate kernel features tests.
Kuter Dinel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 17:13:51 PDT 2021
Author: Kuter Dinel
Date: 2021-07-15T03:13:37+03:00
New Revision: a7749c3f79a8b0df9ffe8a814a13f9450981b292
URL: https://github.com/llvm/llvm-project/commit/a7749c3f79a8b0df9ffe8a814a13f9450981b292
DIFF: https://github.com/llvm/llvm-project/commit/a7749c3f79a8b0df9ffe8a814a13f9450981b292.diff
LOG: [AMDGPU] Use update_test_checks.py script for annotate kernel features tests.
This patch makes the annotate kernel features tests use the update_test_checks.py
script, which makes it easy to update the tests.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D105864
Added:
Modified:
llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 809558b0aad7..6d1e45c65bc7 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
@@ -8,99 +9,165 @@ declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrs
@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
-; HSA: @store_cast_0_flat_to_group_addrspacecast() #1
+;.
+; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
+; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
+; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
+; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
+;.
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
+; HSA-SAME: () #[[ATTR1:[0-9]+]] {
+; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
ret void
}
-; HSA: @store_cast_0_group_to_flat_addrspacecast() #2
define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
+; HSA-SAME: () #[[ATTR2:[0-9]+]] {
+; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
ret void
}
-; HSA: define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #2
define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
ret void
}
-; HSA: @store_constant_cast_group_gv_gep_to_flat() #2
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
ret void
}
-; HSA: @store_constant_cast_global_gv_to_flat() #1
define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
+; HSA-SAME: () #[[ATTR1]] {
+; HSA-NEXT: store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
ret void
}
-; HSA: @store_constant_cast_global_gv_gep_to_flat() #1
define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
+; HSA-SAME: () #[[ATTR1]] {
+; HSA-NEXT: store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
ret void
}
-; HSA: @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
+; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; HSA-NEXT: ret void
+;
%val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
store i32 %val, i32 addrspace(1)* %out
ret void
}
-; HSA: @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
+; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
+; HSA-NEXT: ret void
+;
%val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
}
-; HSA: @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
+; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
+; HSA-NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; HSA-NEXT: store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
+; HSA-NEXT: ret void
+;
%val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
%val0 = extractvalue { i32, i1 } %val, 0
store i32 %val0, i32 addrspace(1)* %out
ret void
}
-; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2
define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
+; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
+; HSA-NEXT: ret void
+;
call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
ret void
}
; Can't just search the pointer value
-; HSA: @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #2
define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
+; HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
+; HSA-NEXT: ret void
+;
store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
ret void
}
; Can't just search pointer types
-; HSA: @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #2
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
+; HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
+; HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
+; HSA-NEXT: ret void
+;
store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
ret void
}
; Cast group to flat, do GEP, cast back to group
-; HSA: @store_constant_cast_group_gv_gep_to_flat_to_group() #2
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
+; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
+; HSA-NEXT: ret void
+;
store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
ret void
}
-; HSA: @ret_constant_cast_group_gv_gep_to_flat_to_group() #2
define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
+; HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
+;
ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
}
-; HSA: attributes #0 = { argmemonly nofree nounwind willreturn }
-; HSA: attributes #1 = { nounwind }
-; HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" }
-
attributes #0 = { argmemonly nounwind }
attributes #1 = { nounwind }
+;.
+; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
+; HSA: attributes #[[ATTR1]] = { nounwind }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 65644af6780e..8cb30fe96c9a 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s
declare i32 @llvm.amdgcn.workgroup.id.x() #0
@@ -14,71 +15,123 @@ declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
-; HSA: define void @use_workitem_id_x() #1 {
define void @use_workitem_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
+; HSA-SAME: () #[[ATTR1:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workitem_id_y() #2 {
define void @use_workitem_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workitem_id_y
+; HSA-SAME: () #[[ATTR2:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workitem_id_z() #3 {
define void @use_workitem_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workitem_id_z
+; HSA-SAME: () #[[ATTR3:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workgroup_id_x() #4 {
define void @use_workgroup_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x
+; HSA-SAME: () #[[ATTR4:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workgroup_id_y() #5 {
define void @use_workgroup_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y
+; HSA-SAME: () #[[ATTR5:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workgroup_id_z() #6 {
define void @use_workgroup_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z
+; HSA-SAME: () #[[ATTR6:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val, i32 addrspace(1)* undef
ret void
}
-; HSA: define void @use_dispatch_ptr() #7 {
define void @use_dispatch_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
+; HSA-SAME: () #[[ATTR7:[0-9]+]] {
+; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret void
+;
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @use_queue_ptr() #8 {
define void @use_queue_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
+; HSA-SAME: () #[[ATTR8:[0-9]+]] {
+; HSA-NEXT: [[QUEUE_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[QUEUE_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret void
+;
%queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
store volatile i8 addrspace(4)* %queue.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @use_dispatch_id() #9 {
define void @use_dispatch_id() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_dispatch_id
+; HSA-SAME: () #[[ATTR9:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; HSA-NEXT: store volatile i64 [[VAL]], i64 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val = call i64 @llvm.amdgcn.dispatch.id()
store volatile i64 %val, i64 addrspace(1)* undef
ret void
}
-; HSA: define void @use_workgroup_id_y_workgroup_id_z() #10 {
define void @use_workgroup_id_y_workgroup_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
+; HSA-SAME: () #[[ATTR10:[0-9]+]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* undef
@@ -86,224 +139,373 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 {
ret void
}
-; HSA: define void @func_indirect_use_workitem_id_x() #1 {
define void @func_indirect_use_workitem_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
+; HSA-SAME: () #[[ATTR1]] {
+; HSA-NEXT: call void @use_workitem_id_x()
+; HSA-NEXT: ret void
+;
call void @use_workitem_id_x()
ret void
}
-; HSA: define void @kernel_indirect_use_workitem_id_x() #1 {
define void @kernel_indirect_use_workitem_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
+; HSA-SAME: () #[[ATTR1]] {
+; HSA-NEXT: call void @use_workitem_id_x()
+; HSA-NEXT: ret void
+;
call void @use_workitem_id_x()
ret void
}
-; HSA: define void @func_indirect_use_workitem_id_y() #2 {
define void @func_indirect_use_workitem_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: call void @use_workitem_id_y()
+; HSA-NEXT: ret void
+;
call void @use_workitem_id_y()
ret void
}
-; HSA: define void @func_indirect_use_workitem_id_z() #3 {
define void @func_indirect_use_workitem_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
+; HSA-SAME: () #[[ATTR3]] {
+; HSA-NEXT: call void @use_workitem_id_z()
+; HSA-NEXT: ret void
+;
call void @use_workitem_id_z()
ret void
}
-; HSA: define void @func_indirect_use_workgroup_id_x() #4 {
define void @func_indirect_use_workgroup_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
+; HSA-SAME: () #[[ATTR4]] {
+; HSA-NEXT: call void @use_workgroup_id_x()
+; HSA-NEXT: ret void
+;
call void @use_workgroup_id_x()
ret void
}
-; HSA: define void @kernel_indirect_use_workgroup_id_x() #4 {
define void @kernel_indirect_use_workgroup_id_x() #1 {
+; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
+; HSA-SAME: () #[[ATTR4]] {
+; HSA-NEXT: call void @use_workgroup_id_x()
+; HSA-NEXT: ret void
+;
call void @use_workgroup_id_x()
ret void
}
-; HSA: define void @func_indirect_use_workgroup_id_y() #5 {
define void @func_indirect_use_workgroup_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
+; HSA-SAME: () #[[ATTR5]] {
+; HSA-NEXT: call void @use_workgroup_id_y()
+; HSA-NEXT: ret void
+;
call void @use_workgroup_id_y()
ret void
}
-; HSA: define void @func_indirect_use_workgroup_id_z() #6 {
define void @func_indirect_use_workgroup_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
+; HSA-SAME: () #[[ATTR6]] {
+; HSA-NEXT: call void @use_workgroup_id_z()
+; HSA-NEXT: ret void
+;
call void @use_workgroup_id_z()
ret void
}
-; HSA: define void @func_indirect_indirect_use_workgroup_id_y() #5 {
define void @func_indirect_indirect_use_workgroup_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
+; HSA-SAME: () #[[ATTR5]] {
+; HSA-NEXT: call void @func_indirect_use_workgroup_id_y()
+; HSA-NEXT: ret void
+;
call void @func_indirect_use_workgroup_id_y()
ret void
}
-; HSA: define void @indirect_x2_use_workgroup_id_y() #5 {
define void @indirect_x2_use_workgroup_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
+; HSA-SAME: () #[[ATTR5]] {
+; HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y()
+; HSA-NEXT: ret void
+;
call void @func_indirect_indirect_use_workgroup_id_y()
ret void
}
-; HSA: define void @func_indirect_use_dispatch_ptr() #7 {
define void @func_indirect_use_dispatch_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
+; HSA-SAME: () #[[ATTR7]] {
+; HSA-NEXT: call void @use_dispatch_ptr()
+; HSA-NEXT: ret void
+;
call void @use_dispatch_ptr()
ret void
}
-; HSA: define void @func_indirect_use_queue_ptr() #8 {
define void @func_indirect_use_queue_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
+; HSA-SAME: () #[[ATTR8]] {
+; HSA-NEXT: call void @use_queue_ptr()
+; HSA-NEXT: ret void
+;
call void @use_queue_ptr()
ret void
}
-; HSA: define void @func_indirect_use_dispatch_id() #9 {
define void @func_indirect_use_dispatch_id() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
+; HSA-SAME: () #[[ATTR9]] {
+; HSA-NEXT: call void @use_dispatch_id()
+; HSA-NEXT: ret void
+;
call void @use_dispatch_id()
ret void
}
-; HSA: define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #11 {
define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
+; HSA-SAME: () #[[ATTR11:[0-9]+]] {
+; HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
+; HSA-NEXT: ret void
+;
call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
ret void
}
-; HSA: define void @recursive_use_workitem_id_y() #2 {
define void @recursive_use_workitem_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: call void @recursive_use_workitem_id_y()
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, i32 addrspace(1)* undef
call void @recursive_use_workitem_id_y()
ret void
}
-; HSA: define void @call_recursive_use_workitem_id_y() #2 {
define void @call_recursive_use_workitem_id_y() #1 {
+; HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
+; HSA-SAME: () #[[ATTR2]] {
+; HSA-NEXT: call void @recursive_use_workitem_id_y()
+; HSA-NEXT: ret void
+;
call void @recursive_use_workitem_id_y()
ret void
}
-; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
+; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR8]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
-; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
+; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
+; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
-; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
+; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
+; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
+; HSA-NEXT: call void @func_indirect_use_queue_ptr()
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
call void @func_indirect_use_queue_ptr()
ret void
}
-; HSA: define void @indirect_use_group_to_flat_addrspacecast() #8 {
define void @indirect_use_group_to_flat_addrspacecast() #1 {
+; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
+; HSA-SAME: () #[[ATTR8]] {
+; HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
+; HSA-NEXT: ret void
+;
call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
ret void
}
-; HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #11 {
define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
+; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
+; HSA-SAME: () #[[ATTR11]] {
+; HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
+; HSA-NEXT: ret void
+;
call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
ret void
}
-; HSA: define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #8 {
define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
+; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
+; HSA-SAME: () #[[ATTR8]] {
+; HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
+; HSA-NEXT: ret void
+;
call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
ret void
}
-; HSA: define void @use_kernarg_segment_ptr() #14 {
define void @use_kernarg_segment_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
+; HSA-SAME: () #[[ATTR14:[0-9]+]] {
+; HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret void
+;
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @func_indirect_use_kernarg_segment_ptr() #11 {
define void @func_indirect_use_kernarg_segment_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
+; HSA-SAME: () #[[ATTR11]] {
+; HSA-NEXT: call void @use_kernarg_segment_ptr()
+; HSA-NEXT: ret void
+;
call void @use_kernarg_segment_ptr()
ret void
}
-; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
+; HSA-SAME: () #[[ATTR15:[0-9]+]] {
+; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret void
+;
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @use_implicitarg_ptr() #16 {
define void @use_implicitarg_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
+; HSA-SAME: () #[[ATTR16:[0-9]+]] {
+; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret void
+;
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
ret void
}
-; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
define void @func_indirect_use_implicitarg_ptr() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
+; HSA-SAME: () #[[ATTR16]] {
+; HSA-NEXT: call void @use_implicitarg_ptr()
+; HSA-NEXT: ret void
+;
call void @use_implicitarg_ptr()
ret void
}
-; HSA: declare void @external.func() #17
declare void @external.func() #3
-; HSA: define internal void @defined.func() #17 {
define internal void @defined.func() #3 {
+; HSA-LABEL: define {{[^@]+}}@defined.func
+; HSA-SAME: () #[[ATTR17:[0-9]+]] {
+; HSA-NEXT: ret void
+;
ret void
}
-; HSA: define void @func_call_external() #17 {
define void @func_call_external() #3 {
+; HSA-LABEL: define {{[^@]+}}@func_call_external
+; HSA-SAME: () #[[ATTR17]] {
+; HSA-NEXT: call void @external.func()
+; HSA-NEXT: ret void
+;
call void @external.func()
ret void
}
-; HSA: define void @func_call_defined() #17 {
define void @func_call_defined() #3 {
+; HSA-LABEL: define {{[^@]+}}@func_call_defined
+; HSA-SAME: () #[[ATTR17]] {
+; HSA-NEXT: call void @defined.func()
+; HSA-NEXT: ret void
+;
call void @defined.func()
ret void
}
-; HSA: define void @func_call_asm() #18 {
define void @func_call_asm() #3 {
+; HSA-LABEL: define {{[^@]+}}@func_call_asm
+; HSA-SAME: () #[[ATTR18:[0-9]+]] {
+; HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]]
+; HSA-NEXT: ret void
+;
call void asm sideeffect "", ""() #3
ret void
}
-; HSA: define amdgpu_kernel void @kern_call_external() #19 {
define amdgpu_kernel void @kern_call_external() #3 {
+; HSA-LABEL: define {{[^@]+}}@kern_call_external
+; HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; HSA-NEXT: call void @external.func()
+; HSA-NEXT: ret void
+;
call void @external.func()
ret void
}
-; HSA: define amdgpu_kernel void @func_kern_defined() #19 {
define amdgpu_kernel void @func_kern_defined() #3 {
+; HSA-LABEL: define {{[^@]+}}@func_kern_defined
+; HSA-SAME: () #[[ATTR19]] {
+; HSA-NEXT: call void @defined.func()
+; HSA-NEXT: ret void
+;
call void @defined.func()
ret void
}
-; HSA: define i32 @use_dispatch_ptr_ret_type() #20 {
define i32 @use_dispatch_ptr_ret_type() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
+; HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
+; HSA-NEXT: ret i32 0
+;
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
ret i32 0
}
-; HSA: define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #20 {
define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
+; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
+; HSA-SAME: () #[[ATTR20]] {
+; HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
+; HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; HSA-NEXT: ret float [[FADD]]
+;
%f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)()
%fadd = fadd float %f, 1.0
ret float %fadd
@@ -314,24 +516,26 @@ attributes #1 = { nounwind "target-cpu"="fiji" }
attributes #2 = { nounwind "target-cpu"="gfx900" }
attributes #3 = { nounwind }
-; HSA: attributes #0 = { nounwind readnone speculatable willreturn }
-; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
-; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
-; HSA: attributes #18 = { nounwind }
-; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
-; HSA: attributes #20 = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" }
+;.
+; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
+; HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
+; HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR18]] = { nounwind }
+; HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 9e8ecb89865f..49a40972ff75 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
@@ -17,22 +18,39 @@ declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2
-; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_x
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_y
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -40,8 +58,15 @@ define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -49,15 +74,27 @@ define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -65,8 +102,15 @@ define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -74,8 +118,17 @@ define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
%val2 = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -85,29 +138,51 @@ define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_x
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_y
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
+; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -115,8 +190,15 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -124,8 +206,17 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -135,8 +226,23 @@ define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_all_workitems
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
+; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; HSA-NEXT: [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; HSA-NEXT: [[VAL3:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; HSA-NEXT: [[VAL4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; HSA-NEXT: [[VAL5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -152,8 +258,15 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
+; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
+; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
@@ -161,8 +274,15 @@ define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_queue_ptr
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
+; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
+; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
@@ -170,8 +290,15 @@ define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[BC:%.*]] = bitcast i8 addrspace(4)* [[DISPATCH_PTR]] to i32 addrspace(4)*
+; HSA-NEXT: [[VAL:%.*]] = load i32, i32 addrspace(4)* [[BC]], align 4
+; HSA-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
+; HSA-NEXT: ret void
+;
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
%bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %bc
@@ -179,88 +306,153 @@ define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
+; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR11]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32*
+; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(3)* %ptr to i32*
store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
+; HSA-SAME: (i32 addrspace(5)* [[PTR:%.*]]) #[[ATTR11]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(5)* [[PTR]] to i32*
+; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(5)* %ptr to i32*
store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
+; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(3)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(3)* [[FTOS]], align 4
+; HSA-NEXT: ret void
+;
%ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
+; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(5)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(5)* [[FTOS]], align 4
+; HSA-NEXT: ret void
+;
%ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}
; No-op addrspacecast should not use queue ptr
-; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; HSA-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(1)* [[PTR]] to i32*
+; HSA-NEXT: store volatile i32 0, i32* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(1)* %ptr to i32*
store volatile i32 0, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; HSA-SAME: (i32 addrspace(4)* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(4)* [[PTR]] to i32*
+; HSA-NEXT: [[LD:%.*]] = load volatile i32, i32* [[STOF]], align 4
+; HSA-NEXT: ret void
+;
%stof = addrspacecast i32 addrspace(4)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
+; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
+; HSA-NEXT: store volatile i32 0, i32 addrspace(1)* [[FTOS]], align 4
+; HSA-NEXT: ret void
+;
%ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
+; HSA-SAME: (i32* [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-NEXT: [[FTOS:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(4)*
+; HSA-NEXT: [[LD:%.*]] = load volatile i32, i32 addrspace(4)* [[FTOS]], align 4
+; HSA-NEXT: ret void
+;
%ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %ftos
ret void
}
-; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_is_shared
+; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
+; HSA-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[PTR]])
+; HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
+; HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
%ext = zext i1 %is.shared to i32
store i32 %ext, i32 addrspace(1)* undef
ret void
}
-; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
+; HSA-LABEL: define {{[^@]+}}@use_is_private
+; HSA-SAME: (i8* [[PTR:%.*]]) #[[ATTR11]] {
+; HSA-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[PTR]])
+; HSA-NEXT: [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
+; HSA-NEXT: store i32 [[EXT]], i32 addrspace(1)* undef, align 4
+; HSA-NEXT: ret void
+;
%is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
%ext = zext i1 %is.private to i32
store i32 %ext, i32 addrspace(1)* undef
ret void
}
-; HSA: define amdgpu_kernel void @use_alloca() #13 {
define amdgpu_kernel void @use_alloca() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_alloca
+; HSA-SAME: () #[[ATTR13:[0-9]+]] {
+; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
+; HSA-NEXT: ret void
+;
%alloca = alloca i32, addrspace(5)
store i32 0, i32 addrspace(5)* %alloca
ret void
}
-; HSA: define amdgpu_kernel void @use_alloca_non_entry_block() #13 {
define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
+; HSA-SAME: () #[[ATTR13]] {
+; HSA-NEXT: entry:
+; HSA-NEXT: br label [[BB:%.*]]
+; HSA: bb:
+; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
+; HSA-NEXT: ret void
+;
entry:
br label %bb
@@ -270,8 +462,13 @@ bb:
ret void
}
-; HSA: define void @use_alloca_func() #13 {
define void @use_alloca_func() #1 {
+; HSA-LABEL: define {{[^@]+}}@use_alloca_func
+; HSA-SAME: () #[[ATTR13]] {
+; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4
+; HSA-NEXT: ret void
+;
%alloca = alloca i32, addrspace(5)
store i32 0, i32 addrspace(5)* %alloca
ret void
@@ -280,17 +477,19 @@ define void @use_alloca_func() #1 {
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
-; HSA: attributes #0 = { nounwind readnone speculatable willreturn }
-; HSA: attributes #1 = { nounwind }
-; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
-; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
-; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
-; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
-; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
-; HSA: attributes #13 = { nounwind "amdgpu-stack-objects" }
+;.
+; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
+; HSA: attributes #[[ATTR1]] = { nounwind }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
+; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
+; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 09750da4cb8c..fd4b89a8d59a 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck %s
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
@@ -12,22 +13,36 @@ declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0
-; ALL: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_x(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tgid.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_y(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tgid.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @multi_use_tgid_y(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
%val1 = call i32 @llvm.r600.read.tgid.y()
@@ -35,8 +50,14 @@ define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_x_y(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -44,15 +65,25 @@ define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_z(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tgid.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_x_z(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -60,8 +91,14 @@ define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_y_z(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tgid.y()
%val1 = call i32 @llvm.r600.read.tgid.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -69,8 +106,16 @@ define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tgid_x_y_z(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tgid.x()
%val1 = call i32 @llvm.r600.read.tgid.y()
%val2 = call i32 @llvm.r600.read.tgid.z()
@@ -80,29 +125,47 @@ define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_x(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tidig.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_y(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tidig.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_z(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.tidig.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_x_tgid_x(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tgid.x()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -110,8 +173,14 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_y_tgid_y(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tidig.y()
%val1 = call i32 @llvm.r600.read.tgid.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
@@ -119,8 +188,16 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_tidig_x_y_z(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
+; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tidig.y()
%val2 = call i32 @llvm.r600.read.tidig.z()
@@ -130,8 +207,22 @@ define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; ALL: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_all_workitems(
+; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
+; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
+; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
+; CHECK-NEXT: [[VAL3:%.*]] = call i32 @llvm.r600.read.tgid.x()
+; CHECK-NEXT: [[VAL4:%.*]] = call i32 @llvm.r600.read.tgid.y()
+; CHECK-NEXT: [[VAL5:%.*]] = call i32 @llvm.r600.read.tgid.z()
+; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL4]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 [[VAL5]], i32 addrspace(1)* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
%val0 = call i32 @llvm.r600.read.tidig.x()
%val1 = call i32 @llvm.r600.read.tidig.y()
%val2 = call i32 @llvm.r600.read.tidig.z()
@@ -147,25 +238,34 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
ret void
}
-; HSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 {
-; NOHSA: define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_get_local_size_x(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.local.size.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 {
-; NOHSA: define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_get_local_size_y(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.local.size.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
-; HSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 {
-; NOHSA: define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
+; CHECK-LABEL: @use_get_local_size_z(
+; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z()
+; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
%val = call i32 @llvm.r600.read.local.size.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index 6ded80984e86..3dff193876d7 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -1,25 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
-; GCN-LABEL: define internal void @indirect() #0 {
define internal void @indirect() {
- ret void
+; GCN-LABEL: define {{[^@]+}}@indirect
+; GCN-SAME: () #[[ATTR0:[0-9]+]] {
+; GCN-NEXT: ret void
+;
+ ret void
}
-; GCN-LABEL: define internal void @direct() #1 {
define internal void @direct() {
- %fptr = alloca void()*
- store void()* @indirect, void()** %fptr
- %fp = load void()*, void()** %fptr
- call void %fp()
- ret void
+; GCN-LABEL: define {{[^@]+}}@direct
+; GCN-SAME: () #[[ATTR1:[0-9]+]] {
+; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
+; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
+; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
+; GCN-NEXT: call void [[FP]]()
+; GCN-NEXT: ret void
+;
+ %fptr = alloca void()*
+ store void()* @indirect, void()** %fptr
+ %fp = load void()*, void()** %fptr
+ call void %fp()
+ ret void
}
-; GCN-LABEL: define amdgpu_kernel void @test_direct_indirect_call() #2 {
define amdgpu_kernel void @test_direct_indirect_call() {
+; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; GCN-SAME: () #[[ATTR2:[0-9]+]] {
+; GCN-NEXT: call void @direct()
+; GCN-NEXT: ret void
+;
call void @direct()
ret void
}
-
-; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
-; attributes #2 = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+;.
+; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+; GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 0786f14b5e92..d98564a83837 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -1,22 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
-; GCN-LABEL: define internal void @indirect() #0 {
define internal void @indirect() {
- ret void
+; GCN-LABEL: define {{[^@]+}}@indirect
+; GCN-SAME: () #[[ATTR0:[0-9]+]] {
+; GCN-NEXT: ret void
+;
+ ret void
}
-; GCN-LABEL: define amdgpu_kernel void @test_simple_indirect_call() #1 {
define amdgpu_kernel void @test_simple_indirect_call() #0 {
- %fptr = alloca void()*
- store void()* @indirect, void()** %fptr
- %fp = load void()*, void()** %fptr
- call void %fp()
- ret void
+; GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; GCN-SAME: () #[[ATTR1:[0-9]+]] {
+; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
+; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
+; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
+; GCN-NEXT: call void [[FP]]()
+; GCN-NEXT: ret void
+;
+ %fptr = alloca void()*
+ store void()* @indirect, void()** %fptr
+ %fp = load void()*, void()** %fptr
+ call void %fp()
+ ret void
}
attributes #0 = { "amdgpu-dispatch-id" }
-; compiler modification to attributes
-attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-attributes #1 = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-
+;.
+; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index e5599c69d73e..6eda83885306 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -1,16 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
target datalayout = "A5"
-; GCN-LABEL: define internal void @indirect() #0 {
; GFX9-LABEL: {{^}}indirect:
define internal void @indirect() {
- ret void
+; GCN-LABEL: define {{[^@]+}}@indirect
+; GCN-SAME: () #[[ATTR0:[0-9]+]] {
+; GCN-NEXT: ret void
+;
+ ret void
}
-; GCN-LABEL: define amdgpu_kernel void @test_simple_indirect_call() #1 {
; GFX9-LABEL: {{^}}test_simple_indirect_call:
; GFX9: s_add_u32 flat_scratch_lo, s12, s17
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
@@ -39,13 +42,26 @@ define internal void @indirect() {
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT: s_endpgm
define amdgpu_kernel void @test_simple_indirect_call() {
- %fptr = alloca void()*, addrspace(5)
- %fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()**
- store void()* @indirect, void()** %fptr.cast
- %fp = load void()*, void()** %fptr.cast
- call void %fp()
- ret void
+; GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; GCN-SAME: () #[[ATTR1:[0-9]+]] {
+; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8, addrspace(5)
+; GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast void ()* addrspace(5)* [[FPTR]] to void ()**
+; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR_CAST]], align 8
+; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR_CAST]], align 8
+; GCN-NEXT: call void [[FP]]()
+; GCN-NEXT: ret void
+;
+ %fptr = alloca void()*, addrspace(5)
+ %fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()**
+ store void()* @indirect, void()** %fptr.cast
+ %fp = load void()*, void()** %fptr.cast
+ call void %fp()
+ ret void
}
; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+;.
+; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index 022ff8547ef5..a9e245be949f 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -1,19 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
-; CHECK: define void @foo() #[[FOO:[0-9]+]] {
define void @foo() #0 {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: ret void
+;
call void @foo()
ret void
}
attributes #0 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index 321b8cb086a0..78bef7b8b02f 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -1,25 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to verify if the attribute gets propagated across nested function calls
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func1
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #1 {
+; CHECK-LABEL: define {{[^@]+}}@func2
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func1()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel3() #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel3() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel3
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func2()
+; CHECK-NEXT: ret void
+;
call void @func2()
ret void
}
attributes #2 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 78136d90d6f1..cdedcaee9865 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -1,26 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Two kernels with different values of the uniform-work-group-attribute call the same function
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
attributes #1 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index 4214587842fa..b4850448388a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -1,25 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
-; CHECK: define void @func() #[[FUNC:[0-9]+]] {
define void @func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func()
+; CHECK-NEXT: ret void
+;
call void @func()
ret void
}
; External declaration of a function
-; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
define weak_odr void @weak_func() #0 {
+; CHECK-LABEL: define {{[^@]+}}@weak_func
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @weak_func()
+; CHECK-NEXT: ret void
+;
call void @weak_func()
ret void
}
@@ -28,6 +43,8 @@ attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 0b6053fb4c4a..a8b6a7f91828 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -1,10 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers
-; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
define i32 @fib(i32 %n) #0 {
+; CHECK-LABEL: define {{[^@]+}}@fib
+; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
+; CHECK: cont1:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
+; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
+; CHECK: cont2:
+; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
+; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
+; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 1
+;
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %exit, label %cont1
@@ -25,8 +42,13 @@ exit:
ret i32 1
}
-; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
+; CHECK-LABEL: define {{[^@]+}}@kernel
+; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
+; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
+; CHECK-NEXT: ret void
+;
%r = call i32 @fib(i32 5)
store i32 %r, i32 addrspace(1)* %m
ret void
@@ -34,5 +56,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
attributes #1 = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index a9a2f12ef1f4..fba4b1f43341 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -1,30 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s
-; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
define void @func1() {
+; CHECK-LABEL: define {{[^@]+}}@func1
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func4() #[[FUNC]] {
define void @func4() {
+; CHECK-LABEL: define {{[^@]+}}@func4
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: ret void
+;
ret void
}
-; CHECK: define void @func2() #[[FUNC]] {
define void @func2() #0 {
+; CHECK-LABEL: define {{[^@]+}}@func2
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func4()
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func4()
call void @func1()
ret void
}
-; CHECK: define void @func3() #[[FUNC]] {
define void @func3() {
+; CHECK-LABEL: define {{[^@]+}}@func3
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: call void @func1()
+; CHECK-NEXT: ret void
+;
call void @func1()
ret void
}
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
define amdgpu_kernel void @kernel3() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel3
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @func2()
+; CHECK-NEXT: call void @func3()
+; CHECK-NEXT: ret void
+;
call void @func2()
call void @func3()
ret void
@@ -32,4 +53,7 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[FUNC]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
More information about the llvm-commits
mailing list