[llvm] 4998de4 - AMDGPU: Update some wait tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 1 18:14:03 PST 2022
Author: Matt Arsenault
Date: 2022-12-01T21:01:58-05:00
New Revision: 4998de4dcc83a0b0ad0b797716a15630ebed1755
URL: https://github.com/llvm/llvm-project/commit/4998de4dcc83a0b0ad0b797716a15630ebed1755
DIFF: https://github.com/llvm/llvm-project/commit/4998de4dcc83a0b0ad0b797716a15630ebed1755.diff
LOG: AMDGPU: Update some wait tests to opaque pointers
The script mangled the constantexprs in waitcnt-looptest.ll, so those were
fixed manually.
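For context, the conversion applied by the update script is mechanical: typed
pointer types become the address-space-qualified ptr type, and pointer-to-pointer
bitcasts fold away. A minimal sketch of the pattern (illustrative IR, not taken
from the committed tests):

  ; typed pointers
  %gep = getelementptr i32, i32 addrspace(1)* %p, i64 1
  %val = load i32, i32 addrspace(1)* %gep, align 4

  ; opaque pointers
  %gep = getelementptr i32, ptr addrspace(1) %p, i64 1
  %val = load i32, ptr addrspace(1) %gep, align 4

getelementptr and load keep their explicit value types, so the source element
type is still spelled out even though the pointer type no longer carries it.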
Added:
Modified:
llvm/test/CodeGen/AMDGPU/wait.ll
llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll
index 0d23e447b427e..41dd58db538d4 100644
--- a/llvm/test/CodeGen/AMDGPU/wait.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait.ll
@@ -13,19 +13,18 @@
; DEFAULT-DAG: exp
; DEFAULT: exp
; DEFAULT-NEXT: s_endpgm
-define amdgpu_vs void @main(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, <16 x i8> addrspace(4)* inreg %arg3, <16 x i8> addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(4)* inreg %constptr) #0 {
+define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(4) inreg %constptr) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 0
- %tmp10 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, !tbaa !0
+ %tmp10 = load <16 x i8>, ptr addrspace(4) %arg3, !tbaa !0
%tmp10.cast = bitcast <16 x i8> %tmp10 to <4 x i32>
%tmp11 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp10.cast, i32 %arg6, i32 0, i32 0, i32 0)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.amdgcn.s.barrier() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
- %tmp15 = load float, float addrspace(4)* %constptr, align 4
- %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(4)* %arg3, i32 1
- %tmp17 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp16, !tbaa !0
+ %tmp15 = load float, ptr addrspace(4) %constptr, align 4
+ %tmp16 = getelementptr <16 x i8>, ptr addrspace(4) %arg3, i32 1
+ %tmp17 = load <16 x i8>, ptr addrspace(4) %tmp16, !tbaa !0
%tmp17.cast = bitcast <16 x i8> %tmp17 to <4 x i32>
%tmp18 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp17.cast, i32 %arg6, i32 0, i32 0, i32 0)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
@@ -46,10 +45,9 @@ main_body:
; ILPMAX: exp pos0
; ILPMAX-NEXT: exp param0
; ILPMAX: s_endpgm
-define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <16 x i8>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
main_body:
- %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0
- %tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0
+ %tmp11 = load <16 x i8>, ptr addrspace(4) %arg4, align 16, !tbaa !0
%tmp12 = add i32 %arg5, %arg7
%tmp11.cast = bitcast <16 x i8> %tmp11 to <4 x i32>
%tmp13 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp11.cast, i32 %tmp12, i32 0, i32 0, i32 0)
@@ -57,8 +55,8 @@ main_body:
%tmp15 = extractelement <4 x float> %tmp13, i32 1
%tmp16 = extractelement <4 x float> %tmp13, i32 2
%tmp17 = extractelement <4 x float> %tmp13, i32 3
- %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 1
- %tmp19 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp18, align 16, !tbaa !0
+ %tmp18 = getelementptr [16 x <16 x i8>], ptr addrspace(4) %arg4, i64 0, i64 1
+ %tmp19 = load <16 x i8>, ptr addrspace(4) %tmp18, align 16, !tbaa !0
%tmp20 = add i32 %arg5, %arg7
%tmp19.cast = bitcast <16 x i8> %tmp19 to <4 x i32>
%tmp21 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp19.cast, i32 %tmp20, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
index 6df033f5d6297..203f1633fd8a5 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-flat.ll
@@ -10,9 +10,9 @@
; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0)
; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @test(i32* %out, i32 %in) {
- store volatile i32 0, i32* %out
- %val = load volatile i32, i32* %out
+define amdgpu_kernel void @test(ptr %out, i32 %in) {
+ store volatile i32 0, ptr %out
+ %val = load volatile i32, ptr %out
ret void
}
@@ -21,8 +21,8 @@ define amdgpu_kernel void @test(i32* %out, i32 %in) {
; GFX9: global_load_dword [[LD:v[0-9]+]]
; GFX9-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX9-NEXT: ds_write_b32 [[LD]]
-define amdgpu_kernel void @test_waitcnt_type_flat_global(i32 addrspace(1)* %in) {
- %val = load volatile i32, i32 addrspace(1)* %in
- store volatile i32 %val, i32 addrspace(3)* undef
+define amdgpu_kernel void @test_waitcnt_type_flat_global(ptr addrspace(1) %in) {
+ %val = load volatile i32, ptr addrspace(1) %in
+ store volatile i32 %val, ptr addrspace(3) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
index 8a0d65ebb1743..b32ce6eb0acc0 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
@@ -15,113 +15,111 @@
@data_generic = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4
@data_reference = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4
-define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 {
+define amdgpu_kernel void @testKernel(ptr addrspace(1) nocapture %arg) local_unnamed_addr #0 {
bb:
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
- store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr ([100 x float], ptr addrspacecast (ptr addrspace(1) @data_generic to ptr), i64 0, i64 4), align 4
+ store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr ([100 x float], ptr addrspacecast (ptr addrspace(1) @data_reference to ptr), i64 0, i64 4), align 4
br label %bb18
bb1: ; preds = %bb18
- %tmp = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %tmp = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %tmp4 = getelementptr inbounds i8, i8 addrspace(4)* %tmp, i64 4
- %tmp5 = bitcast i8 addrspace(4)* %tmp4 to i16 addrspace(4)*
- %tmp6 = load i16, i16 addrspace(4)* %tmp5, align 4
+ %tmp4 = getelementptr inbounds i8, ptr addrspace(4) %tmp, i64 4
+ %tmp6 = load i16, ptr addrspace(4) %tmp4, align 4
%tmp7 = zext i16 %tmp6 to i32
%tmp8 = mul i32 %tmp3, %tmp7
%tmp9 = add i32 %tmp8, %tmp2
- %tmp10 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %tmp10 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%tmp11 = zext i32 %tmp9 to i64
- %tmp12 = bitcast i8 addrspace(4)* %tmp10 to i64 addrspace(4)*
- %tmp13 = load i64, i64 addrspace(4)* %tmp12, align 8
+ %tmp13 = load i64, ptr addrspace(4) %tmp10, align 8
%tmp14 = add i64 %tmp13, %tmp11
%tmp15 = zext i1 %tmp99 to i32
%tmp16 = and i64 %tmp14, 4294967295
- %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16
- store i32 %tmp15, i32 addrspace(1)* %tmp17, align 4
+ %tmp17 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp16
+ store i32 %tmp15, ptr addrspace(1) %tmp17, align 4
ret void
bb18: ; preds = %bb18, %bb
%tmp19 = phi i64 [ 0, %bb ], [ %tmp102, %bb18 ]
%tmp20 = phi i32 [ 0, %bb ], [ %tmp100, %bb18 ]
%tmp21 = phi i1 [ true, %bb ], [ %tmp99, %bb18 ]
- %tmp22 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp19
- %tmp23 = load float, float addrspace(1)* %tmp22, align 4
- %tmp24 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp19
- %tmp25 = load float, float addrspace(1)* %tmp24, align 4
+ %tmp22 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp19
+ %tmp23 = load float, ptr addrspace(1) %tmp22, align 4
+ %tmp24 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp19
+ %tmp25 = load float, ptr addrspace(1) %tmp24, align 4
%tmp26 = fcmp oeq float %tmp23, %tmp25
%tmp27 = and i1 %tmp21, %tmp26
%tmp28 = or i32 %tmp20, 1
%tmp29 = sext i32 %tmp28 to i64
- %tmp30 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp29
- %tmp31 = load float, float addrspace(1)* %tmp30, align 4
- %tmp32 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp29
- %tmp33 = load float, float addrspace(1)* %tmp32, align 4
+ %tmp30 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp29
+ %tmp31 = load float, ptr addrspace(1) %tmp30, align 4
+ %tmp32 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp29
+ %tmp33 = load float, ptr addrspace(1) %tmp32, align 4
%tmp34 = fcmp oeq float %tmp31, %tmp33
%tmp35 = and i1 %tmp27, %tmp34
%tmp36 = add nuw nsw i32 %tmp20, 2
%tmp37 = sext i32 %tmp36 to i64
- %tmp38 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp37
- %tmp39 = load float, float addrspace(1)* %tmp38, align 4
- %tmp40 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp37
- %tmp41 = load float, float addrspace(1)* %tmp40, align 4
+ %tmp38 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp37
+ %tmp39 = load float, ptr addrspace(1) %tmp38, align 4
+ %tmp40 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp37
+ %tmp41 = load float, ptr addrspace(1) %tmp40, align 4
%tmp42 = fcmp oeq float %tmp39, %tmp41
%tmp43 = and i1 %tmp35, %tmp42
%tmp44 = add nuw nsw i32 %tmp20, 3
%tmp45 = sext i32 %tmp44 to i64
- %tmp46 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp45
- %tmp47 = load float, float addrspace(1)* %tmp46, align 4
- %tmp48 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp45
- %tmp49 = load float, float addrspace(1)* %tmp48, align 4
+ %tmp46 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp45
+ %tmp47 = load float, ptr addrspace(1) %tmp46, align 4
+ %tmp48 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp45
+ %tmp49 = load float, ptr addrspace(1) %tmp48, align 4
%tmp50 = fcmp oeq float %tmp47, %tmp49
%tmp51 = and i1 %tmp43, %tmp50
%tmp52 = add nuw nsw i32 %tmp20, 4
%tmp53 = sext i32 %tmp52 to i64
- %tmp54 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp53
- %tmp55 = load float, float addrspace(1)* %tmp54, align 4
- %tmp56 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp53
- %tmp57 = load float, float addrspace(1)* %tmp56, align 4
+ %tmp54 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp53
+ %tmp55 = load float, ptr addrspace(1) %tmp54, align 4
+ %tmp56 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp53
+ %tmp57 = load float, ptr addrspace(1) %tmp56, align 4
%tmp58 = fcmp oeq float %tmp55, %tmp57
%tmp59 = and i1 %tmp51, %tmp58
%tmp60 = add nuw nsw i32 %tmp20, 5
%tmp61 = sext i32 %tmp60 to i64
- %tmp62 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp61
- %tmp63 = load float, float addrspace(1)* %tmp62, align 4
- %tmp64 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp61
- %tmp65 = load float, float addrspace(1)* %tmp64, align 4
+ %tmp62 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp61
+ %tmp63 = load float, ptr addrspace(1) %tmp62, align 4
+ %tmp64 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp61
+ %tmp65 = load float, ptr addrspace(1) %tmp64, align 4
%tmp66 = fcmp oeq float %tmp63, %tmp65
%tmp67 = and i1 %tmp59, %tmp66
%tmp68 = add nuw nsw i32 %tmp20, 6
%tmp69 = sext i32 %tmp68 to i64
- %tmp70 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp69
- %tmp71 = load float, float addrspace(1)* %tmp70, align 4
- %tmp72 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp69
- %tmp73 = load float, float addrspace(1)* %tmp72, align 4
+ %tmp70 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp69
+ %tmp71 = load float, ptr addrspace(1) %tmp70, align 4
+ %tmp72 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp69
+ %tmp73 = load float, ptr addrspace(1) %tmp72, align 4
%tmp74 = fcmp oeq float %tmp71, %tmp73
%tmp75 = and i1 %tmp67, %tmp74
%tmp76 = add nuw nsw i32 %tmp20, 7
%tmp77 = sext i32 %tmp76 to i64
- %tmp78 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp77
- %tmp79 = load float, float addrspace(1)* %tmp78, align 4
- %tmp80 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp77
- %tmp81 = load float, float addrspace(1)* %tmp80, align 4
+ %tmp78 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp77
+ %tmp79 = load float, ptr addrspace(1) %tmp78, align 4
+ %tmp80 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp77
+ %tmp81 = load float, ptr addrspace(1) %tmp80, align 4
%tmp82 = fcmp oeq float %tmp79, %tmp81
%tmp83 = and i1 %tmp75, %tmp82
%tmp84 = add nuw nsw i32 %tmp20, 8
%tmp85 = sext i32 %tmp84 to i64
- %tmp86 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp85
- %tmp87 = load float, float addrspace(1)* %tmp86, align 4
- %tmp88 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp85
- %tmp89 = load float, float addrspace(1)* %tmp88, align 4
+ %tmp86 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp85
+ %tmp87 = load float, ptr addrspace(1) %tmp86, align 4
+ %tmp88 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp85
+ %tmp89 = load float, ptr addrspace(1) %tmp88, align 4
%tmp90 = fcmp oeq float %tmp87, %tmp89
%tmp91 = and i1 %tmp83, %tmp90
%tmp92 = add nuw nsw i32 %tmp20, 9
%tmp93 = sext i32 %tmp92 to i64
- %tmp94 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp93
- %tmp95 = load float, float addrspace(1)* %tmp94, align 4
- %tmp96 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp93
- %tmp97 = load float, float addrspace(1)* %tmp96, align 4
+ %tmp94 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_generic, i64 0, i64 %tmp93
+ %tmp95 = load float, ptr addrspace(1) %tmp94, align 4
+ %tmp96 = getelementptr inbounds [100 x float], ptr addrspace(1) @data_reference, i64 0, i64 %tmp93
+ %tmp97 = load float, ptr addrspace(1) %tmp96, align 4
%tmp98 = fcmp oeq float %tmp95, %tmp97
%tmp99 = and i1 %tmp91, %tmp98
%tmp100 = add nuw nsw i32 %tmp20, 10
@@ -131,7 +129,7 @@ bb18: ; preds = %bb18, %bb
}
; Function Attrs: nounwind readnone speculatable
-declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() #1
@@ -140,7 +138,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.x() #1
; Function Attrs: nounwind readnone speculatable
-declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
index f1af64eb9b39d..ecf6dd1eb17ed 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
@@ -9,20 +9,20 @@
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vmcnt_global(i32 addrspace(1)* %arg) {
+define amdgpu_kernel void @barrier_vmcnt_global(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1
- %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp5 = add nuw nsw i64 %tmp2, 4294967296
%tmp6 = lshr exact i64 %tmp5, 32
- %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6
- store i32 %tmp4, i32 addrspace(1)* %tmp7, align 4
+ %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp6
+ store i32 %tmp4, ptr addrspace(1) %tmp7, align 4
ret void
}
@@ -33,22 +33,22 @@ bb:
; GFX9: s_waitcnt vmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vscnt_global(i32 addrspace(1)* %arg) {
+define amdgpu_kernel void @barrier_vscnt_global(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
%tmp3 = add nuw nsw i64 %tmp2, 8589934592
%tmp4 = lshr exact i64 %tmp3, 32
- %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4
- store i32 0, i32 addrspace(1)* %tmp5, align 4
+ %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp4
+ store i32 0, ptr addrspace(1) %tmp5, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
%tmp7 = lshr exact i64 %tmp6, 32
- %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp7
- store i32 1, i32 addrspace(1)* %tmp8, align 4
+ %tmp8 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp7
+ store i32 1, ptr addrspace(1) %tmp8, align 4
ret void
}
@@ -59,24 +59,24 @@ bb:
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vmcnt_vscnt_global(i32 addrspace(1)* %arg) {
+define amdgpu_kernel void @barrier_vmcnt_vscnt_global(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
%tmp3 = add nuw nsw i64 %tmp2, 8589934592
%tmp4 = lshr exact i64 %tmp3, 32
- %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4
- store i32 0, i32 addrspace(1)* %tmp5, align 4
- %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1
- %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp4
+ store i32 0, ptr addrspace(1) %tmp5, align 4
+ %tmp6 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1
+ %tmp7 = load i32, ptr addrspace(1) %tmp6, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp8 = add nuw nsw i64 %tmp2, 4294967296
%tmp9 = lshr exact i64 %tmp8, 32
- %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp9
- store i32 %tmp7, i32 addrspace(1)* %tmp10, align 4
+ %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp9
+ store i32 %tmp7, ptr addrspace(1) %tmp10, align 4
ret void
}
@@ -84,20 +84,20 @@ bb:
; GCN: flat_load_{{dword|b32}}
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vmcnt_flat(i32* %arg) {
+define amdgpu_kernel void @barrier_vmcnt_flat(ptr %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
- %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp1
- %tmp4 = load i32, i32* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
+ %tmp4 = load i32, ptr %tmp3, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp5 = add nuw nsw i64 %tmp2, 4294967296
%tmp6 = lshr exact i64 %tmp5, 32
- %tmp7 = getelementptr inbounds i32, i32* %arg, i64 %tmp6
- store i32 %tmp4, i32* %tmp7, align 4
+ %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6
+ store i32 %tmp4, ptr %tmp7, align 4
ret void
}
@@ -107,22 +107,22 @@ bb:
; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vscnt_flat(i32* %arg) {
+define amdgpu_kernel void @barrier_vscnt_flat(ptr %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
%tmp3 = add nuw nsw i64 %tmp2, 8589934592
%tmp4 = lshr exact i64 %tmp3, 32
- %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4
- store i32 0, i32* %tmp5, align 4
+ %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4
+ store i32 0, ptr %tmp5, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
%tmp7 = lshr exact i64 %tmp6, 32
- %tmp8 = getelementptr inbounds i32, i32* %arg, i64 %tmp7
- store i32 1, i32* %tmp8, align 4
+ %tmp8 = getelementptr inbounds i32, ptr %arg, i64 %tmp7
+ store i32 1, ptr %tmp8, align 4
ret void
}
@@ -131,24 +131,24 @@ bb:
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vmcnt_vscnt_flat(i32* %arg) {
+define amdgpu_kernel void @barrier_vmcnt_vscnt_flat(ptr %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
%tmp3 = add nuw nsw i64 %tmp2, 8589934592
%tmp4 = lshr exact i64 %tmp3, 32
- %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4
- store i32 0, i32* %tmp5, align 4
- %tmp6 = getelementptr inbounds i32, i32* %arg, i64 %tmp1
- %tmp7 = load i32, i32* %tmp6, align 4
+ %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4
+ store i32 0, ptr %tmp5, align 4
+ %tmp6 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
+ %tmp7 = load i32, ptr %tmp6, align 4
fence syncscope("singlethread") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("singlethread") acquire
%tmp8 = add nuw nsw i64 %tmp2, 4294967296
%tmp9 = lshr exact i64 %tmp8, 32
- %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %tmp9
- store i32 %tmp7, i32* %tmp10, align 4
+ %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %tmp9
+ store i32 %tmp7, ptr %tmp10, align 4
ret void
}
@@ -159,24 +159,24 @@ bb:
; GFX10PLUS: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_barrier
-define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(i32* %arg) {
+define amdgpu_kernel void @barrier_vmcnt_vscnt_flat_workgroup(ptr %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
%tmp3 = add nuw nsw i64 %tmp2, 8589934592
%tmp4 = lshr exact i64 %tmp3, 32
- %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4
- store i32 0, i32* %tmp5, align 4
- %tmp6 = getelementptr inbounds i32, i32* %arg, i64 %tmp1
- %tmp7 = load i32, i32* %tmp6, align 4
+ %tmp5 = getelementptr inbounds i32, ptr %arg, i64 %tmp4
+ store i32 0, ptr %tmp5, align 4
+ %tmp6 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
+ %tmp7 = load i32, ptr %tmp6, align 4
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%tmp8 = add nuw nsw i64 %tmp2, 4294967296
%tmp9 = lshr exact i64 %tmp8, 32
- %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %tmp9
- store i32 %tmp7, i32* %tmp10, align 4
+ %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %tmp9
+ store i32 %tmp7, ptr %tmp10, align 4
ret void
}
@@ -186,17 +186,17 @@ bb:
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX9PLUS: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: {{global|flat}}_store_{{dword|b32}}
-define amdgpu_kernel void @load_vmcnt_global(i32 addrspace(1)* %arg) {
+define amdgpu_kernel void @load_vmcnt_global(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp1
- %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp1
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
%tmp5 = add nuw nsw i64 %tmp2, 4294967296
%tmp6 = lshr exact i64 %tmp5, 32
- %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp6
- store i32 %tmp4, i32 addrspace(1)* %tmp7, align 4
+ %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp6
+ store i32 %tmp4, ptr addrspace(1) %tmp7, align 4
ret void
}
@@ -205,17 +205,17 @@ bb:
; GCN-NOT: vscnt
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: {{global|flat}}_store_{{dword|b32}}
-define amdgpu_kernel void @load_vmcnt_flat(i32* %arg) {
+define amdgpu_kernel void @load_vmcnt_flat(ptr %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = shl nuw nsw i64 %tmp1, 32
- %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp1
- %tmp4 = load i32, i32* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
+ %tmp4 = load i32, ptr %tmp3, align 4
%tmp5 = add nuw nsw i64 %tmp2, 4294967296
%tmp6 = lshr exact i64 %tmp5, 32
- %tmp7 = getelementptr inbounds i32, i32* %arg, i64 %tmp6
- store i32 %tmp4, i32* %tmp7, align 4
+ %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp6
+ store i32 %tmp4, ptr %tmp7, align 4
ret void
}
@@ -224,8 +224,8 @@ bb:
; GFX8_9: s_waitcnt vmcnt(0)
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
-define void @store_vscnt_private(i32 addrspace(5)* %p) {
- store i32 0, i32 addrspace(5)* %p
+define void @store_vscnt_private(ptr addrspace(5) %p) {
+ store i32 0, ptr addrspace(5) %p
ret void
}
@@ -235,8 +235,8 @@ define void @store_vscnt_private(i32 addrspace(5)* %p) {
; GFX8_9: s_waitcnt vmcnt(0)
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
-define void @store_vscnt_global(i32 addrspace(1)* %p) {
- store i32 0, i32 addrspace(1)* %p
+define void @store_vscnt_global(ptr addrspace(1) %p) {
+ store i32 0, ptr addrspace(1) %p
ret void
}
@@ -246,8 +246,8 @@ define void @store_vscnt_global(i32 addrspace(1)* %p) {
; GFX10PLUS: s_waitcnt lgkmcnt(0){{$}}
; GFX10PLUS: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: s_setpc_b64
-define void @store_vscnt_flat(i32* %p) {
- store i32 0, i32* %p
+define void @store_vscnt_flat(ptr %p) {
+ store i32 0, ptr %p
ret void
}