[llvm] r295792 - AMDGPU: Remove some uses of llvm.SI.export in tests
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 21 16:02:22 PST 2017
Author: arsenm
Date: Tue Feb 21 18:02:21 2017
New Revision: 295792
URL: http://llvm.org/viewvc/llvm-project?rev=295792&view=rev
Log:
AMDGPU: Remove some uses of llvm.SI.export in tests
Merge some of the old, smaller tests into more complete versions.
Removed:
llvm/trunk/test/CodeGen/AMDGPU/lshl.ll
llvm/trunk/test/CodeGen/AMDGPU/lshr.ll
llvm/trunk/test/CodeGen/AMDGPU/mulhu.ll
llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll
llvm/trunk/test/CodeGen/AMDGPU/urecip.ll
Modified:
llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll
llvm/trunk/test/CodeGen/AMDGPU/elf.ll
llvm/trunk/test/CodeGen/AMDGPU/imm.ll
llvm/trunk/test/CodeGen/AMDGPU/insert-waits-exp.mir
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
llvm/trunk/test/CodeGen/AMDGPU/ret.ll
llvm/trunk/test/CodeGen/AMDGPU/seto.ll
llvm/trunk/test/CodeGen/AMDGPU/setuo.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/shl.ll
llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
llvm/trunk/test/CodeGen/AMDGPU/udiv.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
llvm/trunk/test/CodeGen/AMDGPU/wait.ll
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
llvm/trunk/test/Transforms/StructurizeCFG/rebuild-ssa-infinite-loop.ll
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdgcn.bitcast.ll Tue Feb 21 18:02:21 2017
@@ -3,19 +3,15 @@
; This test just checks that the compiler doesn't crash.
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
-; SI: s_endpgm
-define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
+define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
%1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
%5 = select i1 %4, float 0.0, float 1.0
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
- ret void
+ ret float %5
}
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll Tue Feb 21 18:02:21 2017
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
-define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
+define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
bb:
%tmp = fptosi float %arg0 to i32
%tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -17,13 +17,11 @@ bb:
%tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
%tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
%tmp9 = bitcast i32 %tmp8 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
- ret void
+ ret float %tmp9
}
declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/default-fp-mode.ll Tue Feb 21 18:02:21 2017
@@ -97,18 +97,15 @@ main_body:
; GCN-LABEL: {{^}}kill_vcc_implicit_def:
; GCN: IeeeMode: 0
-define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
+define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
entry:
%tmp0 = fcmp olt float %13, 0.0
call void @llvm.AMDGPU.kill(float %14)
%tmp1 = select i1 %tmp0, float 1.0, float 0.0
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
- ret void
+ ret float %tmp1
}
-
declare void @llvm.AMDGPU.kill(float)
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind "target-cpu"="tahiti" }
attributes #1 = { nounwind "target-cpu"="fiji" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/elf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/elf.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/elf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/elf.ll Tue Feb 21 18:02:21 2017
@@ -24,11 +24,13 @@
; TONGA-NEXT: .long 704
; CONFIG: .p2align 8
; CONFIG: test:
-define amdgpu_ps void @test(i32 %p) {
+define amdgpu_ps void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %r, float %r, float %r, i1 true, i1 false)
ret void
}
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/imm.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/imm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/imm.ll Tue Feb 21 18:02:21 2017
@@ -667,3 +667,18 @@ define void @store_literal_imm_f64(doubl
store double 4096.0, double addrspace(1)* %out
ret void
}
+
+; GCN-LABEL: {{^}}literal_folding:
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
+define amdgpu_vs void @literal_folding(float %arg) {
+main_body:
+ %tmp = fmul float %arg, 0x3FE86A7F00000000
+ %tmp1 = fmul float %arg, 0xBFE86A7F00000000
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
+ ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert-waits-exp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert-waits-exp.mir?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert-waits-exp.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert-waits-exp.mir Tue Feb 21 18:02:21 2017
@@ -1,18 +1,18 @@
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
--- |
- define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+ define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
+ i32> inreg, i32 inreg %w, float %v) #0 {
%a = load volatile float, float addrspace(1)* undef
%b = load volatile float, float addrspace(1)* undef
%c = load volatile float, float addrspace(1)* undef
%d = load volatile float, float addrspace(1)* undef
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
+ call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
}
- declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
- attributes #0 = { readnone }
- attributes #1 = { nounwind }
+ attributes #0 = { nounwind }
...
---
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll Tue Feb 21 18:02:21 2017
@@ -4,15 +4,14 @@
; SI-LABEL: {{^}}kill_gs_const:
; SI-NOT: v_cmpx_le_f32
; SI: s_mov_b64 exec, 0
-
define amdgpu_gs void @kill_gs_const() {
main_body:
- %0 = icmp ule i32 0, 3
- %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
- call void @llvm.AMDGPU.kill(float %1)
- %2 = icmp ule i32 3, 0
- %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
- call void @llvm.AMDGPU.kill(float %3)
+ %tmp = icmp ule i32 0, 3
+ %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
+ call void @llvm.AMDGPU.kill(float %tmp1)
+ %tmp2 = icmp ule i32 3, 0
+ %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
+ call void @llvm.AMDGPU.kill(float %tmp3)
ret void
}
@@ -21,16 +20,16 @@ main_body:
; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
-define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
+define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
entry:
- %tmp0 = fcmp olt float %13, 0.0
- call void @llvm.AMDGPU.kill(float %14)
- %tmp1 = select i1 %tmp0, float 1.0, float 0.0
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
+ %tmp0 = fcmp olt float %arg13, 0.000000e+00
+ call void @llvm.AMDGPU.kill(float %arg14)
+ %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
ret void
}
-declare void @llvm.AMDGPU.kill(float)
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.AMDGPU.kill(float) #0
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-!0 = !{!"const", null, i32 1}
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll Tue Feb 21 18:02:21 2017
@@ -1,146 +1,144 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=CHECK,VI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
-;CHECK-LABEL: {{^}}image_load_v4i32:
-;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_v4i32:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
-;CHECK-LABEL: {{^}}image_load_v2i32:
-;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_v2i32:
+; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
-;CHECK-LABEL: {{^}}image_load_i32:
-;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
+; GCN-LABEL: {{^}}image_load_i32:
+; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
-;CHECK-LABEL: {{^}}image_load_mip:
-;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_mip:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret <4 x float> %tex
}
-;CHECK-LABEL: {{^}}image_load_1:
-;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_1:
+; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
%elt = extractelement <4 x float> %tex, i32 0
-; Only first component used, test that dmask etc. is changed accordingly
ret float %elt
}
-;CHECK-LABEL: {{^}}image_load_f32_v2i32:
-;CHECK: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_f32_v2i32:
+; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
main_body:
- %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
+ %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
ret float %tex
}
-;CHECK-LABEL: {{^}}image_load_v2f32_v4i32:
-;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
-;CHECK: s_waitcnt vmcnt(0)
-define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
+; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
+; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
+; GCN: s_waitcnt vmcnt(0)
+define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
main_body:
- %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
ret <2 x float> %tex
}
-
-;CHECK-LABEL: {{^}}image_store_v4i32:
-;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
-define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
+; GCN-LABEL: {{^}}image_store_v4i32:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}image_store_v2i32:
-;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
-define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
+; GCN-LABEL: {{^}}image_store_v2i32:
+; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}image_store_i32:
-;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
+; GCN-LABEL: {{^}}image_store_i32:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}image_store_f32_i32:
-;CHECK: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
-define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) {
+; GCN-LABEL: {{^}}image_store_f32_i32:
+; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
+define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}image_store_v2f32_v4i32:
-;CHECK: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
-define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) {
+; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
+; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
+define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}image_store_mip:
-;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
-define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
+; GCN-LABEL: {{^}}image_store_mip:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
main_body:
- call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
-;CHECK-LABEL: {{^}}getresinfo:
-;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
-define amdgpu_ps void @getresinfo() {
+; GCN-LABEL: {{^}}getresinfo:
+; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+define amdgpu_ps void @getresinfo() #0 {
main_body:
- %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
%r0 = extractelement <4 x float> %r, i32 0
%r1 = extractelement <4 x float> %r, i32 1
%r2 = extractelement <4 x float> %r, i32 2
%r3 = extractelement <4 x float> %r, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %r0, float %r1, float %r2, float %r3)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0
ret void
}
; Ideally, the register allocator would avoid the wait here
;
-;CHECK-LABEL: {{^}}image_store_wait:
-;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0) expcnt(0)
-;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
-;CHECK: s_waitcnt vmcnt(0)
-;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
-define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
-main_body:
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
- %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %4, <8 x i32> %2, i32 15, i1 0, i1 0, i1 0, i1 0)
+; GCN-LABEL: {{^}}image_store_wait:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0) expcnt(0)
+; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
+main_body:
+ call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
+ %data = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false)
+ call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false)
ret void
}
@@ -149,21 +147,22 @@ main_body:
; VI-LABEL: image_load_mmo
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) {
- store float 0.0, float addrspace(3)* %lds
- %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 {
+bb:
+ store float 0.000000e+00, float addrspace(3)* %lds
+ %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
- store float 0.0, float addrspace(3)* %tmp2
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tex, float %tex, float %tex, float %tex)
+ store float 0.000000e+00, float addrspace(3)* %tmp2
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0
ret void
}
declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
-declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
@@ -173,10 +172,9 @@ declare <4 x float> @llvm.amdgcn.image.l
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #0
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll Tue Feb 21 18:02:21 2017
@@ -3,7 +3,6 @@
; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s
-
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
@@ -11,17 +10,17 @@
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
-define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
+define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
main_body:
- %i = extractelement <2 x float> %4, i32 0
- %j = extractelement <2 x float> %4, i32 1
- %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
- %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
- %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
- %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
- %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
+ %i = extractelement <2 x float> %arg4, i32 0
+ %j = extractelement <2 x float> %arg4, i32 1
+ %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %arg3)
+ %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %arg3)
+ %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %arg3)
+ %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
+ %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
%w = fadd float %p1_1, %const
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
ret void
}
@@ -40,7 +39,8 @@ main_body:
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
-define amdgpu_ps void @v_interp_p1(float %i) {
+define amdgpu_ps void @v_interp_p1(float %i) #0 {
+bb:
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
%p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
@@ -80,7 +80,8 @@ define amdgpu_ps void @v_interp_p1(float
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
-define amdgpu_ps void @v_interp_p2(float %x, float %j) {
+define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
+bb:
%p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
%p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
%p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
@@ -121,7 +122,8 @@ define amdgpu_ps void @v_interp_p2(float
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
-define amdgpu_ps void @v_interp_mov(float %x, float %j) {
+define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
+bb:
%mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
%mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
%mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
@@ -164,12 +166,13 @@ define amdgpu_ps void @v_interp_mov(floa
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; VI: s_mov_b32 m0, -1{{$}}
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
- store float 0.0, float addrspace(3)* %lds
+define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
+bb:
+ store float 0.000000e+00, float addrspace(3)* %lds
%tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
- store float 0.0, float addrspace(3)* %tmp2
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
+ store float 0.000000e+00, float addrspace(3)* %tmp2
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
ret void
}
@@ -178,43 +181,44 @@ define amdgpu_ps void @v_interp_readnone
; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
-define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) {
+define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg19, i32 0
%j.i = extractelement <2 x i32> %arg19, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #1
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #1
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
%i.i7 = extractelement <2 x i32> %arg19, i32 0
%j.i8 = extractelement <2 x i32> %arg19, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #1
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #1
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
%i.i1 = extractelement <2 x i32> %arg19, i32 0
%j.i2 = extractelement <2 x i32> %arg19, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #1
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #1
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
%tmp = call float @llvm.fabs.f32(float %p2.i)
%tmp34 = call float @llvm.fabs.f32(float %p2.i12)
%tmp35 = call float @llvm.fabs.f32(float %p2.i6)
%tmp36 = call i32 @llvm.SI.packf16(float %tmp, float %tmp34)
- %tmp37 = bitcast i32 %tmp36 to float
+ %tmp37 = bitcast i32 %tmp36 to <2 x half>
%tmp38 = call i32 @llvm.SI.packf16(float %tmp35, float 1.000000e+00)
- %tmp39 = bitcast i32 %tmp38 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
+ %tmp39 = bitcast i32 %tmp38 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 true) #0
ret void
}
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
-declare i32 @llvm.SI.packf16(float, float) #0
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
+declare i32 @llvm.SI.packf16(float, float) #1
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll Tue Feb 21 18:02:21 2017
@@ -1,24 +1,22 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}mbcnt_intrinsics:
; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
-
-define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
+define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
main_body:
- %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
- %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
- %4 = bitcast i32 %hi to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
+ %tmp = bitcast i32 %hi to float
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) #1
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Removed: llvm/trunk/test/CodeGen/AMDGPU/lshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lshl.ll?rev=295791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lshl.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lshl.ll (removed)
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
-
-;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
-
-define void @test(i32 %p) {
- %i = mul i32 %p, 2
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/AMDGPU/lshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lshr.ll?rev=295791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lshr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/lshr.ll (removed)
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
-
-;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
-
-define void @test(i32 %p) {
- %i = udiv i32 %p, 2
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/AMDGPU/mulhu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mulhu.ll?rev=295791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mulhu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mulhu.ll (removed)
@@ -1,17 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
-;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-
-define void @test(i32 %p) {
- %i = udiv i32 %p, 3
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret.ll Tue Feb 21 18:02:21 2017
@@ -1,25 +1,24 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
- %x = fadd float %3, 1.0
- %a = insertvalue {float, float} undef, float %x, 0
- %b = insertvalue {float, float} %a, float %3, 1
- ret {float, float} %b
+define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ %x = fadd float %arg3, 1.000000e+00
+ %a = insertvalue { float, float } undef, float %x, 0
+ %b = insertvalue { float, float } %a, float %arg3, 1
+ ret { float, float } %b
}
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
-; GCN: exp mrt0 v4, v4, v4, v4 done compr vm
+; GCN: exp mrt0 v4, v4, v4, v4 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
@@ -27,12 +26,12 @@ define amdgpu_vs {float, float} @vgpr([9
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
- ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
+define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}
-
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@@ -44,24 +43,24 @@ define amdgpu_vs {float, float, float, f
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
-define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
- %i0 = extractelement <2 x i32> %4, i32 0
- %i1 = extractelement <2 x i32> %4, i32 1
- %i2 = extractelement <2 x i32> %7, i32 0
- %i3 = extractelement <2 x i32> %8, i32 0
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+bb:
+ %i0 = extractelement <2 x i32> %arg4, i32 0
+ %i1 = extractelement <2 x i32> %arg4, i32 1
+ %i2 = extractelement <2 x i32> %arg7, i32 0
+ %i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
- %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
- %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
- %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
- %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
- %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
- ret {float, float, float, float, float} %r4
+ %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
+ %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
+ %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
+ %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
+ %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
+ ret { float, float, float, float, float } %r4
}
-
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
@@ -69,11 +68,11 @@ define amdgpu_ps {float, float, float, f
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
-define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
- ret float 1.0
+define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+bb:
+ ret float 1.000000e+00
}
-
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
@@ -83,14 +82,14 @@ define amdgpu_ps float @ps_input_ena_no_
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
-define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
- %f = bitcast <2 x i32> %8 to <2 x float>
- %s = insertvalue {float, <2 x float>} undef, float %14, 0
- %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
- ret {float, <2 x float>} %s1
+define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+bb:
+ %f = bitcast <2 x i32> %arg8 to <2 x float>
+ %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
+ %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
+ ret { float, <2 x float> } %s1
}
-
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@@ -102,25 +101,24 @@ define amdgpu_ps {float, <2 x float>} @p
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-attributes #1 = { "InitialPSInputAddr"="1" }
-define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
- %i0 = extractelement <2 x i32> %4, i32 0
- %i1 = extractelement <2 x i32> %4, i32 1
- %i2 = extractelement <2 x i32> %7, i32 0
- %i3 = extractelement <2 x i32> %8, i32 0
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
+bb:
+ %i0 = extractelement <2 x i32> %arg4, i32 0
+ %i1 = extractelement <2 x i32> %arg4, i32 1
+ %i2 = extractelement <2 x i32> %arg7, i32 0
+ %i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
- %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
- %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
- %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
- %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
- %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
- ret {float, float, float, float, float} %r4
+ %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
+ %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
+ %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
+ %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
+ %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
+ ret { float, float, float, float, float } %r4
}
-
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@@ -132,25 +130,24 @@ define amdgpu_ps {float, float, float, f
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
-attributes #2 = { "InitialPSInputAddr"="119" }
-define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
- %i0 = extractelement <2 x i32> %4, i32 0
- %i1 = extractelement <2 x i32> %4, i32 1
- %i2 = extractelement <2 x i32> %7, i32 0
- %i3 = extractelement <2 x i32> %8, i32 0
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
+bb:
+ %i0 = extractelement <2 x i32> %arg4, i32 0
+ %i1 = extractelement <2 x i32> %arg4, i32 1
+ %i2 = extractelement <2 x i32> %arg7, i32 0
+ %i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
- %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
- %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
- %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
- %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
- %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
- ret {float, float, float, float, float} %r4
+ %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
+ %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
+ %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
+ %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
+ %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
+ ret { float, float, float, float, float } %r4
}
-
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
@@ -162,38 +159,37 @@ define amdgpu_ps {float, float, float, f
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-attributes #3 = { "InitialPSInputAddr"="418" }
-define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
- %i0 = extractelement <2 x i32> %4, i32 0
- %i1 = extractelement <2 x i32> %4, i32 1
- %i2 = extractelement <2 x i32> %7, i32 0
- %i3 = extractelement <2 x i32> %8, i32 0
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
+bb:
+ %i0 = extractelement <2 x i32> %arg4, i32 0
+ %i1 = extractelement <2 x i32> %arg4, i32 1
+ %i2 = extractelement <2 x i32> %arg7, i32 0
+ %i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
- %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
- %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
- %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
- %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
- %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
- ret {float, float, float, float, float} %r4
+ %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
+ %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
+ %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
+ %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
+ %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
+ ret { float, float, float, float, float } %r4
}
-
; GCN-LABEL: {{^}}sgpr:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
-define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- %x = add i32 %2, 2
- %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
- %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
- %c = insertvalue {i32, i32, i32} %a, i32 %2, 2
- ret {i32, i32, i32} %c
+define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ %x = add i32 %arg2, 2
+ %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
+ %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
+ %c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
+ ret { i32, i32, i32 } %c
}
-
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
@@ -201,37 +197,37 @@ define amdgpu_vs {i32, i32, i32} @sgpr([
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
-define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- %x = add i32 %2, 2
- ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
+define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ %x = add i32 %arg2, 2
+ ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}
-
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
- %v = fadd float %3, 1.0
- %s = add i32 %2, 2
- %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
- %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
- %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
- %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
- %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
- ret {float, i32, float, i32, i32} %a4
+define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ %v = fadd float %arg3, 1.000000e+00
+ %s = add i32 %arg2, 2
+ %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
+ %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
+ %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
+ %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
+ %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
+ ret { float, i32, float, i32, i32 } %a4
}
-
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
-; GCN: exp mrt0 v3, v3, v3, v3 done compr vm
+; GCN: exp mrt0 v3, v3, v3, v3 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
@@ -239,9 +235,16 @@ define amdgpu_vs {float, i32, float, i32
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
-define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
- ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
+define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+bb:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}
-attributes #0 = { nounwind "InitialPSInputAddr"="0" }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "InitialPSInputAddr"="0" }
+attributes #2 = { nounwind "InitialPSInputAddr"="1" }
+attributes #3 = { nounwind "InitialPSInputAddr"="119" }
+attributes #4 = { nounwind "InitialPSInputAddr"="418" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/seto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/seto.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/seto.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/seto.ll Tue Feb 21 18:02:21 2017
@@ -4,12 +4,9 @@
; CHECK-LABEL: {{^}}main:
; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
-define void @main(float %p) {
+define amdgpu_ps float @main(float inreg %p) {
main_body:
%c = fcmp oeq float %p, %p
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
- ret void
+ ret float %r
}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/setuo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/setuo.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setuo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/setuo.ll Tue Feb 21 18:02:21 2017
@@ -4,12 +4,9 @@
; CHECK-LABEL: {{^}}main:
; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
-define void @main(float %p) {
+define amdgpu_ps float @main(float inreg %p) {
main_body:
%c = fcmp une float %p, %p
%r = select i1 %c, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
- ret void
+ ret float %r
}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Tue Feb 21 18:02:21 2017
@@ -1,13 +1,10 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -25,13 +22,13 @@ ELSE:
ENDIF: ; preds = %ELSE, %main_body
%temp.0 = phi float [ %tmp26, %ELSE ], [ %tmp21, %main_body ]
%tmp27 = fadd float %temp.0, %tmp23
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
ret void
}
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -58,32 +55,32 @@ main_body:
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #0
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #0
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #1
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #1
%i.i19 = extractelement <2 x i32> %arg5, i32 0
%j.i20 = extractelement <2 x i32> %arg5, i32 1
%i.f.i21 = bitcast i32 %i.i19 to float
%j.f.i22 = bitcast i32 %j.i20 to float
- %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #0
- %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #0
+ %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #1
+ %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #1
%i.i13 = extractelement <2 x i32> %arg5, i32 0
%j.i14 = extractelement <2 x i32> %arg5, i32 1
%i.f.i15 = bitcast i32 %i.i13 to float
%j.f.i16 = bitcast i32 %j.i14 to float
- %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #0
- %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #0
+ %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #1
+ %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #1
%i.i7 = extractelement <2 x i32> %arg5, i32 0
%j.i8 = extractelement <2 x i32> %arg5, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #0
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #0
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #1
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #1
%i.i1 = extractelement <2 x i32> %arg5, i32 0
%j.i2 = extractelement <2 x i32> %arg5, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #0
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #0
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
%tmp45 = bitcast float %p2.i to i32
%tmp46 = bitcast float %p2.i24 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
@@ -168,16 +165,16 @@ ENDIF24:
%tmp111 = fsub float -0.000000e+00, %tmp105
%tmp112 = fmul float %tmp111, %tmp106
%tmp113 = call i32 @llvm.SI.packf16(float %tmp108, float %tmp110)
- %tmp114 = bitcast i32 %tmp113 to float
+ %tmp114 = bitcast i32 %tmp113 to <2 x half>
%tmp115 = call i32 @llvm.SI.packf16(float %tmp112, float 1.000000e+00)
- %tmp116 = bitcast i32 %tmp115 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp114, float %tmp116, float %tmp114, float %tmp116)
+ %tmp116 = bitcast i32 %tmp115 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp114, <2 x half> %tmp116, i1 true, i1 true) #0
ret void
}
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -204,7 +201,7 @@ LOOP:
br i1 %tmp33, label %IF, label %ENDIF
IF: ; preds = %LOOP
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF: ; preds = %LOOP
@@ -230,7 +227,7 @@ ENDIF:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
+define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -261,7 +258,7 @@ endif:
%val.0 = phi float [ %val.if.0, %if ], [ %val.else.0, %else ]
%val.1 = phi float [ %val.if.1, %if ], [ %val.else.1, %else ]
%val.2 = phi float [ %val.if.2, %if ], [ %val.else.2, %else ]
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %val.0, float %val.1, float %val.2, float 0.000000e+00)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) #0
ret void
}
@@ -294,7 +291,7 @@ endif:
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #1 {
+define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -308,7 +305,7 @@ LOOP68:
IF70: ; preds = %LOOP68
%q = icmp ne i32 %l, 13
%temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF69: ; preds = %LOOP68
@@ -330,7 +327,7 @@ ENDIF69:
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
-define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #1 {
+define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
%tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
@@ -343,14 +340,14 @@ bb:
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #1
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #1
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #0
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #0
%i.i1 = extractelement <2 x i32> %arg7, i32 0
%j.i2 = extractelement <2 x i32> %arg7, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #0
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #0
%tmp31 = bitcast float %tmp23 to i32
%tmp36 = icmp ne i32 %tmp31, 0
br i1 %tmp36, label %bb38, label %bb80
@@ -377,80 +374,58 @@ bb80:
bb71: ; preds = %bb80, %bb38
%tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
%tmp88 = extractelement <4 x float> %tmp72, i32 0
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
ret void
}
; Check that the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #1 {
+define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float undef, float %tmp10)
- %tmp13 = bitcast i32 %tmp12 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
+ %tmp13 = bitcast i32 %tmp12 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
; Check that the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #1 {
+define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
bb:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
- %tmp13 = bitcast i32 %tmp12 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp13, float undef, float undef, float undef)
+ %tmp13 = bitcast i32 %tmp12 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #0
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.rsq.f32(float) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.exp2.f32(float) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.pow.f32(float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #0
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.amdgcn.rsq.f32(float) #1
+declare float @llvm.exp2.f32(float) #1
+declare float @llvm.pow.f32(float, float) #1
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
+
+declare i32 @llvm.SI.packf16(float, float) #1
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readonly }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl.ll Tue Feb 21 18:02:21 2017
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #0
@@ -466,4 +466,12 @@ define void @s_shl_inline_high_imm_f32_n
ret void
}
+; FUNC-LABEL: {{^}}test_mul2:
+; GCN: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
+define void @test_mul2(i32 %p) {
+ %i = mul i32 %p, 2
+ store volatile i32 %i, i32 addrspace(1)* undef
+ ret void
+}
+
attributes #0 = { nounwind readnone }
Removed: llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll?rev=295791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-literal-folding.ll (removed)
@@ -1,14 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-
-; GCN-LABEL: {{^}}main:
-; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
-; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
-define amdgpu_vs void @main(float) {
-main_body:
- %1 = fmul float %0, 0x3FE86A7F00000000
- %2 = fmul float %0, 0xBFE86A7F00000000
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
- ret void
-}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll Tue Feb 21 18:02:21 2017
@@ -1,11 +1,11 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; This shader has the potential to generated illegal VGPR to SGPR copies if
; the wrong register class is used for the REG_SEQUENCE instructions.
-; CHECK: {{^}}main:
-; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
+; GCN-LABEL: {{^}}main:
+; GCN: image_sample_b v{{\[[0-9]:[0-9]\]}}, v{{\[[0-9]:[0-9]\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
@@ -40,26 +40,16 @@ main_body:
%tmp37 = extractelement <4 x float> %tmp35, i32 1
%tmp38 = extractelement <4 x float> %tmp35, i32 2
%tmp39 = extractelement <4 x float> %tmp35, i32 3
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %tmp36, float %tmp37, float %tmp38, float %tmp39)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp36, float %tmp37, float %tmp38, float %tmp39, i1 true, i1 true) #0
ret void
}
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll Tue Feb 21 18:02:21 2017
@@ -3,7 +3,7 @@
; The only way the subtarget knows that the si machine scheduler is being used
; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend
; won't know what scheduler we are using.
-; RUN: llc -march=amdgcn -mcpu=SI --misched=si -mattr=si-scheduler < %s | FileCheck %s
+; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s
; The test checks the "si" machine scheduler pass works correctly.
@@ -16,7 +16,7 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
+define amdgpu_ps void @main([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
main_body:
%tmp = bitcast [34 x <8 x i32>] addrspace(2)* %arg3 to <32 x i8> addrspace(2)*
%tmp22 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp, align 32, !tbaa !0
@@ -46,29 +46,22 @@ main_body:
%tmp34 = extractelement <4 x float> %tmp31, i32 2
%tmp35 = extractelement <4 x float> %tmp31, i32 3
%tmp36 = call i32 @llvm.SI.packf16(float %tmp32, float %tmp33)
- %tmp37 = bitcast i32 %tmp36 to float
+ %tmp37 = bitcast i32 %tmp36 to <2 x half>
%tmp38 = call i32 @llvm.SI.packf16(float %tmp34, float %tmp35)
- %tmp39 = bitcast i32 %tmp38 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp37, float %tmp39, float %tmp37, float %tmp39)
+ %tmp39 = bitcast i32 %tmp38 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp37, <2 x half> %tmp39, i1 true, i1 false) #0
ret void
}
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #0
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare i32 @llvm.SI.packf16(float, float) #1
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Tue Feb 21 18:02:21 2017
@@ -732,10 +732,10 @@ IF67:
%tmp579 = fmul float %tmp574, %tmp45
%tmp580 = fadd float %tmp579, %tmp556
%tmp581 = call i32 @llvm.SI.packf16(float %tmp576, float %tmp578)
- %tmp582 = bitcast i32 %tmp581 to float
+ %tmp582 = bitcast i32 %tmp581 to <2 x half>
%tmp583 = call i32 @llvm.SI.packf16(float %tmp580, float %tmp282)
- %tmp584 = bitcast i32 %tmp583 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp582, float %tmp584, float %tmp582, float %tmp584)
+ %tmp584 = bitcast i32 %tmp583 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp582, <2 x half> %tmp584, i1 true, i1 true) #0
ret void
ENDIF66: ; preds = %LOOP65
@@ -1814,10 +1814,10 @@ ENDIF209:
%max.0.i1 = call float @llvm.maxnum.f32(float %tmp774, float 0.000000e+00)
%clamp.i2 = call float @llvm.minnum.f32(float %max.0.i1, float 1.000000e+00)
%tmp776 = call i32 @llvm.SI.packf16(float %tmp768, float %tmp770)
- %tmp777 = bitcast i32 %tmp776 to float
+ %tmp777 = bitcast i32 %tmp776 to <2 x half>
%tmp778 = call i32 @llvm.SI.packf16(float %tmp772, float %clamp.i2)
- %tmp779 = bitcast i32 %tmp778 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp777, float %tmp779, float %tmp777, float %tmp779)
+ %tmp779 = bitcast i32 %tmp778 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp777, <2 x half> %tmp779, i1 true, i1 true) #0
ret void
ELSE214: ; preds = %ELSE211
@@ -1835,11 +1835,11 @@ ELSE214:
declare float @llvm.exp2.f32(float) #1
declare float @llvm.ceil.f32(float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.pow.f32(float, float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
+declare float @llvm.amdgcn.rsq.f32(float) #1
declare float @llvm.amdgcn.cubeid(float, float, float) #1
declare float @llvm.amdgcn.cubesc(float, float, float) #1
declare float @llvm.amdgcn.cubetc(float, float, float) #1
@@ -1848,13 +1848,14 @@ declare float @llvm.amdgcn.interp.p1(flo
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
+
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-cf.ll Tue Feb 21 18:02:21 2017
@@ -6,270 +6,271 @@
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
+
define amdgpu_ps void @main() #0 {
main_body:
- %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
- %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
- %2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
- %3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
- %4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
- %5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
- %6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
- %7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
- %8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
- %9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
- %10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
- %11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
- %12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
- %13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
- %14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
- %15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
- %16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
- %17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
- %18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
- %19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
- %20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
- %21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
- %22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
- %23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
- %24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
- %25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
- %26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
- %27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
- %28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
- %29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
- %30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
- %31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
- %32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
- %33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
- %34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
- %35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
- %36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
- %37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
- %38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
- %39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
- %40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
- %41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
- %42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
- %43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
- %44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
- %45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
- %46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
- %47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
- %48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
- %49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
- %50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
- %51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
- %52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
- %53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
- %54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
- %55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
- %56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
- %57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
- %58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
- %59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
- %60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
- %61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
- %62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
- %63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
- %64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
- %65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
- %66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
+ %tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
+ %tmp1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
+ %tmp2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
+ %tmp3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
+ %tmp4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
+ %tmp5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
+ %tmp6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
+ %tmp7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
+ %tmp8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
+ %tmp9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
+ %tmp10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
+ %tmp11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
+ %tmp12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
+ %tmp13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
+ %tmp14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
+ %tmp15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
+ %tmp16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
+ %tmp17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
+ %tmp18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
+ %tmp19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
+ %tmp20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
+ %tmp22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
+ %tmp23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
+ %tmp24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
+ %tmp25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
+ %tmp26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
+ %tmp27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
+ %tmp28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
+ %tmp29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
+ %tmp30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
+ %tmp31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
+ %tmp32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
+ %tmp33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
+ %tmp34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
+ %tmp35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
+ %tmp36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
+ %tmp37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
+ %tmp38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
+ %tmp39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
+ %tmp40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
+ %tmp41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
+ %tmp42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
+ %tmp43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
+ %tmp44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
+ %tmp45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
+ %tmp46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
+ %tmp47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
+ %tmp48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
+ %tmp49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
+ %tmp50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
+ %tmp51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
+ %tmp52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
+ %tmp53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
+ %tmp54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
+ %tmp55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
+ %tmp56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
+ %tmp57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
+ %tmp58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
+ %tmp59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
+ %tmp60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
+ %tmp61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
+ %tmp62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
+ %tmp63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
+ %tmp64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
+ %tmp65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
+ %tmp66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
br label %LOOP
LOOP: ; preds = %ENDIF2795, %main_body
%temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
%temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
- %67 = icmp sgt i32 %tid, 4
- br i1 %67, label %ENDLOOP, label %ENDIF
+ %tmp67 = icmp sgt i32 %tid, 4
+ br i1 %tmp67, label %ENDLOOP, label %ENDIF
ENDLOOP: ; preds = %ELSE2566, %LOOP
- %one.sub.a.i = fsub float 1.000000e+00, %0
+ %one.sub.a.i = fsub float 1.000000e+00, %tmp
%one.sub.ac.i = fmul float %one.sub.a.i, undef
%result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF: ; preds = %LOOP
- %68 = fsub float %2, undef
- %69 = fsub float %3, undef
- %70 = fsub float %4, undef
- %71 = fmul float %68, 0.000000e+00
- %72 = fmul float %69, undef
- %73 = fmul float %70, undef
- %74 = fsub float %6, undef
- %75 = fsub float %7, undef
- %76 = fmul float %74, undef
- %77 = fmul float %75, 0.000000e+00
- %78 = call float @llvm.minnum.f32(float %73, float %77)
- %79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
- %80 = call float @llvm.maxnum.f32(float %72, float %76)
- %81 = call float @llvm.maxnum.f32(float undef, float %78)
- %82 = call float @llvm.minnum.f32(float %79, float %80)
- %83 = call float @llvm.minnum.f32(float %82, float undef)
- %84 = fsub float %14, undef
- %85 = fsub float %15, undef
- %86 = fsub float %16, undef
- %87 = fmul float %84, undef
- %88 = fmul float %85, undef
- %89 = fmul float %86, undef
- %90 = fsub float %17, undef
- %91 = fsub float %18, undef
- %92 = fsub float %19, undef
- %93 = fmul float %90, 0.000000e+00
- %94 = fmul float %91, undef
- %95 = fmul float %92, undef
- %96 = call float @llvm.minnum.f32(float %88, float %94)
- %97 = call float @llvm.maxnum.f32(float %87, float %93)
- %98 = call float @llvm.maxnum.f32(float %89, float %95)
- %99 = call float @llvm.maxnum.f32(float undef, float %96)
- %100 = call float @llvm.maxnum.f32(float %99, float undef)
- %101 = call float @llvm.minnum.f32(float %97, float undef)
- %102 = call float @llvm.minnum.f32(float %101, float %98)
- %103 = fsub float %30, undef
- %104 = fsub float %31, undef
- %105 = fmul float %103, 0.000000e+00
- %106 = fmul float %104, 0.000000e+00
- %107 = call float @llvm.minnum.f32(float undef, float %105)
- %108 = call float @llvm.maxnum.f32(float undef, float %106)
- %109 = call float @llvm.maxnum.f32(float undef, float %107)
- %110 = call float @llvm.maxnum.f32(float %109, float undef)
- %111 = call float @llvm.minnum.f32(float undef, float %108)
- %112 = fsub float %32, undef
- %113 = fsub float %33, undef
- %114 = fsub float %34, undef
- %115 = fmul float %112, 0.000000e+00
- %116 = fmul float %113, undef
- %117 = fmul float %114, undef
- %118 = fsub float %35, undef
- %119 = fsub float %36, undef
- %120 = fsub float %37, undef
- %121 = fmul float %118, undef
- %122 = fmul float %119, undef
- %123 = fmul float %120, undef
- %124 = call float @llvm.minnum.f32(float %115, float %121)
- %125 = call float @llvm.minnum.f32(float %116, float %122)
- %126 = call float @llvm.minnum.f32(float %117, float %123)
- %127 = call float @llvm.maxnum.f32(float %124, float %125)
- %128 = call float @llvm.maxnum.f32(float %127, float %126)
- %129 = fsub float %38, undef
- %130 = fsub float %39, undef
- %131 = fsub float %40, undef
- %132 = fmul float %129, 0.000000e+00
- %133 = fmul float %130, undef
- %134 = fmul float %131, undef
- %135 = fsub float %41, undef
- %136 = fsub float %42, undef
- %137 = fsub float %43, undef
- %138 = fmul float %135, undef
- %139 = fmul float %136, undef
- %140 = fmul float %137, undef
- %141 = call float @llvm.minnum.f32(float %132, float %138)
- %142 = call float @llvm.minnum.f32(float %133, float %139)
- %143 = call float @llvm.minnum.f32(float %134, float %140)
- %144 = call float @llvm.maxnum.f32(float %141, float %142)
- %145 = call float @llvm.maxnum.f32(float %144, float %143)
- %146 = fsub float %44, undef
- %147 = fsub float %45, undef
- %148 = fsub float %46, undef
- %149 = fmul float %146, 0.000000e+00
- %150 = fmul float %147, 0.000000e+00
- %151 = fmul float %148, undef
- %152 = fsub float %47, undef
- %153 = fsub float %48, undef
- %154 = fsub float %49, undef
- %155 = fmul float %152, undef
- %156 = fmul float %153, 0.000000e+00
- %157 = fmul float %154, undef
- %158 = call float @llvm.minnum.f32(float %149, float %155)
- %159 = call float @llvm.minnum.f32(float %150, float %156)
- %160 = call float @llvm.minnum.f32(float %151, float %157)
- %161 = call float @llvm.maxnum.f32(float %158, float %159)
- %162 = call float @llvm.maxnum.f32(float %161, float %160)
- %163 = fsub float %50, undef
- %164 = fsub float %51, undef
- %165 = fsub float %52, undef
- %166 = fmul float %163, undef
- %167 = fmul float %164, 0.000000e+00
- %168 = fmul float %165, 0.000000e+00
- %169 = fsub float %53, undef
- %170 = fsub float %54, undef
- %171 = fsub float %55, undef
- %172 = fdiv float 1.000000e+00, %temp18.0
- %173 = fmul float %169, undef
- %174 = fmul float %170, undef
- %175 = fmul float %171, %172
- %176 = call float @llvm.minnum.f32(float %166, float %173)
- %177 = call float @llvm.minnum.f32(float %167, float %174)
- %178 = call float @llvm.minnum.f32(float %168, float %175)
- %179 = call float @llvm.maxnum.f32(float %176, float %177)
- %180 = call float @llvm.maxnum.f32(float %179, float %178)
- %181 = fsub float %62, undef
- %182 = fsub float %63, undef
- %183 = fsub float %64, undef
- %184 = fmul float %181, 0.000000e+00
- %185 = fmul float %182, undef
- %186 = fmul float %183, undef
- %187 = fsub float %65, undef
- %188 = fsub float %66, undef
- %189 = fmul float %187, undef
- %190 = fmul float %188, undef
- %191 = call float @llvm.maxnum.f32(float %184, float %189)
- %192 = call float @llvm.maxnum.f32(float %185, float %190)
- %193 = call float @llvm.maxnum.f32(float %186, float undef)
- %194 = call float @llvm.minnum.f32(float %191, float %192)
- %195 = call float @llvm.minnum.f32(float %194, float %193)
- %.temp292.7 = select i1 undef, float %162, float undef
- %temp292.9 = select i1 false, float %180, float %.temp292.7
+ %tmp68 = fsub float %tmp2, undef
+ %tmp69 = fsub float %tmp3, undef
+ %tmp70 = fsub float %tmp4, undef
+ %tmp71 = fmul float %tmp68, 0.000000e+00
+ %tmp72 = fmul float %tmp69, undef
+ %tmp73 = fmul float %tmp70, undef
+ %tmp74 = fsub float %tmp6, undef
+ %tmp75 = fsub float %tmp7, undef
+ %tmp76 = fmul float %tmp74, undef
+ %tmp77 = fmul float %tmp75, 0.000000e+00
+ %tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
+ %tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
+ %tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
+ %tmp81 = call float @llvm.maxnum.f32(float undef, float %tmp78)
+ %tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
+ %tmp83 = call float @llvm.minnum.f32(float %tmp82, float undef)
+ %tmp84 = fsub float %tmp14, undef
+ %tmp85 = fsub float %tmp15, undef
+ %tmp86 = fsub float %tmp16, undef
+ %tmp87 = fmul float %tmp84, undef
+ %tmp88 = fmul float %tmp85, undef
+ %tmp89 = fmul float %tmp86, undef
+ %tmp90 = fsub float %tmp17, undef
+ %tmp91 = fsub float %tmp18, undef
+ %tmp92 = fsub float %tmp19, undef
+ %tmp93 = fmul float %tmp90, 0.000000e+00
+ %tmp94 = fmul float %tmp91, undef
+ %tmp95 = fmul float %tmp92, undef
+ %tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
+ %tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
+ %tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
+ %tmp99 = call float @llvm.maxnum.f32(float undef, float %tmp96)
+ %tmp100 = call float @llvm.maxnum.f32(float %tmp99, float undef)
+ %tmp101 = call float @llvm.minnum.f32(float %tmp97, float undef)
+ %tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
+ %tmp103 = fsub float %tmp30, undef
+ %tmp104 = fsub float %tmp31, undef
+ %tmp105 = fmul float %tmp103, 0.000000e+00
+ %tmp106 = fmul float %tmp104, 0.000000e+00
+ %tmp107 = call float @llvm.minnum.f32(float undef, float %tmp105)
+ %tmp108 = call float @llvm.maxnum.f32(float undef, float %tmp106)
+ %tmp109 = call float @llvm.maxnum.f32(float undef, float %tmp107)
+ %tmp110 = call float @llvm.maxnum.f32(float %tmp109, float undef)
+ %tmp111 = call float @llvm.minnum.f32(float undef, float %tmp108)
+ %tmp112 = fsub float %tmp32, undef
+ %tmp113 = fsub float %tmp33, undef
+ %tmp114 = fsub float %tmp34, undef
+ %tmp115 = fmul float %tmp112, 0.000000e+00
+ %tmp116 = fmul float %tmp113, undef
+ %tmp117 = fmul float %tmp114, undef
+ %tmp118 = fsub float %tmp35, undef
+ %tmp119 = fsub float %tmp36, undef
+ %tmp120 = fsub float %tmp37, undef
+ %tmp121 = fmul float %tmp118, undef
+ %tmp122 = fmul float %tmp119, undef
+ %tmp123 = fmul float %tmp120, undef
+ %tmp124 = call float @llvm.minnum.f32(float %tmp115, float %tmp121)
+ %tmp125 = call float @llvm.minnum.f32(float %tmp116, float %tmp122)
+ %tmp126 = call float @llvm.minnum.f32(float %tmp117, float %tmp123)
+ %tmp127 = call float @llvm.maxnum.f32(float %tmp124, float %tmp125)
+ %tmp128 = call float @llvm.maxnum.f32(float %tmp127, float %tmp126)
+ %tmp129 = fsub float %tmp38, undef
+ %tmp130 = fsub float %tmp39, undef
+ %tmp131 = fsub float %tmp40, undef
+ %tmp132 = fmul float %tmp129, 0.000000e+00
+ %tmp133 = fmul float %tmp130, undef
+ %tmp134 = fmul float %tmp131, undef
+ %tmp135 = fsub float %tmp41, undef
+ %tmp136 = fsub float %tmp42, undef
+ %tmp137 = fsub float %tmp43, undef
+ %tmp138 = fmul float %tmp135, undef
+ %tmp139 = fmul float %tmp136, undef
+ %tmp140 = fmul float %tmp137, undef
+ %tmp141 = call float @llvm.minnum.f32(float %tmp132, float %tmp138)
+ %tmp142 = call float @llvm.minnum.f32(float %tmp133, float %tmp139)
+ %tmp143 = call float @llvm.minnum.f32(float %tmp134, float %tmp140)
+ %tmp144 = call float @llvm.maxnum.f32(float %tmp141, float %tmp142)
+ %tmp145 = call float @llvm.maxnum.f32(float %tmp144, float %tmp143)
+ %tmp146 = fsub float %tmp44, undef
+ %tmp147 = fsub float %tmp45, undef
+ %tmp148 = fsub float %tmp46, undef
+ %tmp149 = fmul float %tmp146, 0.000000e+00
+ %tmp150 = fmul float %tmp147, 0.000000e+00
+ %tmp151 = fmul float %tmp148, undef
+ %tmp152 = fsub float %tmp47, undef
+ %tmp153 = fsub float %tmp48, undef
+ %tmp154 = fsub float %tmp49, undef
+ %tmp155 = fmul float %tmp152, undef
+ %tmp156 = fmul float %tmp153, 0.000000e+00
+ %tmp157 = fmul float %tmp154, undef
+ %tmp158 = call float @llvm.minnum.f32(float %tmp149, float %tmp155)
+ %tmp159 = call float @llvm.minnum.f32(float %tmp150, float %tmp156)
+ %tmp160 = call float @llvm.minnum.f32(float %tmp151, float %tmp157)
+ %tmp161 = call float @llvm.maxnum.f32(float %tmp158, float %tmp159)
+ %tmp162 = call float @llvm.maxnum.f32(float %tmp161, float %tmp160)
+ %tmp163 = fsub float %tmp50, undef
+ %tmp164 = fsub float %tmp51, undef
+ %tmp165 = fsub float %tmp52, undef
+ %tmp166 = fmul float %tmp163, undef
+ %tmp167 = fmul float %tmp164, 0.000000e+00
+ %tmp168 = fmul float %tmp165, 0.000000e+00
+ %tmp169 = fsub float %tmp53, undef
+ %tmp170 = fsub float %tmp54, undef
+ %tmp171 = fsub float %tmp55, undef
+ %tmp172 = fdiv float 1.000000e+00, %temp18.0
+ %tmp173 = fmul float %tmp169, undef
+ %tmp174 = fmul float %tmp170, undef
+ %tmp175 = fmul float %tmp171, %tmp172
+ %tmp176 = call float @llvm.minnum.f32(float %tmp166, float %tmp173)
+ %tmp177 = call float @llvm.minnum.f32(float %tmp167, float %tmp174)
+ %tmp178 = call float @llvm.minnum.f32(float %tmp168, float %tmp175)
+ %tmp179 = call float @llvm.maxnum.f32(float %tmp176, float %tmp177)
+ %tmp180 = call float @llvm.maxnum.f32(float %tmp179, float %tmp178)
+ %tmp181 = fsub float %tmp62, undef
+ %tmp182 = fsub float %tmp63, undef
+ %tmp183 = fsub float %tmp64, undef
+ %tmp184 = fmul float %tmp181, 0.000000e+00
+ %tmp185 = fmul float %tmp182, undef
+ %tmp186 = fmul float %tmp183, undef
+ %tmp187 = fsub float %tmp65, undef
+ %tmp188 = fsub float %tmp66, undef
+ %tmp189 = fmul float %tmp187, undef
+ %tmp190 = fmul float %tmp188, undef
+ %tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
+ %tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
+ %tmp193 = call float @llvm.maxnum.f32(float %tmp186, float undef)
+ %tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
+ %tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
+ %.temp292.7 = select i1 undef, float %tmp162, float undef
+ %temp292.9 = select i1 false, float %tmp180, float %.temp292.7
%.temp292.9 = select i1 undef, float undef, float %temp292.9
- %196 = fcmp ogt float undef, 0.000000e+00
- %197 = fcmp olt float undef, %195
- %198 = and i1 %196, %197
- %199 = fcmp olt float undef, %.temp292.9
- %200 = and i1 %198, %199
- %temp292.11 = select i1 %200, float undef, float %.temp292.9
- %tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+ %tmp196 = fcmp ogt float undef, 0.000000e+00
+ %tmp197 = fcmp olt float undef, %tmp195
+ %tmp198 = and i1 %tmp196, %tmp197
+ %tmp199 = fcmp olt float undef, %.temp292.9
+ %tmp200 = and i1 %tmp198, %tmp199
+ %temp292.11 = select i1 %tmp200, float undef, float %.temp292.9
+ %tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp0 = icmp eq i32 %tid0, 0
br i1 %cmp0, label %IF2565, label %ELSE2566
IF2565: ; preds = %ENDIF
- %tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+ %tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp1 = icmp eq i32 %tid1, 0
br i1 %cmp1, label %ENDIF2582, label %ELSE2584
ELSE2566: ; preds = %ENDIF
- %tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+ %tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tidf = bitcast i32 %tid2 to float
- %201 = fcmp oeq float %temp292.11, %tidf
- br i1 %201, label %ENDLOOP, label %ELSE2593
+ %tmp201 = fcmp oeq float %temp292.11, %tidf
+ br i1 %tmp201, label %ENDLOOP, label %ELSE2593
ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
%temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
- %temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
- %202 = fsub float %5, undef
- %203 = fmul float %202, undef
- %204 = call float @llvm.maxnum.f32(float undef, float %203)
- %205 = call float @llvm.minnum.f32(float %204, float undef)
- %206 = call float @llvm.minnum.f32(float %205, float undef)
- %207 = fcmp ogt float undef, 0.000000e+00
- %208 = fcmp olt float undef, 1.000000e+00
- %209 = and i1 %207, %208
- %tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+ %temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
+ %tmp202 = fsub float %tmp5, undef
+ %tmp203 = fmul float %tmp202, undef
+ %tmp204 = call float @llvm.maxnum.f32(float undef, float %tmp203)
+ %tmp205 = call float @llvm.minnum.f32(float %tmp204, float undef)
+ %tmp206 = call float @llvm.minnum.f32(float %tmp205, float undef)
+ %tmp207 = fcmp ogt float undef, 0.000000e+00
+ %tmp208 = fcmp olt float undef, 1.000000e+00
+ %tmp209 = and i1 %tmp207, %tmp208
+ %tid3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tidf3 = bitcast i32 %tid3 to float
- %210 = fcmp olt float %tidf3, %206
- %211 = and i1 %209, %210
- br i1 %211, label %ENDIF2795, label %ELSE2797
+ %tmp210 = fcmp olt float %tidf3, %tmp206
+ %tmp211 = and i1 %tmp209, %tmp210
+ br i1 %tmp211, label %ENDIF2795, label %ELSE2797
ELSE2584: ; preds = %IF2565
br label %ENDIF2582
ENDIF2582: ; preds = %ELSE2584, %IF2565
- %212 = fadd float %1, undef
- %213 = fadd float 0.000000e+00, %212
- %floor = call float @llvm.floor.f32(float %213)
- %214 = fsub float %213, %floor
- %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #2
+ %tmp212 = fadd float %tmp1, undef
+ %tmp213 = fadd float 0.000000e+00, %tmp212
+ %floor = call float @llvm.floor.f32(float %tmp213)
+ %tmp214 = fsub float %tmp213, %floor
+ %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp4 = icmp eq i32 %tid4, 0
br i1 %cmp4, label %IF2589, label %ELSE2590
@@ -280,61 +281,61 @@ ELSE2590:
br label %ENDIF2588
ENDIF2588: ; preds = %ELSE2590, %IF2589
- %215 = fsub float 1.000000e+00, %214
- %216 = call float @llvm.sqrt.f32(float %215)
- %217 = fmul float %216, undef
- %218 = fadd float %217, undef
+ %tmp215 = fsub float 1.000000e+00, %tmp214
+ %tmp216 = call float @llvm.sqrt.f32(float %tmp215)
+ %tmp217 = fmul float %tmp216, undef
+ %tmp218 = fadd float %tmp217, undef
br label %ENDIF2564
ELSE2593: ; preds = %ELSE2566
- %219 = fcmp oeq float %temp292.11, %81
- %220 = fcmp olt float %81, %83
- %221 = and i1 %219, %220
- br i1 %221, label %ENDIF2594, label %ELSE2596
+ %tmp219 = fcmp oeq float %temp292.11, %tmp81
+ %tmp220 = fcmp olt float %tmp81, %tmp83
+ %tmp221 = and i1 %tmp219, %tmp220
+ br i1 %tmp221, label %ENDIF2594, label %ELSE2596
ELSE2596: ; preds = %ELSE2593
- %222 = fcmp oeq float %temp292.11, %100
- %223 = fcmp olt float %100, %102
- %224 = and i1 %222, %223
- br i1 %224, label %ENDIF2594, label %ELSE2632
+ %tmp222 = fcmp oeq float %temp292.11, %tmp100
+ %tmp223 = fcmp olt float %tmp100, %tmp102
+ %tmp224 = and i1 %tmp222, %tmp223
+ br i1 %tmp224, label %ENDIF2594, label %ELSE2632
ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
%temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
- %225 = fmul float %temp894.2, undef
+ %tmp225 = fmul float %temp894.2, undef
br label %ENDIF2564
ELSE2632: ; preds = %ELSE2596
br i1 undef, label %ENDIF2594, label %ELSE2650
ELSE2650: ; preds = %ELSE2632
- %226 = fcmp oeq float %temp292.11, %110
- %227 = fcmp olt float %110, %111
- %228 = and i1 %226, %227
- br i1 %228, label %IF2667, label %ELSE2668
+ %tmp226 = fcmp oeq float %temp292.11, %tmp110
+ %tmp227 = fcmp olt float %tmp110, %tmp111
+ %tmp228 = and i1 %tmp226, %tmp227
+ br i1 %tmp228, label %IF2667, label %ELSE2668
IF2667: ; preds = %ELSE2650
br i1 undef, label %ENDIF2594, label %ELSE2671
ELSE2668: ; preds = %ELSE2650
- %229 = fcmp oeq float %temp292.11, %128
- %230 = fcmp olt float %128, undef
- %231 = and i1 %229, %230
- br i1 %231, label %ENDIF2594, label %ELSE2686
+ %tmp229 = fcmp oeq float %temp292.11, %tmp128
+ %tmp230 = fcmp olt float %tmp128, undef
+ %tmp231 = and i1 %tmp229, %tmp230
+ br i1 %tmp231, label %ENDIF2594, label %ELSE2686
ELSE2671: ; preds = %IF2667
br label %ENDIF2594
ELSE2686: ; preds = %ELSE2668
- %232 = fcmp oeq float %temp292.11, %145
- %233 = fcmp olt float %145, undef
- %234 = and i1 %232, %233
- br i1 %234, label %ENDIF2594, label %ELSE2704
+ %tmp232 = fcmp oeq float %temp292.11, %tmp145
+ %tmp233 = fcmp olt float %tmp145, undef
+ %tmp234 = and i1 %tmp232, %tmp233
+ br i1 %tmp234, label %ENDIF2594, label %ELSE2704
ELSE2704: ; preds = %ELSE2686
- %235 = fcmp oeq float %temp292.11, %180
- %236 = fcmp olt float %180, undef
- %237 = and i1 %235, %236
- br i1 %237, label %ENDIF2594, label %ELSE2740
+ %tmp235 = fcmp oeq float %temp292.11, %tmp180
+ %tmp236 = fcmp olt float %tmp180, undef
+ %tmp237 = and i1 %tmp235, %tmp236
+ br i1 %tmp237, label %ENDIF2594, label %ELSE2740
ELSE2740: ; preds = %ELSE2704
br i1 undef, label %IF2757, label %ELSE2758
@@ -349,8 +350,8 @@ ELSE2761:
br label %ENDIF2594
IF2775: ; preds = %ELSE2758
- %238 = fcmp olt float undef, undef
- br i1 %238, label %ENDIF2594, label %ELSE2779
+ %tmp238 = fcmp olt float undef, undef
+ br i1 %tmp238, label %ENDIF2594, label %ELSE2779
ELSE2779: ; preds = %IF2775
br i1 undef, label %ENDIF2594, label %ELSE2782
@@ -359,39 +360,39 @@ ELSE2782:
br i1 undef, label %ENDIF2594, label %ELSE2785
ELSE2785: ; preds = %ELSE2782
- %239 = fcmp olt float undef, 0.000000e+00
- br i1 %239, label %ENDIF2594, label %ELSE2788
+ %tmp239 = fcmp olt float undef, 0.000000e+00
+ br i1 %tmp239, label %ENDIF2594, label %ELSE2788
ELSE2788: ; preds = %ELSE2785
- %240 = fcmp olt float 0.000000e+00, undef
- %.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
+ %tmp240 = fcmp olt float 0.000000e+00, undef
+ %.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
br label %ENDIF2594
ELSE2797: ; preds = %ENDIF2564
- %241 = fsub float %8, undef
- %242 = fsub float %9, undef
- %243 = fsub float %10, undef
- %244 = fmul float %241, undef
- %245 = fmul float %242, undef
- %246 = fmul float %243, undef
- %247 = fsub float %11, undef
- %248 = fsub float %12, undef
- %249 = fsub float %13, undef
- %250 = fmul float %247, undef
- %251 = fmul float %248, undef
- %252 = fmul float %249, undef
- %253 = call float @llvm.minnum.f32(float %244, float %250)
- %254 = call float @llvm.minnum.f32(float %245, float %251)
- %255 = call float @llvm.maxnum.f32(float %246, float %252)
- %256 = call float @llvm.maxnum.f32(float %253, float %254)
- %257 = call float @llvm.maxnum.f32(float %256, float undef)
- %258 = call float @llvm.minnum.f32(float undef, float %255)
- %259 = fcmp ogt float %257, 0.000000e+00
- %260 = fcmp olt float %257, 1.000000e+00
- %261 = and i1 %259, %260
- %262 = fcmp olt float %257, %258
- %263 = and i1 %261, %262
- br i1 %263, label %ENDIF2795, label %ELSE2800
+ %tmp241 = fsub float %tmp8, undef
+ %tmp242 = fsub float %tmp9, undef
+ %tmp243 = fsub float %tmp10, undef
+ %tmp244 = fmul float %tmp241, undef
+ %tmp245 = fmul float %tmp242, undef
+ %tmp246 = fmul float %tmp243, undef
+ %tmp247 = fsub float %tmp11, undef
+ %tmp248 = fsub float %tmp12, undef
+ %tmp249 = fsub float %tmp13, undef
+ %tmp250 = fmul float %tmp247, undef
+ %tmp251 = fmul float %tmp248, undef
+ %tmp252 = fmul float %tmp249, undef
+ %tmp253 = call float @llvm.minnum.f32(float %tmp244, float %tmp250)
+ %tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
+ %tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
+ %tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
+ %tmp257 = call float @llvm.maxnum.f32(float %tmp256, float undef)
+ %tmp258 = call float @llvm.minnum.f32(float undef, float %tmp255)
+ %tmp259 = fcmp ogt float %tmp257, 0.000000e+00
+ %tmp260 = fcmp olt float %tmp257, 1.000000e+00
+ %tmp261 = and i1 %tmp259, %tmp260
+ %tmp262 = fcmp olt float %tmp257, %tmp258
+ %tmp263 = and i1 %tmp261, %tmp262
+ br i1 %tmp263, label %ENDIF2795, label %ELSE2800
ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
br label %LOOP
@@ -400,53 +401,53 @@ ELSE2800:
br i1 undef, label %ENDIF2795, label %ELSE2803
ELSE2803: ; preds = %ELSE2800
- %264 = fsub float %20, undef
- %265 = fsub float %21, undef
- %266 = fsub float %22, undef
- %267 = fmul float %264, undef
- %268 = fmul float %265, undef
- %269 = fmul float %266, 0.000000e+00
- %270 = fsub float %23, undef
- %271 = fsub float %24, undef
- %272 = fsub float %25, undef
- %273 = fmul float %270, undef
- %274 = fmul float %271, undef
- %275 = fmul float %272, undef
- %276 = call float @llvm.minnum.f32(float %267, float %273)
- %277 = call float @llvm.maxnum.f32(float %268, float %274)
- %278 = call float @llvm.maxnum.f32(float %269, float %275)
- %279 = call float @llvm.maxnum.f32(float %276, float undef)
- %280 = call float @llvm.maxnum.f32(float %279, float undef)
- %281 = call float @llvm.minnum.f32(float undef, float %277)
- %282 = call float @llvm.minnum.f32(float %281, float %278)
- %283 = fcmp ogt float %280, 0.000000e+00
- %284 = fcmp olt float %280, 1.000000e+00
- %285 = and i1 %283, %284
- %286 = fcmp olt float %280, %282
- %287 = and i1 %285, %286
- br i1 %287, label %ENDIF2795, label %ELSE2806
+ %tmp264 = fsub float %tmp20, undef
+ %tmp265 = fsub float %tmp21, undef
+ %tmp266 = fsub float %tmp22, undef
+ %tmp267 = fmul float %tmp264, undef
+ %tmp268 = fmul float %tmp265, undef
+ %tmp269 = fmul float %tmp266, 0.000000e+00
+ %tmp270 = fsub float %tmp23, undef
+ %tmp271 = fsub float %tmp24, undef
+ %tmp272 = fsub float %tmp25, undef
+ %tmp273 = fmul float %tmp270, undef
+ %tmp274 = fmul float %tmp271, undef
+ %tmp275 = fmul float %tmp272, undef
+ %tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
+ %tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
+ %tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
+ %tmp279 = call float @llvm.maxnum.f32(float %tmp276, float undef)
+ %tmp280 = call float @llvm.maxnum.f32(float %tmp279, float undef)
+ %tmp281 = call float @llvm.minnum.f32(float undef, float %tmp277)
+ %tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
+ %tmp283 = fcmp ogt float %tmp280, 0.000000e+00
+ %tmp284 = fcmp olt float %tmp280, 1.000000e+00
+ %tmp285 = and i1 %tmp283, %tmp284
+ %tmp286 = fcmp olt float %tmp280, %tmp282
+ %tmp287 = and i1 %tmp285, %tmp286
+ br i1 %tmp287, label %ENDIF2795, label %ELSE2806
ELSE2806: ; preds = %ELSE2803
- %288 = fsub float %26, undef
- %289 = fsub float %27, undef
- %290 = fsub float %28, undef
- %291 = fmul float %288, undef
- %292 = fmul float %289, 0.000000e+00
- %293 = fmul float %290, undef
- %294 = fsub float %29, undef
- %295 = fmul float %294, undef
- %296 = call float @llvm.minnum.f32(float %291, float %295)
- %297 = call float @llvm.minnum.f32(float %292, float undef)
- %298 = call float @llvm.maxnum.f32(float %293, float undef)
- %299 = call float @llvm.maxnum.f32(float %296, float %297)
- %300 = call float @llvm.maxnum.f32(float %299, float undef)
- %301 = call float @llvm.minnum.f32(float undef, float %298)
- %302 = fcmp ogt float %300, 0.000000e+00
- %303 = fcmp olt float %300, 1.000000e+00
- %304 = and i1 %302, %303
- %305 = fcmp olt float %300, %301
- %306 = and i1 %304, %305
- br i1 %306, label %ENDIF2795, label %ELSE2809
+ %tmp288 = fsub float %tmp26, undef
+ %tmp289 = fsub float %tmp27, undef
+ %tmp290 = fsub float %tmp28, undef
+ %tmp291 = fmul float %tmp288, undef
+ %tmp292 = fmul float %tmp289, 0.000000e+00
+ %tmp293 = fmul float %tmp290, undef
+ %tmp294 = fsub float %tmp29, undef
+ %tmp295 = fmul float %tmp294, undef
+ %tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
+ %tmp297 = call float @llvm.minnum.f32(float %tmp292, float undef)
+ %tmp298 = call float @llvm.maxnum.f32(float %tmp293, float undef)
+ %tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
+ %tmp300 = call float @llvm.maxnum.f32(float %tmp299, float undef)
+ %tmp301 = call float @llvm.minnum.f32(float undef, float %tmp298)
+ %tmp302 = fcmp ogt float %tmp300, 0.000000e+00
+ %tmp303 = fcmp olt float %tmp300, 1.000000e+00
+ %tmp304 = and i1 %tmp302, %tmp303
+ %tmp305 = fcmp olt float %tmp300, %tmp301
+ %tmp306 = and i1 %tmp304, %tmp305
+ br i1 %tmp306, label %ENDIF2795, label %ELSE2809
ELSE2809: ; preds = %ELSE2806
br i1 undef, label %ENDIF2795, label %ELSE2812
@@ -461,53 +462,42 @@ ELSE2818:
br i1 undef, label %ENDIF2795, label %ELSE2821
ELSE2821: ; preds = %ELSE2818
- %307 = fsub float %56, undef
- %308 = fsub float %57, undef
- %309 = fsub float %58, undef
- %310 = fmul float %307, undef
- %311 = fmul float %308, 0.000000e+00
- %312 = fmul float %309, undef
- %313 = fsub float %59, undef
- %314 = fsub float %60, undef
- %315 = fsub float %61, undef
- %316 = fmul float %313, undef
- %317 = fmul float %314, undef
- %318 = fmul float %315, undef
- %319 = call float @llvm.maxnum.f32(float %310, float %316)
- %320 = call float @llvm.maxnum.f32(float %311, float %317)
- %321 = call float @llvm.maxnum.f32(float %312, float %318)
- %322 = call float @llvm.minnum.f32(float %319, float %320)
- %323 = call float @llvm.minnum.f32(float %322, float %321)
- %324 = fcmp ogt float undef, 0.000000e+00
- %325 = fcmp olt float undef, 1.000000e+00
- %326 = and i1 %324, %325
- %327 = fcmp olt float undef, %323
- %328 = and i1 %326, %327
- br i1 %328, label %ENDIF2795, label %ELSE2824
+ %tmp307 = fsub float %tmp56, undef
+ %tmp308 = fsub float %tmp57, undef
+ %tmp309 = fsub float %tmp58, undef
+ %tmp310 = fmul float %tmp307, undef
+ %tmp311 = fmul float %tmp308, 0.000000e+00
+ %tmp312 = fmul float %tmp309, undef
+ %tmp313 = fsub float %tmp59, undef
+ %tmp314 = fsub float %tmp60, undef
+ %tmp315 = fsub float %tmp61, undef
+ %tmp316 = fmul float %tmp313, undef
+ %tmp317 = fmul float %tmp314, undef
+ %tmp318 = fmul float %tmp315, undef
+ %tmp319 = call float @llvm.maxnum.f32(float %tmp310, float %tmp316)
+ %tmp320 = call float @llvm.maxnum.f32(float %tmp311, float %tmp317)
+ %tmp321 = call float @llvm.maxnum.f32(float %tmp312, float %tmp318)
+ %tmp322 = call float @llvm.minnum.f32(float %tmp319, float %tmp320)
+ %tmp323 = call float @llvm.minnum.f32(float %tmp322, float %tmp321)
+ %tmp324 = fcmp ogt float undef, 0.000000e+00
+ %tmp325 = fcmp olt float undef, 1.000000e+00
+ %tmp326 = and i1 %tmp324, %tmp325
+ %tmp327 = fcmp olt float undef, %tmp323
+ %tmp328 = and i1 %tmp326, %tmp327
+ br i1 %tmp328, label %ENDIF2795, label %ELSE2824
ELSE2824: ; preds = %ELSE2821
%.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
br label %ENDIF2795
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
-
-; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Tue Feb 21 18:02:21 2017
@@ -1,16 +1,16 @@
-; RUN: llc < %s -march=amdgcn -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
-; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
+; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s
; SMRD load with an immediate offset.
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -18,11 +18,11 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -33,11 +33,11 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
-define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -48,11 +48,11 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
; TODO: Add VI checks
; GCN: s_endpgm
-define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -62,11 +62,11 @@ entry:
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -76,11 +76,11 @@ entry:
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
entry:
- %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
- %1 = load i32, i32 addrspace(2)* %0
- store i32 %1, i32 addrspace(1)* %out
+ %tmp = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
+ %tmp1 = load i32, i32 addrspace(2)* %tmp
+ store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
@@ -88,12 +88,12 @@ entry:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
+define amdgpu_ps void @smrd_load_const0(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
+ %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@@ -102,14 +102,15 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const1:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
+define amdgpu_ps void @smrd_load_const1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
+ %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1020)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
+
; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible immediate.
; immediate offset.
@@ -118,12 +119,12 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
-define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
+define amdgpu_ps void @smrd_load_const2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
+ %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1024)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@@ -133,12 +134,12 @@ main_body:
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
-define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
+define amdgpu_ps void @smrd_load_const3(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
+ %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048572)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
@@ -148,18 +149,17 @@ main_body:
; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) {
+define amdgpu_ps void @smrd_load_const4(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
- %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
+ %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp
+ %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 1048576)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
ret void
}
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #0
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-attributes #0 = { nounwind readnone }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll Tue Feb 21 18:02:21 2017
@@ -107,7 +107,7 @@ endif:
%export = phi float [ %lds_data, %if ], [ %interp, %else ]
%tmp4 = call i32 @llvm.SI.packf16(float %export, float %export)
%tmp5 = bitcast i32 %tmp4 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp5, float %tmp5, float %tmp5, float %tmp5)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp5, float %tmp5, float %tmp5, float %tmp5, i1 true, i1 true) #0
ret void
}
@@ -205,11 +205,9 @@ ret:
ret void
}
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
-
-declare i32 @llvm.SI.packf16(float, float) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare i32 @llvm.SI.packf16(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll Tue Feb 21 18:02:21 2017
@@ -1,11 +1,11 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; FIXME: Move this to sgpr-copy.ll when this is fixed on VI.
; Make sure that when we split an smrd instruction in order to move it to
; the VALU, we are also moving its users to the VALU.
-; CHECK-LABEL: {{^}}split_smrd_add_worklist:
-; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
+; GCN-LABEL: {{^}}split_smrd_add_worklist:
+; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
bb:
%tmp = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
@@ -24,24 +24,20 @@ bb3:
%tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call i32 @llvm.SI.packf16(float %tmp10, float undef)
- %tmp13 = bitcast i32 %tmp12 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp13, float undef, float undef)
+ %tmp13 = bitcast i32 %tmp12 to <2 x half>
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp13, <2 x half> undef, i1 true, i1 true) #0
ret void
}
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
declare i32 @llvm.SI.packf16(float, float) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
-!1 = !{!"const", !3}
-!2 = !{!1, !1, i64 0}
-!3 = !{!"tbaa root"}
+!1 = !{!"const", !2}
+!2 = !{!"tbaa root"}
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll Tue Feb 21 18:02:21 2017
@@ -1,39 +1,37 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-; SI-LABEL:{{^}}row_filter_C1_D0:
-; SI: s_endpgm
-; Function Attrs: nounwind
+; GCN-LABEL:{{^}}row_filter_C1_D0:
define void @row_filter_C1_D0() {
entry:
br i1 undef, label %for.inc.1, label %do.body.preheader
do.body.preheader: ; preds = %entry
- %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
+ %tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
br i1 undef, label %do.body56.1, label %do.body90
do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
- %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
- %2 = insertelement <4 x i32> %1, i32 undef, i32 2
- %3 = insertelement <4 x i32> %2, i32 undef, i32 3
+ %tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
+ %tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
br i1 undef, label %do.body124.1, label %do.body.1562.preheader
do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
- %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
- %4 = insertelement <4 x i32> undef, i32 undef, i32 1
+ %storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
+ %tmp4 = insertelement <4 x i32> undef, i32 undef, i32 1
br label %for.inc.1
do.body56.1: ; preds = %do.body.preheader
- %5 = insertelement <4 x i32> %0, i32 undef, i32 1
+ %tmp5 = insertelement <4 x i32> %tmp, i32 undef, i32 1
%or.cond472.1 = or i1 undef, undef
br i1 %or.cond472.1, label %do.body56.2, label %do.body90
do.body56.2: ; preds = %do.body56.1
- %6 = insertelement <4 x i32> %5, i32 undef, i32 1
+ %tmp6 = insertelement <4 x i32> %tmp5, i32 undef, i32 1
br label %do.body90
do.body124.1: ; preds = %do.body90
- %7 = insertelement <4 x i32> %3, i32 undef, i32 3
+ %tmp7 = insertelement <4 x i32> %tmp3, i32 undef, i32 3
br label %do.body.1562.preheader
for.inc.1: ; preds = %do.body.1562.preheader, %entry
@@ -42,8 +40,8 @@ for.inc.1:
unreachable
}
-; SI-LABEL: {{^}}foo:
-; SI: s_endpgm
+; GCN-LABEL: {{^}}foo:
+; GCN: s_endpgm
define amdgpu_ps void @foo() #0 {
bb:
br i1 undef, label %bb2, label %bb1
@@ -78,9 +76,9 @@ bb13:
bb14: ; preds = %bb27, %bb24, %bb9
%tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
%tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
- %tmp17 = fmul float 10.5, %tmp16
- %tmp18 = fmul float 11.5, %tmp15
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
+ %tmp17 = fmul float 1.050000e+01, %tmp16
+ %tmp18 = fmul float 1.150000e+01, %tmp15
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
ret void
bb23: ; preds = %bb13
@@ -97,13 +95,8 @@ bb27:
br label %bb14
}
-; Function Attrs: nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-; Function Attrs: nounwind readnone
-declare i32 @llvm.SI.packf16(float, float) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/udiv.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/udiv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/udiv.ll Tue Feb 21 18:02:21 2017
@@ -5,17 +5,19 @@
; FUNC-LABEL: {{^}}udiv_i32:
; EG-NOT: SETGE_INT
; EG: CF_END
+
+; SI: v_rcp_iflag_f32_e32
define void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1) * %in
- %b = load i32, i32 addrspace(1) * %b_ptr
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}s_udiv_i32:
-
+; SI: v_rcp_iflag_f32_e32
define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
@@ -30,6 +32,8 @@ define void @s_udiv_i32(i32 addrspace(1)
; FUNC-LABEL: {{^}}udiv_v2i32:
; EG: CF_END
+; SI: v_rcp_iflag_f32_e32
+; SI: v_rcp_iflag_f32_e32
; SI: s_endpgm
define void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -158,3 +162,21 @@ define void @scalarize_mulhu_4xi32(<4 x
store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16
ret void
}
+
+; FUNC-LABEL: {{^}}test_udiv2:
+; SI: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
+define void @test_udiv2(i32 %p) {
+ %i = udiv i32 %p, 2
+ store volatile i32 %i, i32 addrspace(1)* undef
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_udiv_3_mulhu:
+; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
+; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
+; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
+define void @test_udiv_3_mulhu(i32 %p) {
+ %i = udiv i32 %p, 3
+ store volatile i32 %i, i32 addrspace(1)* undef
+ ret void
+}
Removed: llvm/trunk/test/CodeGen/AMDGPU/urecip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/urecip.ll?rev=295791&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/urecip.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/urecip.ll (removed)
@@ -1,13 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK: v_rcp_iflag_f32_e32
-
-define void @test(i32 %p, i32 %q) {
- %i = udiv i32 %p, %q
- %r = bitcast i32 %i to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
- ret void
-}
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Tue Feb 21 18:02:21 2017
@@ -179,39 +179,39 @@ bb24:
br i1 %tmp155, label %bb156, label %bb157
bb156: ; preds = %bb24
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13)
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) #0
ret void
bb157: ; preds = %bb24
@@ -482,16 +482,12 @@ bb157:
br label %bb24
}
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/wait.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wait.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wait.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wait.ll Tue Feb 21 18:02:21 2017
@@ -11,7 +11,7 @@
; DEFAULT: exp
; DEFAULT: s_waitcnt lgkmcnt(0)
; DEFAULT: s_endpgm
-define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) {
+define amdgpu_vs void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
%tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -20,8 +20,7 @@ main_body:
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.amdgcn.s.barrier() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
-; %tmp15 = extractelement <4 x float> %tmp11, i32 3
- %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+ %tmp15 = load float, float addrspace(2)* %constptr, align 4
%tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
%tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
%tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
@@ -29,8 +28,8 @@ main_body:
%tmp20 = extractelement <4 x float> %tmp18, i32 1
%tmp21 = extractelement <4 x float> %tmp18, i32 2
%tmp22 = extractelement <4 x float> %tmp18, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) #0
ret void
}
@@ -44,40 +43,34 @@ main_body:
; ILPMAX: s_waitcnt vmcnt(1)
; ILPMAX: s_waitcnt vmcnt(0)
; ILPMAX: s_endpgm
-
-define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
-byval, i32 inreg, i32 inreg, i32, i32, i32, i32) {
+define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
main_body:
- %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
- %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
- %13 = add i32 %5, %7
- %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
- %15 = extractelement <4 x float> %14, i32 0
- %16 = extractelement <4 x float> %14, i32 1
- %17 = extractelement <4 x float> %14, i32 2
- %18 = extractelement <4 x float> %14, i32 3
- %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
- %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
- %21 = add i32 %5, %7
- %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
- %23 = extractelement <4 x float> %22, i32 0
- %24 = extractelement <4 x float> %22, i32 1
- %25 = extractelement <4 x float> %22, i32 2
- %26 = extractelement <4 x float> %22, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
+ %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
+ %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
+ %tmp12 = add i32 %arg5, %arg7
+ %tmp13 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp11, i32 0, i32 %tmp12)
+ %tmp14 = extractelement <4 x float> %tmp13, i32 0
+ %tmp15 = extractelement <4 x float> %tmp13, i32 1
+ %tmp16 = extractelement <4 x float> %tmp13, i32 2
+ %tmp17 = extractelement <4 x float> %tmp13, i32 3
+ %tmp18 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 1
+ %tmp19 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp18, align 16, !tbaa !0
+ %tmp20 = add i32 %arg5, %arg7
+ %tmp21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp19, i32 0, i32 %tmp20)
+ %tmp22 = extractelement <4 x float> %tmp21, i32 0
+ %tmp23 = extractelement <4 x float> %tmp21, i32 1
+ %tmp24 = extractelement <4 x float> %tmp21, i32 2
+ %tmp25 = extractelement <4 x float> %tmp21, i32 3
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 false, i1 false) #0
ret void
}
-
-; Function Attrs: convergent nounwind
declare void @llvm.amdgcn.s.barrier() #1
-
-; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
+attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Tue Feb 21 18:02:21 2017
@@ -1,5 +1,5 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s
; Check that WQM isn't triggered by image load/store intrinsics.
;
@@ -25,9 +25,7 @@ main_body:
%c.3 = extractelement <4 x i32> %c.2, i32 0
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
%data = load float, float addrspace(1)* %gep
-
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
-
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %data, float undef, float undef, float undef, i1 true, i1 true) #1
ret void
}
@@ -500,7 +498,7 @@ end:
ret <4 x float> %r
}
-
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
@@ -512,8 +510,7 @@ declare <4 x float> @llvm.SI.image.sampl
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare void @llvm.AMDGPU.kill(float)
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+declare void @llvm.AMDGPU.kill(float) #1
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
Modified: llvm/trunk/test/Transforms/StructurizeCFG/rebuild-ssa-infinite-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/StructurizeCFG/rebuild-ssa-infinite-loop.ll?rev=295792&r1=295791&r2=295792&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/StructurizeCFG/rebuild-ssa-infinite-loop.ll (original)
+++ llvm/trunk/test/Transforms/StructurizeCFG/rebuild-ssa-infinite-loop.ll Tue Feb 21 18:02:21 2017
@@ -6,46 +6,51 @@
target triple = "amdgcn--"
-declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
-
-define amdgpu_vs void @wrapper(i32 inreg, i32) {
+define amdgpu_vs void @wrapper(i32 inreg %arg, i32 %arg1) {
main_body:
- %2 = add i32 %1, %0
- %3 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %2)
- %4 = extractelement <4 x float> %3, i32 1
- %5 = fptosi float %4 to i32
- %6 = insertelement <2 x i32> undef, i32 %5, i32 1
+ %tmp = add i32 %arg1, %arg
+ %tmp2 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> undef, i32 0, i32 %tmp)
+ %tmp3 = extractelement <4 x float> %tmp2, i32 1
+ %tmp4 = fptosi float %tmp3 to i32
+ %tmp5 = insertelement <2 x i32> undef, i32 %tmp4, i32 1
br label %loop11.i
loop11.i: ; preds = %endif46.i, %main_body
- %7 = phi i32 [ 0, %main_body ], [ %15, %endif46.i ]
- %8 = icmp sgt i32 %7, 999
- br i1 %8, label %main.exit, label %if16.i
+ %tmp6 = phi i32 [ 0, %main_body ], [ %tmp14, %endif46.i ]
+ %tmp7 = icmp sgt i32 %tmp6, 999
+ br i1 %tmp7, label %main.exit, label %if16.i
if16.i: ; preds = %loop11.i
- %9 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %6, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
- %10 = extractelement <4 x float> %9, i32 0
- %11 = fcmp ult float 0.000000e+00, %10
- br i1 %11, label %if28.i, label %endif46.i
+ %tmp8 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp5, <8 x i32> undef, i32 15, i1 true, i1 false, i1 false, i1 false)
+ %tmp9 = extractelement <4 x float> %tmp8, i32 0
+ %tmp10 = fcmp ult float 0.000000e+00, %tmp9
+ br i1 %tmp10, label %if28.i, label %endif46.i
if28.i: ; preds = %if16.i
- %12 = bitcast float %10 to i32
- %13 = shl i32 %12, 16
- %14 = bitcast i32 %13 to float
+ %tmp11 = bitcast float %tmp9 to i32
+ %tmp12 = shl i32 %tmp11, 16
+ %tmp13 = bitcast i32 %tmp12 to float
br label %main.exit
endif46.i: ; preds = %if16.i
- %15 = add i32 %7, 1
+ %tmp14 = add i32 %tmp6, 1
br label %loop11.i
main.exit: ; preds = %if28.i, %loop11.i
- %16 = phi float [ %14, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
- call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %16, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000)
+ %tmp15 = phi float [ %tmp13, %if28.i ], [ 0x36F0800000000000, %loop11.i ]
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp15, float 0.000000e+00, float 0.000000e+00, float 0x36A0000000000000, i1 false, i1 false) #0
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind }
+; Function Attrs: nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
More information about the llvm-commits
mailing list