[llvm] r335229 - AMDGPU: Convert test cases to the dimension-aware intrinsics
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 21 06:37:19 PDT 2018
Author: nha
Date: Thu Jun 21 06:37:19 2018
New Revision: 335229
URL: http://llvm.org/viewvc/llvm-project?rev=335229&view=rev
Log:
AMDGPU: Convert test cases to the dimension-aware intrinsics
Summary:
Also explicitly port over some tests in llvm.amdgcn.image.* that were
missing. Some tests are removed because they no longer apply (i.e.
explicitly testing building an address vector via insertelement).
This is in preparation for the eventual removal of the old-style
intrinsics.
Some additional notes:
- constant-address-space-32bit.ll: change some GCN-NEXT to GCN because
the instruction schedule was subtly altered
- insert_vector_elt.ll: the old test didn't actually test anything,
because %tmp1 was not used; remove the load, because it doesn't work
(Because of the amdgpu_ps calling convention? In any case, it's
orthogonal to what the test claims to be testing.)
Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf
Reviewers: arsenm, rampitec
Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D48018
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
Modified:
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
llvm/trunk/test/CodeGen/AMDGPU/else.ll
llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll
llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll Thu Jun 21 06:37:19 2018
@@ -1,105 +1,105 @@
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
-;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
main_body:
- %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
+ %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
%r = bitcast i32 %orig to float
ret float %r
}
@@ -112,19 +112,19 @@ main_body:
ret float %r
}
-declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
Modified: llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll Thu Jun 21 06:37:19 2018
@@ -1,13 +1,13 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
-; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
+; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -17,13 +17,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
-; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -33,13 +33,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
-; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
+; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -49,13 +49,13 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
-; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
- %tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -76,9 +76,9 @@ main_body:
}
-declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll Thu Jun 21 06:37:19 2018
@@ -7,7 +7,7 @@
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
bb:
%tmp = fptosi float %arg0 to i32
- %tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
%tmp2.f = extractelement <4 x float> %tmp1, i32 0
%tmp2 = bitcast float %tmp2.f to i32
%tmp3 = and i32 %tmp, 7
@@ -21,7 +21,7 @@ bb:
}
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-address-space-32bit.ll Thu Jun 21 06:37:19 2018
@@ -204,7 +204,7 @@ define amdgpu_vs i32 @load_i32_hifffffff
; GCN: v_readfirstlane_b32
; GCN-NEXT: v_readfirstlane_b32
; SI: s_nop
-; GCN-NEXT: s_load_dwordx8
+; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@@ -219,7 +219,7 @@ main_body:
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
- %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
+ %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
@@ -238,7 +238,7 @@ main_body:
; GCN: v_readfirstlane_b32
; GCN-NEXT: v_readfirstlane_b32
; SI: s_nop
-; GCN-NEXT: s_load_dwordx8
+; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@@ -253,7 +253,7 @@ main_body:
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
- %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
+ %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
@@ -272,7 +272,7 @@ main_body:
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
!0 = !{}
Modified: llvm/trunk/test/CodeGen/AMDGPU/else.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/else.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/else.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/else.ll Thu Jun 21 06:37:19 2018
@@ -44,7 +44,7 @@ if:
else:
%c = fmul float %v, 3.0
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%v.else = extractelement <4 x float> %tex, i32 0
br label %end
@@ -55,7 +55,7 @@ end:
}
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind writeonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/image-schedule.ll Thu Jun 21 06:37:19 2018
@@ -25,17 +25,18 @@ define dllexport amdgpu_cs void @_amdgpu
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
- %tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
- %tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
+ %tmp13.0 = extractelement <3 x i32> %tmp9, i32 0
+ %tmp13.1 = extractelement <3 x i32> %tmp9, i32 1
+ %tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
- call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
+ call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
- %tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
+ %tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 165, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
- call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
+ call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0
ret void
}
@@ -43,10 +44,10 @@ define dllexport amdgpu_cs void @_amdgpu
declare i64 @llvm.amdgcn.s.getpc() #1
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
; Function Attrs: nounwind writeonly
-declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Thu Jun 21 06:37:19 2018
@@ -75,10 +75,9 @@ define amdgpu_kernel void @insertelement
; GCN-LABEL: {{^}}insertelement_to_sgpr:
; GCN-NOT: v_readfirstlane
-define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind {
- %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef
- %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
- %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true)
+define amdgpu_ps <4 x float> @insertelement_to_sgpr(<4 x i32> inreg %samp) nounwind {
+ %tmp1 = insertelement <4 x i32> %samp, i32 0, i32 0
+ %tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
ret <4 x float> %tmp2
}
@@ -474,7 +473,7 @@ define amdgpu_kernel void @dynamic_inser
ret void
}
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll Thu Jun 21 06:37:19 2018
@@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}atomic_swap_1d:
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll Thu Jun 21 06:37:19 2018
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
@@ -370,6 +370,46 @@ main_body:
ret void
}
+; GCN-LABEL: {{^}}getresinfo_dmask0:
+; GCN-NOT: image
+; GCN: ; return to shader part epilog
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+ ret <4 x float> %r
+}
+
+; Ideally, the register allocator would avoid the wait here
+;
+; GCN-LABEL: {{^}}image_store_wait:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; SI: s_waitcnt expcnt(0)
+; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; GCN: s_waitcnt vmcnt(0)
+; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
+main_body:
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
+ %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
+ ret void
+}
+
+; SI won't merge ds memory operations, because of the signed offset bug, so
+; we only have check lines for VI.
+; VI-LABEL: image_load_mmo
+; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
+define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
+ store float 0.000000e+00, float addrspace(3)* %lds
+ %c0 = extractelement <2 x i32> %c, i32 0
+ %c1 = extractelement <2 x i32> %c, i32 1
+ %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
+ %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
+ store float 0.000000e+00, float addrspace(3)* %tmp2
+ ret float %tex
+}
+
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
@@ -412,6 +452,7 @@ declare <4 x float> @llvm.amdgcn.image.g
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll Thu Jun 21 06:37:19 2018
@@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}gather4_2d:
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll?rev=335229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll Thu Jun 21 06:37:19 2018
@@ -0,0 +1,118 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}gather4_o_2d:
+; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_o_2d:
+; GCN: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_cl_o_2d:
+; GCN: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_cl_o_2d:
+; GCN: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_o_2d:
+; GCN: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_o_2d:
+; GCN: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_cl_o_2d:
+; GCN: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d:
+; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_o_2d:
+; GCN: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_o_2d:
+; GCN: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_lz_o_2d:
+; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_lz_o_2d:
+; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll Thu Jun 21 06:37:19 2018
@@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; GCN-LABEL: {{^}}sample_1d:
@@ -400,6 +401,95 @@ main_body:
ret <4 x float> %v
}
+; GCN-LABEL: {{^}}adjust_writemask_sample_0:
+; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %r, i32 0
+ ret float %elt0
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_01
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_012
+; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}}
+define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_12
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_03
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
+ ret <2 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_13
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
+ ret <2 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_123
+; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}}
+define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled
+; GCN-NOT: image
+define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %r
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %out
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
+define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %out
+}
+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll?rev=335229&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll Thu Jun 21 06:37:19 2018
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}sample_o_1d:
+; GCN: image_sample_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_o_2d:
+; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_o_1d:
+; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_o_2d:
+; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_o_1d:
+; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_o_2d:
+; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_o_1d:
+; GCN: image_sample_c_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_o_2d:
+; GCN: image_sample_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_o_1d:
+; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_o_2d:
+; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_o_1d:
+; GCN: image_sample_c_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_o_2d:
+; GCN: image_sample_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_o_1d:
+; GCN: image_sample_b_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_o_2d:
+; GCN: image_sample_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_o_1d:
+; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_o_2d:
+; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_o_1d:
+; GCN: image_sample_d_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_o_2d:
+; GCN: image_sample_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_1d:
+; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_2d:
+; GCN: image_sample_c_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_o_1d:
+; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_o_2d:
+; GCN: image_sample_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_o_1d:
+; GCN: image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_o_2d:
+; GCN: image_sample_c_d_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_o_1d:
+; GCN: image_sample_cd_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_o_2d:
+; GCN: image_sample_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_o_1d:
+; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_o_2d:
+; GCN: image_sample_c_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_o_1d:
+; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_o_2d:
+; GCN: image_sample_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_o_1d:
+; GCN: image_sample_c_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_o_2d:
+; GCN: image_sample_c_cd_cl_o v[0:3], v[0:15], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_1d:
+; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_2d:
+; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_1d:
+; GCN: image_sample_c_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_2d:
+; GCN: image_sample_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_o_1d:
+; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_o_2d:
+; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_o_1d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_o_2d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll Thu Jun 21 06:37:19 2018
@@ -26,7 +26,7 @@ define amdgpu_ps float @test2() #0 {
%live = call i1 @llvm.amdgcn.ps.live()
%live.32 = zext i1 %live to i32
%live.32.bc = bitcast i32 %live.32 to float
- %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%r = extractelement <4 x float> %t, i32 0
ret float %r
}
@@ -49,13 +49,13 @@ dead:
end:
%tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
%tc.bc = bitcast i32 %tc to float
- %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%r = extractelement <4 x float> %t, i32 0
ret float %r
}
declare i1 @llvm.amdgcn.ps.live() #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll Thu Jun 21 06:37:19 2018
@@ -8,9 +8,9 @@
; CHECK-NEXT: image_store
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) {
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d0, i32 15, i32 %c0, <8 x i32> %rsrc, i32 0, i32 0)
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d1, i32 15, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
ret void
}
@@ -24,17 +24,17 @@ define amdgpu_ps void @test1(<8 x i32> i
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
; CHECK-NEXT: image_store
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
- %t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %t = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
%c.1 = mul i32 %c, 2
- call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %t, i32 15, i32 %c.1, <8 x i32> %rsrc, i32 0, i32 0)
ret void
}
declare void @llvm.amdgcn.s.waitcnt(i32) #0
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll Thu Jun 21 06:37:19 2018
@@ -10,8 +10,8 @@ target triple = "amdgcn--amdpal"
define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr {
.entry:
- %res = call <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
ret <2 x float> %res
}
-declare <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
+declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Thu Jun 21 06:37:19 2018
@@ -81,13 +81,8 @@ main_body:
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
- %tmp45 = bitcast float %p2.i to i32
- %tmp46 = bitcast float %p2.i24 to i32
- %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
- %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
- %a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
- %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0)
%tmp50 = extractelement <4 x float> %tmp1, i32 2
%tmp51 = call float @llvm.fabs.f32(float %tmp50)
%tmp52 = fmul float %p2.i18, %p2.i18
@@ -240,14 +235,14 @@ entry:
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
- %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36D6000000000000, float 0x36DA000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36D6000000000000, float 0x36DA000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
%val.if.0 = extractelement <4 x float> %tmp1, i32 0
%val.if.1 = extractelement <4 x float> %tmp1, i32 1
%val.if.2 = extractelement <4 x float> %tmp1, i32 2
br label %endif
else: ; preds = %entry
- %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36C4000000000000, float 0x36CC000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36C4000000000000, float 0x36CC000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
%val.else.0 = extractelement <4 x float> %tmp2, i32 0
%val.else.1 = extractelement <4 x float> %tmp2, i32 1
%val.else.2 = extractelement <4 x float> %tmp2, i32 2
@@ -352,24 +347,18 @@ bb:
br i1 %tmp36, label %bb38, label %bb80
bb38: ; preds = %bb
- %tmp52 = bitcast float %p2.i to i32
- %tmp53 = bitcast float %p2.i6 to i32
- %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
- %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
%tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
- %a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float>
- %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp56, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
br label %bb71
bb80: ; preds = %bb
%tmp81 = bitcast float %p2.i to i32
%tmp82 = bitcast float %p2.i6 to i32
%tmp82.2 = add i32 %tmp82, 1
- %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
- %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
+ %tmp83 = bitcast i32 %tmp81 to float
+ %tmp84 = bitcast i32 %tmp82.2 to float
%tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
- %a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float>
- %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp83, float %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
br label %bb71
bb71: ; preds = %bb80, %bb38
@@ -387,7 +376,7 @@ bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
- %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -402,7 +391,7 @@ bb:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
- %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -419,7 +408,7 @@ declare float @llvm.amdgcn.interp.p2(flo
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll Thu Jun 21 06:37:19 2018
@@ -34,14 +34,9 @@ main_body:
%j.f.i4 = bitcast i32 %j.i2 to float
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
- %tmp27 = bitcast float %p2.i to i32
- %tmp28 = bitcast float %p2.i6 to i32
- %tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
- %tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
%tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
%tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
- %tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float>
- %tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0)
%tmp32 = extractelement <4 x float> %tmp31, i32 0
%tmp33 = extractelement <4 x float> %tmp31, i32 1
@@ -57,7 +52,7 @@ declare float @llvm.amdgcn.interp.p1(flo
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Thu Jun 21 06:37:19 2018
@@ -324,14 +324,6 @@ main_body:
%tmp213 = fmul float %tmp205, %tmp191
%tmp214 = fmul float %tmp206, %tmp191
%tmp215 = fmul float -1.000000e+00, %tmp191
- %tmp216 = bitcast float %tmp135 to i32
- %tmp217 = bitcast float %tmp181 to i32
- %tmp218 = bitcast float %tmp136 to i32
- %tmp219 = bitcast float %tmp182 to i32
- %tmp220 = insertelement <8 x i32> undef, i32 %tmp216, i32 0
- %tmp221 = insertelement <8 x i32> %tmp220, i32 %tmp217, i32 1
- %tmp222 = insertelement <8 x i32> %tmp221, i32 %tmp218, i32 2
- %tmp223 = insertelement <8 x i32> %tmp222, i32 %tmp219, i32 3
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
@@ -358,14 +350,7 @@ IF:
br label %LOOP65
ENDIF: ; preds = %LOOP
- %tmp237 = bitcast float %temp28.0 to i32
- %tmp238 = bitcast float %temp29.0 to i32
- %tmp239 = insertelement <8 x i32> %tmp223, i32 %tmp237, i32 4
- %tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
- %tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
- %tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
- %tmp242.bc = bitcast <8 x i32> %tmp242 to <8 x float>
- %tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp242.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.0, float %temp29.0, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp244 = extractelement <4 x float> %tmp243, i32 3
%tmp245 = fcmp oge float %temp30.0, %tmp244
%tmp246 = sext i1 %tmp245 to i32
@@ -396,65 +381,20 @@ LOOP65:
br i1 %tmp262, label %IF67, label %ENDIF66
IF67: ; preds = %LOOP65
- %tmp263 = bitcast float %tmp135 to i32
- %tmp264 = bitcast float %tmp181 to i32
- %tmp265 = bitcast float %tmp136 to i32
- %tmp266 = bitcast float %tmp182 to i32
- %tmp267 = bitcast float %temp28.1 to i32
- %tmp268 = bitcast float %temp29.1 to i32
- %tmp269 = insertelement <8 x i32> undef, i32 %tmp263, i32 0
- %tmp270 = insertelement <8 x i32> %tmp269, i32 %tmp264, i32 1
- %tmp271 = insertelement <8 x i32> %tmp270, i32 %tmp265, i32 2
- %tmp272 = insertelement <8 x i32> %tmp271, i32 %tmp266, i32 3
- %tmp273 = insertelement <8 x i32> %tmp272, i32 %tmp267, i32 4
- %tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
- %tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
- %tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
%tmp67.bc = bitcast <4 x i32> %tmp67 to <4 x i32>
- %tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
- %tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i1 0, i32 0, i32 0)
%tmp278 = extractelement <4 x float> %tmp277, i32 0
%tmp279 = extractelement <4 x float> %tmp277, i32 1
%tmp280 = extractelement <4 x float> %tmp277, i32 2
%tmp281 = extractelement <4 x float> %tmp277, i32 3
%tmp282 = fmul float %tmp281, %tmp46
- %tmp283 = bitcast float %tmp135 to i32
- %tmp284 = bitcast float %tmp181 to i32
- %tmp285 = bitcast float %tmp136 to i32
- %tmp286 = bitcast float %tmp182 to i32
- %tmp287 = bitcast float %temp28.1 to i32
- %tmp288 = bitcast float %temp29.1 to i32
- %tmp289 = insertelement <8 x i32> undef, i32 %tmp283, i32 0
- %tmp290 = insertelement <8 x i32> %tmp289, i32 %tmp284, i32 1
- %tmp291 = insertelement <8 x i32> %tmp290, i32 %tmp285, i32 2
- %tmp292 = insertelement <8 x i32> %tmp291, i32 %tmp286, i32 3
- %tmp293 = insertelement <8 x i32> %tmp292, i32 %tmp287, i32 4
- %tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
- %tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
- %tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
%tmp83.bc = bitcast <4 x i32> %tmp83 to <4 x i32>
- %tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
- %tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i1 0, i32 0, i32 0)
%tmp298 = extractelement <4 x float> %tmp297, i32 0
%tmp299 = extractelement <4 x float> %tmp297, i32 1
%tmp300 = extractelement <4 x float> %tmp297, i32 2
- %tmp301 = bitcast float %tmp135 to i32
- %tmp302 = bitcast float %tmp181 to i32
- %tmp303 = bitcast float %tmp136 to i32
- %tmp304 = bitcast float %tmp182 to i32
- %tmp305 = bitcast float %temp28.1 to i32
- %tmp306 = bitcast float %temp29.1 to i32
- %tmp307 = insertelement <8 x i32> undef, i32 %tmp301, i32 0
- %tmp308 = insertelement <8 x i32> %tmp307, i32 %tmp302, i32 1
- %tmp309 = insertelement <8 x i32> %tmp308, i32 %tmp303, i32 2
- %tmp310 = insertelement <8 x i32> %tmp309, i32 %tmp304, i32 3
- %tmp311 = insertelement <8 x i32> %tmp310, i32 %tmp305, i32 4
- %tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
- %tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
- %tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
%tmp79.bc = bitcast <4 x i32> %tmp79 to <4 x i32>
- %tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
- %tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i1 0, i32 0, i32 0)
%tmp316 = extractelement <4 x float> %tmp315, i32 0
%tmp317 = extractelement <4 x float> %tmp315, i32 1
%tmp318 = extractelement <4 x float> %tmp315, i32 2
@@ -470,22 +410,7 @@ IF67:
%tmp328 = fadd float %tmp278, %tmp323
%tmp329 = fadd float %tmp279, %tmp325
%tmp330 = fadd float %tmp280, %tmp327
- %tmp331 = bitcast float %tmp135 to i32
- %tmp332 = bitcast float %tmp181 to i32
- %tmp333 = bitcast float %tmp136 to i32
- %tmp334 = bitcast float %tmp182 to i32
- %tmp335 = bitcast float %temp28.1 to i32
- %tmp336 = bitcast float %temp29.1 to i32
- %tmp337 = insertelement <8 x i32> undef, i32 %tmp331, i32 0
- %tmp338 = insertelement <8 x i32> %tmp337, i32 %tmp332, i32 1
- %tmp339 = insertelement <8 x i32> %tmp338, i32 %tmp333, i32 2
- %tmp340 = insertelement <8 x i32> %tmp339, i32 %tmp334, i32 3
- %tmp341 = insertelement <8 x i32> %tmp340, i32 %tmp335, i32 4
- %tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
- %tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
- %tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
- %tmp344.bc = bitcast <8 x i32> %tmp344 to <8 x float>
- %tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp344.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp346 = extractelement <4 x float> %tmp345, i32 0
%tmp347 = extractelement <4 x float> %tmp345, i32 1
%tmp348 = extractelement <4 x float> %tmp345, i32 2
@@ -501,23 +426,8 @@ IF67:
%tmp358 = fmul float %tmp349, %tmp357
%tmp359 = fmul float %tmp350, %tmp357
%tmp360 = fmul float %tmp351, %tmp357
- %tmp361 = bitcast float %tmp135 to i32
- %tmp362 = bitcast float %tmp181 to i32
- %tmp363 = bitcast float %tmp136 to i32
- %tmp364 = bitcast float %tmp182 to i32
- %tmp365 = bitcast float %temp28.1 to i32
- %tmp366 = bitcast float %temp29.1 to i32
- %tmp367 = insertelement <8 x i32> undef, i32 %tmp361, i32 0
- %tmp368 = insertelement <8 x i32> %tmp367, i32 %tmp362, i32 1
- %tmp369 = insertelement <8 x i32> %tmp368, i32 %tmp363, i32 2
- %tmp370 = insertelement <8 x i32> %tmp369, i32 %tmp364, i32 3
- %tmp371 = insertelement <8 x i32> %tmp370, i32 %tmp365, i32 4
- %tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
- %tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
- %tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
%tmp71.bc = bitcast <4 x i32> %tmp71 to <4 x i32>
- %tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
- %tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i1 0, i32 0, i32 0)
%tmp376 = extractelement <4 x float> %tmp375, i32 0
%tmp377 = extractelement <4 x float> %tmp375, i32 1
%tmp378 = extractelement <4 x float> %tmp375, i32 2
@@ -557,23 +467,8 @@ IF67:
%tmp412 = fadd float %tmp411, %tmp406
%tmp413 = fmul float %tmp399, %p2.i24
%tmp414 = fadd float %tmp413, %tmp408
- %tmp415 = bitcast float %tmp135 to i32
- %tmp416 = bitcast float %tmp181 to i32
- %tmp417 = bitcast float %tmp136 to i32
- %tmp418 = bitcast float %tmp182 to i32
- %tmp419 = bitcast float %temp28.1 to i32
- %tmp420 = bitcast float %temp29.1 to i32
- %tmp421 = insertelement <8 x i32> undef, i32 %tmp415, i32 0
- %tmp422 = insertelement <8 x i32> %tmp421, i32 %tmp416, i32 1
- %tmp423 = insertelement <8 x i32> %tmp422, i32 %tmp417, i32 2
- %tmp424 = insertelement <8 x i32> %tmp423, i32 %tmp418, i32 3
- %tmp425 = insertelement <8 x i32> %tmp424, i32 %tmp419, i32 4
- %tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
- %tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
- %tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
%tmp87.bc = bitcast <4 x i32> %tmp87 to <4 x i32>
- %tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
- %tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i1 0, i32 0, i32 0)
%tmp430 = extractelement <4 x float> %tmp429, i32 0
%tmp431 = extractelement <4 x float> %tmp429, i32 1
%tmp432 = extractelement <4 x float> %tmp429, i32 2
@@ -617,16 +512,8 @@ IF67:
%tmp460 = fadd float %tmp459, 1.500000e+00
%tmp461 = fmul float %tmp454, %tmp458
%tmp462 = fadd float %tmp461, 1.500000e+00
- %tmp463 = bitcast float %tmp462 to i32
- %tmp464 = bitcast float %tmp460 to i32
- %tmp465 = bitcast float %tmp456 to i32
- %tmp466 = insertelement <4 x i32> undef, i32 %tmp463, i32 0
- %tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
- %tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
- %tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
%tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
- %tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
- %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) #0
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -713,23 +600,8 @@ IF67:
%tmp554 = fadd float %tmp553, %tmp549
%tmp555 = fmul float %tmp547, %tmp58
%tmp556 = fadd float %tmp555, %tmp550
- %tmp557 = bitcast float %tmp135 to i32
- %tmp558 = bitcast float %tmp181 to i32
- %tmp559 = bitcast float %tmp136 to i32
- %tmp560 = bitcast float %tmp182 to i32
- %tmp561 = bitcast float %temp28.1 to i32
- %tmp562 = bitcast float %temp29.1 to i32
- %tmp563 = insertelement <8 x i32> undef, i32 %tmp557, i32 0
- %tmp564 = insertelement <8 x i32> %tmp563, i32 %tmp558, i32 1
- %tmp565 = insertelement <8 x i32> %tmp564, i32 %tmp559, i32 2
- %tmp566 = insertelement <8 x i32> %tmp565, i32 %tmp560, i32 3
- %tmp567 = insertelement <8 x i32> %tmp566, i32 %tmp561, i32 4
- %tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
- %tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
- %tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
%tmp75.bc = bitcast <4 x i32> %tmp75 to <4 x i32>
- %tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
- %tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i1 0, i32 0, i32 0)
%tmp572 = extractelement <4 x float> %tmp571, i32 0
%tmp573 = extractelement <4 x float> %tmp571, i32 1
%tmp574 = extractelement <4 x float> %tmp571, i32 2
@@ -745,14 +617,7 @@ IF67:
ret void
ENDIF66: ; preds = %LOOP65
- %tmp585 = bitcast float %temp28.1 to i32
- %tmp586 = bitcast float %temp29.1 to i32
- %tmp587 = insertelement <8 x i32> %tmp236, i32 %tmp585, i32 4
- %tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
- %tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
- %tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
- %tmp590.bc = bitcast <8 x i32> %tmp590 to <8 x float>
- %tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp590.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %tmp135, float %tmp181, float %tmp136, float %tmp182, float %temp28.1, float %temp29.1, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i1 0, i32 0, i32 0)
%tmp592 = extractelement <4 x float> %tmp591, i32 3
%tmp593 = fcmp oge float %temp30.1, %tmp592
%tmp594 = sext i1 %tmp593 to i32
@@ -1140,13 +1005,8 @@ main_body:
%tmp218 = fmul float %., %tmp53
%tmp219 = fmul float %arg13, %tmp46
%tmp220 = fmul float %tmp196, %tmp47
- %tmp221 = bitcast float %p2.i132 to i32
- %tmp222 = bitcast float %p2.i126 to i32
- %tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
- %tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
%tmp132.bc = bitcast <4 x i32> %tmp132 to <4 x i32>
- %tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
- %tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp225 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i132, float %p2.i126, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i1 0, i32 0, i32 0)
%tmp226 = extractelement <4 x float> %tmp225, i32 0
%tmp227 = extractelement <4 x float> %tmp225, i32 1
%tmp228 = extractelement <4 x float> %tmp225, i32 2
@@ -1220,7 +1080,7 @@ LOOP:
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
%tmp148.bc = bitcast <4 x i32> %tmp148 to <4 x i32>
%tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
- %tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %temp168.0, float %temp169.0, float 0.0, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i1 0, i32 0, i32 0)
%tmp283 = extractelement <4 x float> %tmp282, i32 3
%tmp284 = fadd float %temp168.0, %tmp273
%tmp285 = fadd float %temp169.0, %tmp274
@@ -1279,13 +1139,8 @@ IF189:
%tmp335 = fadd float %p2.i162, %tmp329
%tmp336 = fadd float %p2.i156, %tmp331
%tmp337 = fadd float %p2.i150, %tmp333
- %tmp338 = bitcast float %tmp334 to i32
- %tmp339 = bitcast float %tmp335 to i32
- %tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
- %tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
%tmp136.bc = bitcast <4 x i32> %tmp136 to <4 x i32>
- %a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
- %tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp0 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp334, float %tmp335, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i1 0, i32 0, i32 0)
%tmp343 = extractelement <4 x float> %tmp0, i32 0
%tmp344 = extractelement <4 x float> %tmp0, i32 1
%tmp345 = extractelement <4 x float> %tmp0, i32 2
@@ -1313,25 +1168,15 @@ IF189:
%one.sub.ac.i30 = fmul float %one.sub.a.i29, %tmp353
%mul.i31 = fmul float %tmp345, %tmp353
%result.i32 = fadd float %mul.i31, %one.sub.ac.i30
- %tmp358 = bitcast float %tmp336 to i32
- %tmp359 = bitcast float %tmp337 to i32
- %tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
- %tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
%tmp152.bc = bitcast <4 x i32> %tmp152 to <4 x i32>
- %a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
- %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp336, float %tmp337, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i1 0, i32 0, i32 0)
%tmp363 = extractelement <4 x float> %tmp1, i32 2
%tmp364 = fmul float %result.i40, %result.i
%tmp365 = fmul float %result.i36, %result.i44
%tmp366 = fmul float %result.i32, %result.i42
%tmp367 = fmul float %tmp354, %tmp229
- %tmp368 = bitcast float %tmp310 to i32
- %tmp369 = bitcast float %tmp311 to i32
- %tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
- %tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
%tmp140.bc = bitcast <4 x i32> %tmp140 to <4 x i32>
- %a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
- %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp310, float %tmp311, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i1 0, i32 0, i32 0)
%tmp373 = extractelement <4 x float> %tmp2, i32 0
%tmp374 = extractelement <4 x float> %tmp2, i32 1
%tmp375 = extractelement <4 x float> %tmp2, i32 2
@@ -1343,13 +1188,8 @@ IF189:
%tmp381 = icmp ne i32 %tmp380, 0
%.224 = select i1 %tmp381, float %tmp374, float %tmp373
%.225 = select i1 %tmp381, float %tmp376, float %tmp374
- %tmp382 = bitcast float %tmp320 to i32
- %tmp383 = bitcast float %tmp321 to i32
- %tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
- %tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
%tmp144.bc = bitcast <4 x i32> %tmp144 to <4 x i32>
- %a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
- %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp320, float %tmp321, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i1 0, i32 0, i32 0)
%tmp387 = extractelement <4 x float> %tmp3, i32 0
%tmp388 = extractelement <4 x float> %tmp3, i32 1
%tmp389 = extractelement <4 x float> %tmp3, i32 2
@@ -1442,13 +1282,8 @@ ENDIF197:
%temp14.0 = phi float [ %tmp465, %IF198 ], [ %tmp457, %IF189 ]
%temp13.0 = phi float [ %tmp464, %IF198 ], [ %tmp456, %IF189 ]
%temp12.0 = phi float [ %tmp463, %IF198 ], [ %tmp455, %IF189 ]
- %tmp466 = bitcast float %tmp219 to i32
- %tmp467 = bitcast float %tmp220 to i32
- %tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
- %tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
%tmp160.bc = bitcast <4 x i32> %tmp160 to <4 x i32>
- %tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
- %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp219, float %tmp220, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i1 0, i32 0, i32 0)
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -1461,13 +1296,8 @@ ENDIF197:
%tmp480 = fadd float %tmp479, %tmp40
%tmp481 = fmul float %tmp474, %tmp41
%tmp482 = fadd float %tmp481, %tmp42
- %tmp483 = bitcast float %p2.i144 to i32
- %tmp484 = bitcast float %p2.i138 to i32
- %tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
- %tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
%tmp156.bc = bitcast <4 x i32> %tmp156 to <4 x i32>
- %tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
- %tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp487 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i144, float %p2.i138, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i1 0, i32 0, i32 0)
%tmp488 = extractelement <4 x float> %tmp487, i32 0
%tmp489 = extractelement <4 x float> %tmp487, i32 1
%tmp490 = extractelement <4 x float> %tmp487, i32 2
@@ -1667,27 +1497,11 @@ ENDIF209:
%tmp651 = fadd float %tmp650, 1.000000e+00
%max.0.i11 = call float @llvm.maxnum.f32(float %tmp651, float 0.000000e+00)
%clamp.i12 = call float @llvm.minnum.f32(float %max.0.i11, float 1.000000e+00)
- %tmp653 = bitcast float %tmp642 to i32
- %tmp654 = bitcast float %tmp644 to i32
- %tmp655 = bitcast float 0.000000e+00 to i32
- %tmp656 = insertelement <4 x i32> undef, i32 %tmp653, i32 0
- %tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
- %tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
- %tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
%tmp128.bc = bitcast <4 x i32> %tmp128 to <4 x i32>
- %tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
- %tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp642, float %tmp644, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
%tmp661 = extractelement <4 x float> %tmp660, i32 0
%tmp662 = extractelement <4 x float> %tmp660, i32 1
- %tmp663 = bitcast float %tmp646 to i32
- %tmp664 = bitcast float %tmp648 to i32
- %tmp665 = bitcast float 0.000000e+00 to i32
- %tmp666 = insertelement <4 x i32> undef, i32 %tmp663, i32 0
- %tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
- %tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
- %tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
- %tmp669.bc = bitcast <4 x i32> %tmp669 to <4 x float>
- %tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp669.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %tmp646, float %tmp648, float 0.0, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i1 0, i32 0, i32 0)
%tmp671 = extractelement <4 x float> %tmp670, i32 0
%tmp672 = extractelement <4 x float> %tmp670, i32 1
%tmp673 = fsub float -0.000000e+00, %tmp662
@@ -1865,10 +1679,10 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/skip-if-dead.ll Thu Jun 21 06:37:19 2018
@@ -355,7 +355,7 @@ bb7:
; CHECK: [[END]]:
; CHECK: s_endpgm
-define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
+define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
bb:
%tmp = fcmp ult float %arg1, 0.000000e+00
br i1 %tmp, label %bb3, label %bb4
@@ -365,7 +365,7 @@ bb3:
br label %bb4
bb4: ; preds = %bb3, %bb
- %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp6 = extractelement <4 x float> %tmp5, i32 0
%tmp7 = fcmp une float %tmp6, 0.000000e+00
br i1 %tmp7, label %bb8, label %bb9
@@ -378,7 +378,7 @@ bb9:
ret void
}
-declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.AMDGPU.kill(float) #0
attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/split-smrd.ll Thu Jun 21 06:37:19 2018
@@ -21,7 +21,7 @@ bb3:
%tmp6 = sext i32 %tmp5 to i64
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
- %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -30,7 +30,7 @@ bb3:
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll Thu Jun 21 06:37:19 2018
@@ -65,7 +65,7 @@ bb7:
br label %bb4
bb9: ; preds = %bb2
- %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp11 = extractelement <4 x float> %tmp10, i32 1
%tmp12 = extractelement <4 x float> %tmp10, i32 3
br label %bb14
@@ -97,7 +97,7 @@ bb27:
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll Thu Jun 21 06:37:19 2018
@@ -34,9 +34,8 @@ bb:
%tmp = load volatile i32, i32 addrspace(1)* undef, align 4
%tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
%tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
- %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
- %tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float>
- %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp3 = bitcast i32 %tmp1 to float
+ %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp5 = extractelement <4 x float> %tmp4, i32 0
%tmp6 = fmul float %tmp5, undef
%tmp7 = fadd float %tmp6, %tmp6
@@ -84,7 +83,7 @@ define amdgpu_kernel void @partially_und
ret void
}
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/unigine-liveness-crash.ll Thu Jun 21 06:37:19 2018
@@ -17,25 +17,20 @@ main_body:
%j.f.i = bitcast i32 %j.i to float
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
- %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp24 = extractelement <4 x float> %tmp23, i32 3
%tmp25 = fmul float %tmp24, %tmp24
%tmp26 = fmul float %p2.i, %p2.i
%tmp27 = fadd float %tmp26, %tmp26
- %tmp28 = bitcast float %tmp27 to i32
- %tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
- %tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1
- %tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2
- %tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float>
- %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp33 = extractelement <4 x float> %tmp32, i32 0
%tmp34 = fadd float %tmp33, %tmp33
%tmp35 = fadd float %tmp34, %tmp34
%tmp36 = fadd float %tmp35, %tmp35
%tmp37 = fadd float %tmp36, %tmp36
%tmp38 = fadd float %tmp37, %tmp37
- %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp40 = extractelement <4 x float> %tmp39, i32 0
%tmp41 = extractelement <4 x float> %tmp39, i32 1
%tmp42 = extractelement <4 x float> %tmp39, i32 2
@@ -53,17 +48,12 @@ main_body:
%tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
%tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
%tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
- %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp57 = extractelement <4 x float> %tmp56, i32 0
%tmp58 = fadd float %tmp38, %tmp57
%tmp59 = fadd float %tmp46, %tmp46
%tmp60 = fadd float %tmp47, %tmp47
- %tmp61 = bitcast float %tmp59 to i32
- %tmp62 = bitcast float %tmp60 to i32
- %tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1
- %tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2
- %tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float>
- %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp66 = extractelement <4 x float> %tmp65, i32 0
%tmp67 = fadd float %tmp58, %tmp66
%tmp68 = fmul float %tmp67, 1.250000e-01
@@ -101,10 +91,7 @@ IF29:
br label %ENDIF25
ENDIF28: ; preds = %LOOP
- %tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1
- %tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2
- %tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float>
- %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp88 = extractelement <4 x float> %tmp87, i32 0
%tmp89 = fadd float %tmp88, %tmp88
br label %LOOP
@@ -114,9 +101,8 @@ declare float @llvm.minnum.f32(float, fl
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=335229&r1=335228&r2=335229&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Thu Jun 21 06:37:19 2018
@@ -5,10 +5,10 @@
;
;CHECK-LABEL: {{^}}test1:
;CHECK-NOT: s_wqm
-define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
+define amdgpu_ps <4 x float> @test1(<8 x i32> inreg %rsrc, i32 %c) {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
- call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %tex, <4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
+ %tex = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %tex, i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x float> %tex
}
@@ -30,11 +30,9 @@ main_body:
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
- %inst27 = insertelement <2 x float> undef, float %inst26, i32 0
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
- %inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %tex
}
@@ -49,9 +47,9 @@ main_body:
;CHECK: store
;CHECK-NOT: exec
;CHECK: .size test3
-define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x float> %c) {
+define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %c) {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex.1 = bitcast <4 x float> %tex to <4 x i32>
%tex.2 = extractelement <4 x i32> %tex.1, i32 0
@@ -77,11 +75,9 @@ main_body:
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
%inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0)
- %inst27 = insertelement <2 x float> undef, float %inst26, i32 0
%inst28 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 1, i32 0, i32 %m0)
%inst29 = tail call float @llvm.amdgcn.interp.p2(float %inst28, float %inst24, i32 1, i32 0, i32 %m0)
- %inst30 = insertelement <2 x float> %inst27, float %inst29, i32 1
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %inst30, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %inst26, float %inst29, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%tex.0 = extractelement <4 x float> %tex, i32 0
%tex.1 = extractelement <4 x float> %tex, i32 1
%tex.2 = extractelement <4 x float> %tex, i32 2
@@ -108,8 +104,9 @@ main_body:
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
%c.1.bc = bitcast i32 %c.1 to float
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@@ -361,8 +358,9 @@ main_body:
IF:
%c.bc = bitcast i32 %c to float
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%data.if = extractelement <4 x float> %dtex, i32 0
br label %END
@@ -403,8 +401,9 @@ main_body:
IF:
%c.bc = bitcast i32 %c to float
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%data.if = extractelement <4 x float> %dtex, i32 0
br label %END
@@ -460,7 +459,7 @@ ELSE:
END:
%coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
%coord.END.bc = bitcast i32 %coord.END to float
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %tex
}
@@ -477,8 +476,9 @@ END:
;CHECK-DAG: store
define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%dtex.1 = extractelement <4 x float> %dtex, i32 0
call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
@@ -523,8 +523,9 @@ IF:
br label %END
END:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@@ -545,7 +546,7 @@ END:
;CHECK: image_sample
define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%idx.0 = extractelement <2 x i32> %idx, i32 0
%data.0 = extractelement <2 x float> %data, i32 0
call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
@@ -555,8 +556,9 @@ main_body:
%idx.1 = extractelement <2 x i32> %idx, i32 1
%data.1 = extractelement <2 x float> %data, i32 1
call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
- %tex2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex2.0 = extractelement <4 x float> %tex2, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex2.0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%out = fadd <4 x float> %tex, %dtex
ret <4 x float> %out
@@ -576,8 +578,9 @@ main_body:
; CHECK: v_cmpx_
define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
main_body:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
@@ -618,7 +621,7 @@ main_body:
; CHECK: ; return
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
entry:
- call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
+ call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
br label %loop
loop:
@@ -628,7 +631,8 @@ loop:
br i1 %cc, label %break, label %body
body:
- %c.next = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c.iv, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %c.iv0 = extractelement <4 x float> %c.iv, i32 0
+ %c.next = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.iv0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%ctr.next = fadd float %ctr.iv, 2.0
br label %loop
@@ -669,7 +673,7 @@ entry:
%c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx
%c = load i32, i32 addrspace(5)* %c.gep, align 4
%c.bc = bitcast i32 %c to float
- %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
ret void
@@ -687,8 +691,9 @@ entry:
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
ret <4 x float> %dtex
}
@@ -700,8 +705,9 @@ define amdgpu_ps <4 x float> @test_nonvo
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
entry:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
%cc = icmp sgt i32 %c, 0
br i1 %cc, label %if, label %else
@@ -733,11 +739,11 @@ main_body:
br i1 %cc, label %if, label %else
if:
- %r.if = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float 0.0, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %r.if = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
br label %end
else:
- %r.else = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0.0, float bitcast (i32 1 to float)>, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %r.else = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.0, float bitcast (i32 1 to float), <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
br label %end
end:
@@ -757,8 +763,9 @@ end:
define amdgpu_ps float @test_wwm_within_wqm(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data) {
main_body:
%c.bc = bitcast i32 %c to float
- %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
- %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
%cmp = icmp eq i32 %z, 0
br i1 %cmp, label %IF, label %ENDIF
@@ -777,14 +784,13 @@ ENDIF:
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
declare void @llvm.AMDGPU.kill(float) #1
declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3
More information about the llvm-commits
mailing list