[llvm] r258787 - AMDGPU: Remove old sample intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 25 20:38:09 PST 2016
Author: arsenm
Date: Mon Jan 25 22:38:08 2016
New Revision: 258787
URL: http://llvm.org/viewvc/llvm-project?rev=258787&view=rev
Log:
AMDGPU: Remove old sample intrinsics
I did my best to update all the tests that just happened to
use the old intrinsics so that they use the newer ones.
I'm not sure I got all of the immediate operand conversions
correct, since the value seems to have been ignored by the
old pattern, but I don't think it really matters.
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll
- copied, changed from r258786, llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll
Removed:
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.imageload.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sampled.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td
llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jan 25 22:38:08 2016
@@ -1414,14 +1414,6 @@ SDValue SITargetLowering::LowerINTRINSIC
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
- case AMDGPUIntrinsic::SI_sample:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
- case AMDGPUIntrinsic::SI_sampleb:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
- case AMDGPUIntrinsic::SI_sampled:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
- case AMDGPUIntrinsic::SI_samplel:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
Op.getOperand(1),
@@ -1603,15 +1595,6 @@ SDValue SITargetLowering::LowerLOAD(SDVa
return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
}
-SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
- const SDValue &Op,
- SelectionDAG &DAG) const {
- return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4));
-}
-
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() != MVT::i64)
return SDValue();
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Mon Jan 25 22:38:08 2016
@@ -23,8 +23,6 @@ namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset, bool Signed) const;
- SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
- SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Jan 25 22:38:08 2016
@@ -2442,38 +2442,6 @@ class ImageLoadArrayMSAAPattern<Intrinsi
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
-multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
- def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
- def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
- def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
- def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
-}
-
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
-defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
-
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
-defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
-
-/* Image resource information */
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
-def : Pat <
- (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
- (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
->;
-
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
Modified: llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td Mon Jan 25 22:38:08 2016
@@ -172,16 +172,6 @@ let TargetPrefix = "SI", isTarget = 1 in
def int_SI_image_load_mip : Image;
def int_SI_getresinfo : Image;
- // Deprecated image and sample intrinsics.
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_SI_sample : Sample;
- def int_SI_sampleb : Sample;
- def int_SI_sampled : Sample;
- def int_SI_samplel : Sample;
- def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
-
/* Interpolation Intrinsics */
def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
Modified: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll Mon Jan 25 22:38:08 2016
@@ -4,29 +4,25 @@
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
-
define void @main() #0 {
-main_body:
- %0 = fptosi float undef to i32
- %1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
- %2 = extractelement <4 x i32> %1, i32 0
- %3 = and i32 %0, 7
- %4 = shl i32 1, %3
- %5 = and i32 %2, %4
- %6 = icmp eq i32 %5, 0
- %.10 = select i1 %6, float 0.000000e+00, float undef
- %7 = call i32 @llvm.SI.packf16(float undef, float %.10)
- %8 = bitcast i32 %7 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
+bb:
+ %tmp = fptosi float undef to i32
+ %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tmp2.f = extractelement <4 x float> %tmp1, i32 0
+ %tmp2 = bitcast float %tmp2.f to i32
+ %tmp3 = and i32 %tmp, 7
+ %tmp4 = shl i32 1, %tmp3
+ %tmp5 = and i32 %tmp2, %tmp4
+ %tmp6 = icmp eq i32 %tmp5, 0
+ %tmp7 = select i1 %tmp6, float 0.000000e+00, float undef
+ %tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
+ %tmp9 = bitcast i32 %tmp8 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
ret void
}
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
-
-; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare i32 @llvm.SI.packf16(float, float) #1
-
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
Copied: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll (from r258786, llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll?p2=llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll&p1=llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll&r1=258786&r2=258787&rev=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.image.sample-masked.ll Mon Jan 25 22:38:08 2016
@@ -6,7 +6,7 @@
define void @v1(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 0
%3 = extractelement <4 x float> %1, i32 2
%4 = extractelement <4 x float> %1, i32 3
@@ -19,7 +19,7 @@ entry:
define void @v2(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 0
%3 = extractelement <4 x float> %1, i32 1
%4 = extractelement <4 x float> %1, i32 3
@@ -32,7 +32,7 @@ entry:
define void @v3(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 1
%3 = extractelement <4 x float> %1, i32 2
%4 = extractelement <4 x float> %1, i32 3
@@ -45,7 +45,7 @@ entry:
define void @v4(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 0
%3 = extractelement <4 x float> %1, i32 1
%4 = extractelement <4 x float> %1, i32 2
@@ -58,7 +58,7 @@ entry:
define void @v5(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 1
%3 = extractelement <4 x float> %1, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
@@ -70,7 +70,7 @@ entry:
define void @v6(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 1
%3 = extractelement <4 x float> %1, i32 2
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
@@ -82,14 +82,14 @@ entry:
define void @v7(i32 %a1) #0 {
entry:
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+ %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%2 = extractelement <4 x float> %1, i32 0
%3 = extractelement <4 x float> %1, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
ret void
}
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
+declare <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.imageload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.imageload.ll?rev=258786&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.imageload.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.imageload.ll (removed)
@@ -1,132 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v1,
- <32 x i8> undef, i32 1)
- %res2 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v2,
- <32 x i8> undef, i32 2)
- %res3 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v3,
- <32 x i8> undef, i32 3)
- %res4 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v4,
- <32 x i8> undef, i32 4)
- %res5 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v5,
- <32 x i8> undef, i32 5)
- %res6 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v6,
- <32 x i8> undef, i32 6)
- %res10 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v10,
- <32 x i8> undef, i32 10)
- %res11 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v11,
- <32 x i8> undef, i32 11)
- %res15 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v15,
- <32 x i8> undef, i32 15)
- %res16 = call <4 x i32> @llvm.SI.imageload.(<4 x i32> %v16,
- <32 x i8> undef, i32 16)
- %e1 = extractelement <4 x i32> %res1, i32 0
- %e2 = extractelement <4 x i32> %res2, i32 1
- %e3 = extractelement <4 x i32> %res3, i32 2
- %e4 = extractelement <4 x i32> %res4, i32 3
- %t0 = extractelement <4 x i32> %res5, i32 0
- %t1 = extractelement <4 x i32> %res5, i32 1
- %e5 = add i32 %t0, %t1
- %t2 = extractelement <4 x i32> %res6, i32 0
- %t3 = extractelement <4 x i32> %res6, i32 2
- %e6 = add i32 %t2, %t3
- %t10 = extractelement <4 x i32> %res10, i32 2
- %t11 = extractelement <4 x i32> %res10, i32 3
- %e10 = add i32 %t10, %t11
- %t12 = extractelement <4 x i32> %res11, i32 0
- %t13 = extractelement <4 x i32> %res11, i32 1
- %t14 = extractelement <4 x i32> %res11, i32 2
- %t15 = add i32 %t12, %t13
- %e11 = add i32 %t14, %t15
- %t28 = extractelement <4 x i32> %res15, i32 0
- %t29 = extractelement <4 x i32> %res15, i32 1
- %t30 = extractelement <4 x i32> %res15, i32 2
- %t31 = extractelement <4 x i32> %res15, i32 3
- %t32 = add i32 %t28, %t29
- %t33 = add i32 %t30, %t31
- %e15 = add i32 %t32, %t33
- %e16 = extractelement <4 x i32> %res16, i32 3
- %s1 = add i32 %e1, %e2
- %s2 = add i32 %s1, %e3
- %s3 = add i32 %s2, %e4
- %s4 = add i32 %s3, %e5
- %s5 = add i32 %s4, %e6
- %s9 = add i32 %s5, %e10
- %s10 = add i32 %s9, %e11
- %s14 = add i32 %s10, %e15
- %s15 = add i32 %s14, %e16
- %s16 = bitcast i32 %s15 to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
- ret void
-}
-
-; Test that ccordinates are stored in vgprs and not sgprs
-; CHECK: vgpr_coords
-; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
-define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
-main_body:
- %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
- %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
- %22 = getelementptr float, float addrspace(2)* %21, i32 0
- %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
- %24 = getelementptr float, float addrspace(2)* %21, i32 1
- %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
- %26 = getelementptr float, float addrspace(2)* %21, i32 4
- %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
- %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
- %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
- %30 = bitcast float %27 to i32
- %31 = bitcast float %23 to i32
- %32 = bitcast float %25 to i32
- %33 = insertelement <4 x i32> undef, i32 %31, i32 0
- %34 = insertelement <4 x i32> %33, i32 %32, i32 1
- %35 = insertelement <4 x i32> %34, i32 %30, i32 2
- %36 = insertelement <4 x i32> %35, i32 undef, i32 3
- %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
- %38 = extractelement <4 x i32> %37, i32 0
- %39 = extractelement <4 x i32> %37, i32 1
- %40 = extractelement <4 x i32> %37, i32 2
- %41 = extractelement <4 x i32> %37, i32 3
- %42 = bitcast i32 %38 to float
- %43 = bitcast i32 %39 to float
- %44 = bitcast i32 %40 to float
- %45 = bitcast i32 %41 to float
- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
- ret void
-}
-
-declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <32 x i8>, i32) readnone
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
-attributes #1 = { nounwind readnone }
-
-!0 = !{!"const", null}
-!1 = !{}
-!2 = !{!0, !0, i64 0, i32 1}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll?rev=258786&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.resinfo.ll (removed)
@@ -1,111 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
- i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
- %res1 = call <4 x i32> @llvm.SI.resinfo(i32 %a1, <32 x i8> undef, i32 1)
- %res2 = call <4 x i32> @llvm.SI.resinfo(i32 %a2, <32 x i8> undef, i32 2)
- %res3 = call <4 x i32> @llvm.SI.resinfo(i32 %a3, <32 x i8> undef, i32 3)
- %res4 = call <4 x i32> @llvm.SI.resinfo(i32 %a4, <32 x i8> undef, i32 4)
- %res5 = call <4 x i32> @llvm.SI.resinfo(i32 %a5, <32 x i8> undef, i32 5)
- %res6 = call <4 x i32> @llvm.SI.resinfo(i32 %a6, <32 x i8> undef, i32 6)
- %res7 = call <4 x i32> @llvm.SI.resinfo(i32 %a7, <32 x i8> undef, i32 7)
- %res8 = call <4 x i32> @llvm.SI.resinfo(i32 %a8, <32 x i8> undef, i32 8)
- %res9 = call <4 x i32> @llvm.SI.resinfo(i32 %a9, <32 x i8> undef, i32 9)
- %res10 = call <4 x i32> @llvm.SI.resinfo(i32 %a10, <32 x i8> undef, i32 10)
- %res11 = call <4 x i32> @llvm.SI.resinfo(i32 %a11, <32 x i8> undef, i32 11)
- %res12 = call <4 x i32> @llvm.SI.resinfo(i32 %a12, <32 x i8> undef, i32 12)
- %res13 = call <4 x i32> @llvm.SI.resinfo(i32 %a13, <32 x i8> undef, i32 13)
- %res14 = call <4 x i32> @llvm.SI.resinfo(i32 %a14, <32 x i8> undef, i32 14)
- %res15 = call <4 x i32> @llvm.SI.resinfo(i32 %a15, <32 x i8> undef, i32 15)
- %res16 = call <4 x i32> @llvm.SI.resinfo(i32 %a16, <32 x i8> undef, i32 16)
- %e1 = extractelement <4 x i32> %res1, i32 0
- %e2 = extractelement <4 x i32> %res2, i32 1
- %e3 = extractelement <4 x i32> %res3, i32 2
- %e4 = extractelement <4 x i32> %res4, i32 3
- %t0 = extractelement <4 x i32> %res5, i32 0
- %t1 = extractelement <4 x i32> %res5, i32 1
- %e5 = add i32 %t0, %t1
- %t2 = extractelement <4 x i32> %res6, i32 0
- %t3 = extractelement <4 x i32> %res6, i32 2
- %e6 = add i32 %t2, %t3
- %t4 = extractelement <4 x i32> %res7, i32 0
- %t5 = extractelement <4 x i32> %res7, i32 3
- %e7 = add i32 %t4, %t5
- %t6 = extractelement <4 x i32> %res8, i32 1
- %t7 = extractelement <4 x i32> %res8, i32 2
- %e8 = add i32 %t6, %t7
- %t8 = extractelement <4 x i32> %res9, i32 1
- %t9 = extractelement <4 x i32> %res9, i32 3
- %e9 = add i32 %t8, %t9
- %t10 = extractelement <4 x i32> %res10, i32 2
- %t11 = extractelement <4 x i32> %res10, i32 3
- %e10 = add i32 %t10, %t11
- %t12 = extractelement <4 x i32> %res11, i32 0
- %t13 = extractelement <4 x i32> %res11, i32 1
- %t14 = extractelement <4 x i32> %res11, i32 2
- %t15 = add i32 %t12, %t13
- %e11 = add i32 %t14, %t15
- %t16 = extractelement <4 x i32> %res12, i32 0
- %t17 = extractelement <4 x i32> %res12, i32 1
- %t18 = extractelement <4 x i32> %res12, i32 3
- %t19 = add i32 %t16, %t17
- %e12 = add i32 %t18, %t19
- %t20 = extractelement <4 x i32> %res13, i32 0
- %t21 = extractelement <4 x i32> %res13, i32 2
- %t22 = extractelement <4 x i32> %res13, i32 3
- %t23 = add i32 %t20, %t21
- %e13 = add i32 %t22, %t23
- %t24 = extractelement <4 x i32> %res14, i32 1
- %t25 = extractelement <4 x i32> %res14, i32 2
- %t26 = extractelement <4 x i32> %res14, i32 3
- %t27 = add i32 %t24, %t25
- %e14 = add i32 %t26, %t27
- %t28 = extractelement <4 x i32> %res15, i32 0
- %t29 = extractelement <4 x i32> %res15, i32 1
- %t30 = extractelement <4 x i32> %res15, i32 2
- %t31 = extractelement <4 x i32> %res15, i32 3
- %t32 = add i32 %t28, %t29
- %t33 = add i32 %t30, %t31
- %e15 = add i32 %t32, %t33
- %e16 = extractelement <4 x i32> %res16, i32 3
- %s1 = add i32 %e1, %e2
- %s2 = add i32 %s1, %e3
- %s3 = add i32 %s2, %e4
- %s4 = add i32 %s3, %e5
- %s5 = add i32 %s4, %e6
- %s6 = add i32 %s5, %e7
- %s7 = add i32 %s6, %e8
- %s8 = add i32 %s7, %e9
- %s9 = add i32 %s8, %e10
- %s10 = add i32 %s9, %e11
- %s11 = add i32 %s10, %e12
- %s12 = add i32 %s11, %e13
- %s13 = add i32 %s12, %e14
- %s14 = add i32 %s13, %e15
- %s15 = add i32 %s14, %e16
- %s16 = bitcast i32 %s15 to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s16, float %s16, float %s16, float %s16)
- ret void
-}
-
-declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll?rev=258786&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample-masked.ll (removed)
@@ -1,96 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s
-
-; CHECK-LABEL: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
-define void @v1(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 2
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v2:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
-define void @v2(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v3:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-define void @v3(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 2
- %4 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v4:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
-define void @v4(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 2
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %4)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v5:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-define void @v5(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v6:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
-define void @v6(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 1
- %3 = extractelement <4 x float> %1, i32 2
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-; CHECK-LABEL: {{^}}v7:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
-define void @v7(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %3, float %3)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample.ll?rev=258786&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sample.ll (removed)
@@ -1,160 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
- %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
- %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
- %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
- %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
- <32 x i8> undef, <16 x i8> undef, i32 1)
- %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
- <32 x i8> undef, <16 x i8> undef, i32 2)
- %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
- <32 x i8> undef, <16 x i8> undef, i32 3)
- %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
- <32 x i8> undef, <16 x i8> undef, i32 4)
- %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
- <32 x i8> undef, <16 x i8> undef, i32 5)
- %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
- <32 x i8> undef, <16 x i8> undef, i32 6)
- %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
- <32 x i8> undef, <16 x i8> undef, i32 7)
- %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
- <32 x i8> undef, <16 x i8> undef, i32 8)
- %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
- <32 x i8> undef, <16 x i8> undef, i32 9)
- %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
- <32 x i8> undef, <16 x i8> undef, i32 10)
- %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
- <32 x i8> undef, <16 x i8> undef, i32 11)
- %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
- <32 x i8> undef, <16 x i8> undef, i32 12)
- %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
- <32 x i8> undef, <16 x i8> undef, i32 13)
- %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
- <32 x i8> undef, <16 x i8> undef, i32 14)
- %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
- <32 x i8> undef, <16 x i8> undef, i32 15)
- %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
- <32 x i8> undef, <16 x i8> undef, i32 16)
- %e1 = extractelement <4 x float> %res1, i32 0
- %e2 = extractelement <4 x float> %res2, i32 1
- %e3 = extractelement <4 x float> %res3, i32 2
- %e4 = extractelement <4 x float> %res4, i32 3
- %t0 = extractelement <4 x float> %res5, i32 0
- %t1 = extractelement <4 x float> %res5, i32 1
- %e5 = fadd float %t0, %t1
- %t2 = extractelement <4 x float> %res6, i32 0
- %t3 = extractelement <4 x float> %res6, i32 2
- %e6 = fadd float %t2, %t3
- %t4 = extractelement <4 x float> %res7, i32 0
- %t5 = extractelement <4 x float> %res7, i32 3
- %e7 = fadd float %t4, %t5
- %t6 = extractelement <4 x float> %res8, i32 1
- %t7 = extractelement <4 x float> %res8, i32 2
- %e8 = fadd float %t6, %t7
- %t8 = extractelement <4 x float> %res9, i32 1
- %t9 = extractelement <4 x float> %res9, i32 3
- %e9 = fadd float %t8, %t9
- %t10 = extractelement <4 x float> %res10, i32 2
- %t11 = extractelement <4 x float> %res10, i32 3
- %e10 = fadd float %t10, %t11
- %t12 = extractelement <4 x float> %res11, i32 0
- %t13 = extractelement <4 x float> %res11, i32 1
- %t14 = extractelement <4 x float> %res11, i32 2
- %t15 = fadd float %t12, %t13
- %e11 = fadd float %t14, %t15
- %t16 = extractelement <4 x float> %res12, i32 0
- %t17 = extractelement <4 x float> %res12, i32 1
- %t18 = extractelement <4 x float> %res12, i32 3
- %t19 = fadd float %t16, %t17
- %e12 = fadd float %t18, %t19
- %t20 = extractelement <4 x float> %res13, i32 0
- %t21 = extractelement <4 x float> %res13, i32 2
- %t22 = extractelement <4 x float> %res13, i32 3
- %t23 = fadd float %t20, %t21
- %e13 = fadd float %t22, %t23
- %t24 = extractelement <4 x float> %res14, i32 1
- %t25 = extractelement <4 x float> %res14, i32 2
- %t26 = extractelement <4 x float> %res14, i32 3
- %t27 = fadd float %t24, %t25
- %e14 = fadd float %t26, %t27
- %t28 = extractelement <4 x float> %res15, i32 0
- %t29 = extractelement <4 x float> %res15, i32 1
- %t30 = extractelement <4 x float> %res15, i32 2
- %t31 = extractelement <4 x float> %res15, i32 3
- %t32 = fadd float %t28, %t29
- %t33 = fadd float %t30, %t31
- %e15 = fadd float %t32, %t33
- %e16 = extractelement <4 x float> %res16, i32 3
- %s1 = fadd float %e1, %e2
- %s2 = fadd float %s1, %e3
- %s3 = fadd float %s2, %e4
- %s4 = fadd float %s3, %e5
- %s5 = fadd float %s4, %e6
- %s6 = fadd float %s5, %e7
- %s7 = fadd float %s6, %e8
- %s8 = fadd float %s7, %e9
- %s9 = fadd float %s8, %e10
- %s10 = fadd float %s9, %e11
- %s11 = fadd float %s10, %e12
- %s12 = fadd float %s11, %e13
- %s13 = fadd float %s12, %e14
- %s14 = fadd float %s13, %e15
- %s15 = fadd float %s14, %e16
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
- ret void
-}
-
-; CHECK: {{^}}v1:
-; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
-define void @v1(i32 %a1) #0 {
-entry:
- %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
- %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
- %2 = extractelement <4 x float> %1, i32 0
- %3 = extractelement <4 x float> %1, i32 1
- %4 = extractelement <4 x float> %1, i32 2
- %5 = extractelement <4 x float> %1, i32 3
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
- ret void
-}
-
-
-declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sampled.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sampled.ll?rev=258786&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sampled.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.sampled.ll (removed)
@@ -1,143 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
-
-define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
- %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
- %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
- %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
- %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
- %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
- %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
- %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
- %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
- %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
- %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
- %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
- %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
- %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
- %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
- %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
- %res1 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v1,
- <32 x i8> undef, <16 x i8> undef, i32 1)
- %res2 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v2,
- <32 x i8> undef, <16 x i8> undef, i32 2)
- %res3 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v3,
- <32 x i8> undef, <16 x i8> undef, i32 3)
- %res4 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v4,
- <32 x i8> undef, <16 x i8> undef, i32 4)
- %res5 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v5,
- <32 x i8> undef, <16 x i8> undef, i32 5)
- %res6 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v6,
- <32 x i8> undef, <16 x i8> undef, i32 6)
- %res7 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v7,
- <32 x i8> undef, <16 x i8> undef, i32 7)
- %res8 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v8,
- <32 x i8> undef, <16 x i8> undef, i32 8)
- %res9 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v9,
- <32 x i8> undef, <16 x i8> undef, i32 9)
- %res10 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v10,
- <32 x i8> undef, <16 x i8> undef, i32 10)
- %res11 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v11,
- <32 x i8> undef, <16 x i8> undef, i32 11)
- %res12 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v12,
- <32 x i8> undef, <16 x i8> undef, i32 12)
- %res13 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v13,
- <32 x i8> undef, <16 x i8> undef, i32 13)
- %res14 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v14,
- <32 x i8> undef, <16 x i8> undef, i32 14)
- %res15 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v15,
- <32 x i8> undef, <16 x i8> undef, i32 15)
- %res16 = call <4 x float> @llvm.SI.sampled.(<4 x i32> %v16,
- <32 x i8> undef, <16 x i8> undef, i32 16)
- %e1 = extractelement <4 x float> %res1, i32 0
- %e2 = extractelement <4 x float> %res2, i32 1
- %e3 = extractelement <4 x float> %res3, i32 2
- %e4 = extractelement <4 x float> %res4, i32 3
- %t0 = extractelement <4 x float> %res5, i32 0
- %t1 = extractelement <4 x float> %res5, i32 1
- %e5 = fadd float %t0, %t1
- %t2 = extractelement <4 x float> %res6, i32 0
- %t3 = extractelement <4 x float> %res6, i32 2
- %e6 = fadd float %t2, %t3
- %t4 = extractelement <4 x float> %res7, i32 0
- %t5 = extractelement <4 x float> %res7, i32 3
- %e7 = fadd float %t4, %t5
- %t6 = extractelement <4 x float> %res8, i32 1
- %t7 = extractelement <4 x float> %res8, i32 2
- %e8 = fadd float %t6, %t7
- %t8 = extractelement <4 x float> %res9, i32 1
- %t9 = extractelement <4 x float> %res9, i32 3
- %e9 = fadd float %t8, %t9
- %t10 = extractelement <4 x float> %res10, i32 2
- %t11 = extractelement <4 x float> %res10, i32 3
- %e10 = fadd float %t10, %t11
- %t12 = extractelement <4 x float> %res11, i32 0
- %t13 = extractelement <4 x float> %res11, i32 1
- %t14 = extractelement <4 x float> %res11, i32 2
- %t15 = fadd float %t12, %t13
- %e11 = fadd float %t14, %t15
- %t16 = extractelement <4 x float> %res12, i32 0
- %t17 = extractelement <4 x float> %res12, i32 1
- %t18 = extractelement <4 x float> %res12, i32 3
- %t19 = fadd float %t16, %t17
- %e12 = fadd float %t18, %t19
- %t20 = extractelement <4 x float> %res13, i32 0
- %t21 = extractelement <4 x float> %res13, i32 2
- %t22 = extractelement <4 x float> %res13, i32 3
- %t23 = fadd float %t20, %t21
- %e13 = fadd float %t22, %t23
- %t24 = extractelement <4 x float> %res14, i32 1
- %t25 = extractelement <4 x float> %res14, i32 2
- %t26 = extractelement <4 x float> %res14, i32 3
- %t27 = fadd float %t24, %t25
- %e14 = fadd float %t26, %t27
- %t28 = extractelement <4 x float> %res15, i32 0
- %t29 = extractelement <4 x float> %res15, i32 1
- %t30 = extractelement <4 x float> %res15, i32 2
- %t31 = extractelement <4 x float> %res15, i32 3
- %t32 = fadd float %t28, %t29
- %t33 = fadd float %t30, %t31
- %e15 = fadd float %t32, %t33
- %e16 = extractelement <4 x float> %res16, i32 3
- %s1 = fadd float %e1, %e2
- %s2 = fadd float %s1, %e3
- %s3 = fadd float %s2, %e4
- %s4 = fadd float %s3, %e5
- %s5 = fadd float %s4, %e6
- %s6 = fadd float %s5, %e7
- %s7 = fadd float %s6, %e8
- %s8 = fadd float %s7, %e9
- %s9 = fadd float %s8, %e10
- %s10 = fadd float %s9, %e11
- %s11 = fadd float %s10, %e12
- %s12 = fadd float %s11, %e13
- %s13 = fadd float %s12, %e14
- %s14 = fadd float %s13, %e15
- %s15 = fadd float %s14, %e16
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
- ret void
-}
-
-declare <4 x float> @llvm.SI.sampled.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="0" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-copy.ll Mon Jan 25 22:38:08 2016
@@ -4,10 +4,14 @@
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
+
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -31,7 +35,7 @@ ENDIF:
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -50,8 +54,8 @@ main_body:
%tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
%tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
%tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
- %tmp36 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
- %tmp37 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp36, !tbaa !0
+ %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
+ %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
%tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
%tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
%tmp40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
@@ -63,7 +67,8 @@ main_body:
%tmp46 = bitcast float %tmp41 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
- %tmp49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp48, <32 x i8> %tmp37, <16 x i8> %tmp39, i32 2)
+ %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
+ %tmp49 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp48, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp50 = extractelement <4 x float> %tmp49, i32 2
%tmp51 = call float @fabs(float %tmp50)
%tmp52 = fmul float %tmp42, %tmp42
@@ -151,7 +156,7 @@ ENDIF24:
; We just want ot make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -200,7 +205,7 @@ declare void @llvm.SI.export(i32, i32, i
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <8 x i32>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.amdgcn.rsq.f32(float) #3
@@ -222,27 +227,28 @@ declare i32 @llvm.SI.packf16(float, floa
; CHECK: image_sample
; CHECK: exp
; CHECK: s_endpgm
-define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
- %tmp23 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp24 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp23, !tbaa !0
+ %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+ %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
%tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
%tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
+ %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
- %val.if = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> zeroinitializer, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
+ %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.if.0 = extractelement <4 x float> %val.if, i32 0
%val.if.1 = extractelement <4 x float> %val.if, i32 1
%val.if.2 = extractelement <4 x float> %val.if, i32 2
br label %endif
else: ; preds = %entry
- %val.else = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> <i32 1, i32 0>, <32 x i8> %tmp24, <16 x i8> %tmp26, i32 2)
+ %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.else.0 = extractelement <4 x float> %val.else, i32 0
%val.else.1 = extractelement <4 x float> %val.else, i32 1
%val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -285,7 +291,7 @@ endif:
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -335,9 +341,8 @@ bb38:
%tmp53 = bitcast float %tmp30 to i32
%tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
%tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
- %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
- %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
- %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
+ %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
+ %tmp58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp55, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
br label %bb71
bb80: ; preds = %bb
@@ -346,9 +351,8 @@ bb80:
%tmp82.2 = add i32 %tmp82, 1
%tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
%tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
- %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
- %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
- %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
+ %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
+ %tmp87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
br label %bb71
bb71: ; preds = %bb80, %bb38
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-lod-bias.ll Mon Jan 25 22:38:08 2016
@@ -6,13 +6,13 @@
; CHECK: {{^}}main:
; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
-define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
%tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
- %tmp22 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %arg2, i32 0
- %tmp23 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp22, !tbaa !0
+ %tmp22 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
+ %tmp23 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp22, !tbaa !0
%tmp24 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
%tmp25 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp24, !tbaa !0
%tmp26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
@@ -24,7 +24,8 @@ main_body:
%tmp32 = insertelement <4 x i32> %tmp31, i32 %tmp29, i32 1
%tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
%tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
- %tmp35 = call <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32> %tmp34, <32 x i8> %tmp23, <16 x i8> %tmp25, i32 2)
+ %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
+ %tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp36 = extractelement <4 x float> %tmp35, i32 0
%tmp37 = extractelement <4 x float> %tmp35, i32 1
%tmp38 = extractelement <4 x float> %tmp35, i32 2
@@ -39,8 +40,8 @@ declare float @llvm.SI.load.const(<16 x
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampleb.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-scheduler.ll Mon Jan 25 22:38:08 2016
@@ -23,7 +23,9 @@ main_body:
%tmp28 = bitcast float %tmp26 to i32
%tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
%tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
- %tmp31 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp30, <32 x i8> %tmp22, <16 x i8> %tmp24, i32 2)
+ %tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
+ %tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
+ %tmp31 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp30, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp32 = extractelement <4 x float> %tmp31, i32 0
%tmp33 = extractelement <4 x float> %tmp31, i32 1
%tmp34 = extractelement <4 x float> %tmp31, i32 2
@@ -39,8 +41,8 @@ main_body:
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-sgpr-spill.ll Mon Jan 25 22:38:08 2016
@@ -22,7 +22,7 @@
; Writing to M0 from an SMRD instruction will hang the GPU.
; CHECK-NOT: s_buffer_load_dword m0
; CHECK: s_endpgm
-define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -64,36 +64,37 @@ main_body:
%tmp57 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 372)
%tmp58 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 376)
%tmp59 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 384)
- %tmp60 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp61 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp60, !tbaa !0
+ %tmp60 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+ %tmp61 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp60, !tbaa !0
%tmp62 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
%tmp63 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp62, !tbaa !0
- %tmp64 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
- %tmp65 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp64, !tbaa !0
+ %tmp63.bc = bitcast <16 x i8> %tmp63 to <4 x i32>
+ %tmp64 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
+ %tmp65 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp64, !tbaa !0
%tmp66 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
%tmp67 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp66, !tbaa !0
- %tmp68 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
- %tmp69 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp68, !tbaa !0
+ %tmp68 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
+ %tmp69 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp68, !tbaa !0
%tmp70 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
%tmp71 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp70, !tbaa !0
- %tmp72 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
- %tmp73 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp72, !tbaa !0
+ %tmp72 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
+ %tmp73 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp72, !tbaa !0
%tmp74 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
%tmp75 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp74, !tbaa !0
- %tmp76 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
- %tmp77 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp76, !tbaa !0
+ %tmp76 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
+ %tmp77 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp76, !tbaa !0
%tmp78 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
%tmp79 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp78, !tbaa !0
- %tmp80 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
- %tmp81 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp80, !tbaa !0
+ %tmp80 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
+ %tmp81 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp80, !tbaa !0
%tmp82 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
%tmp83 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp82, !tbaa !0
- %tmp84 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
- %tmp85 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp84, !tbaa !0
+ %tmp84 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
+ %tmp85 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp84, !tbaa !0
%tmp86 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
%tmp87 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp86, !tbaa !0
- %tmp88 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
- %tmp89 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp88, !tbaa !0
+ %tmp88 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
+ %tmp89 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp88, !tbaa !0
%tmp90 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
%tmp91 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp90, !tbaa !0
%tmp92 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg4, <2 x i32> %arg6)
@@ -272,7 +273,7 @@ ENDIF:
%tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
%tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
%tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
- %tmp243 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp242, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+ %tmp243 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp242, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp244 = extractelement <4 x float> %tmp243, i32 3
%tmp245 = fcmp oge float %temp30.0, %tmp244
%tmp246 = sext i1 %tmp245 to i32
@@ -317,7 +318,8 @@ IF67:
%tmp274 = insertelement <8 x i32> %tmp273, i32 %tmp268, i32 5
%tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
%tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
- %tmp277 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp276, <32 x i8> %tmp65, <16 x i8> %tmp67, i32 2)
+ %tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
+ %tmp277 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp276, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp278 = extractelement <4 x float> %tmp277, i32 0
%tmp279 = extractelement <4 x float> %tmp277, i32 1
%tmp280 = extractelement <4 x float> %tmp277, i32 2
@@ -337,7 +339,8 @@ IF67:
%tmp294 = insertelement <8 x i32> %tmp293, i32 %tmp288, i32 5
%tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
%tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
- %tmp297 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp296, <32 x i8> %tmp81, <16 x i8> %tmp83, i32 2)
+ %tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
+ %tmp297 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp296, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp298 = extractelement <4 x float> %tmp297, i32 0
%tmp299 = extractelement <4 x float> %tmp297, i32 1
%tmp300 = extractelement <4 x float> %tmp297, i32 2
@@ -355,7 +358,8 @@ IF67:
%tmp312 = insertelement <8 x i32> %tmp311, i32 %tmp306, i32 5
%tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
%tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
- %tmp315 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp314, <32 x i8> %tmp77, <16 x i8> %tmp79, i32 2)
+ %tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
+ %tmp315 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp314, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp316 = extractelement <4 x float> %tmp315, i32 0
%tmp317 = extractelement <4 x float> %tmp315, i32 1
%tmp318 = extractelement <4 x float> %tmp315, i32 2
@@ -385,7 +389,7 @@ IF67:
%tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
%tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
%tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
- %tmp345 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp344, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+ %tmp345 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp344, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp346 = extractelement <4 x float> %tmp345, i32 0
%tmp347 = extractelement <4 x float> %tmp345, i32 1
%tmp348 = extractelement <4 x float> %tmp345, i32 2
@@ -415,7 +419,8 @@ IF67:
%tmp372 = insertelement <8 x i32> %tmp371, i32 %tmp366, i32 5
%tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
%tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
- %tmp375 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp374, <32 x i8> %tmp69, <16 x i8> %tmp71, i32 2)
+ %tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
+ %tmp375 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp374, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp376 = extractelement <4 x float> %tmp375, i32 0
%tmp377 = extractelement <4 x float> %tmp375, i32 1
%tmp378 = extractelement <4 x float> %tmp375, i32 2
@@ -469,7 +474,8 @@ IF67:
%tmp426 = insertelement <8 x i32> %tmp425, i32 %tmp420, i32 5
%tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
%tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
- %tmp429 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp428, <32 x i8> %tmp85, <16 x i8> %tmp87, i32 2)
+ %tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
+ %tmp429 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp428, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp430 = extractelement <4 x float> %tmp429, i32 0
%tmp431 = extractelement <4 x float> %tmp429, i32 1
%tmp432 = extractelement <4 x float> %tmp429, i32 2
@@ -510,7 +516,8 @@ IF67:
%tmp467 = insertelement <4 x i32> %tmp466, i32 %tmp464, i32 1
%tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
%tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
- %tmp470 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %tmp469, <32 x i8> %tmp89, <16 x i8> %tmp91, i32 4)
+ %tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
+ %tmp470 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp469, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -611,7 +618,8 @@ IF67:
%tmp568 = insertelement <8 x i32> %tmp567, i32 %tmp562, i32 5
%tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
%tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
- %tmp571 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp570, <32 x i8> %tmp73, <16 x i8> %tmp75, i32 2)
+ %tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
+ %tmp571 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp570, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp572 = extractelement <4 x float> %tmp571, i32 0
%tmp573 = extractelement <4 x float> %tmp571, i32 1
%tmp574 = extractelement <4 x float> %tmp571, i32 2
@@ -635,7 +643,7 @@ ENDIF66:
%tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
%tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
%tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
- %tmp591 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %tmp590, <32 x i8> %tmp61, <16 x i8> %tmp63, i32 2)
+ %tmp591 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp590, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp592 = extractelement <4 x float> %tmp591, i32 3
%tmp593 = fcmp oge float %temp30.1, %tmp592
%tmp594 = sext i1 %tmp593 to i32
@@ -660,7 +668,7 @@ ENDIF66:
; CHECK-LABEL: {{^}}main1:
; CHECK: s_endpgm
-define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
main_body:
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
%tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -767,40 +775,40 @@ main_body:
%tmp122 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 716)
%tmp123 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 864)
%tmp124 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 868)
- %tmp125 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 0
- %tmp126 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp125, !tbaa !0
+ %tmp125 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
+ %tmp126 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp125, !tbaa !0
%tmp127 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
%tmp128 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp127, !tbaa !0
- %tmp129 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 1
- %tmp130 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp129, !tbaa !0
+ %tmp129 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 1
+ %tmp130 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp129, !tbaa !0
%tmp131 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 1
%tmp132 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp131, !tbaa !0
- %tmp133 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 2
- %tmp134 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp133, !tbaa !0
+ %tmp133 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 2
+ %tmp134 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp133, !tbaa !0
%tmp135 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 2
%tmp136 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp135, !tbaa !0
- %tmp137 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 3
- %tmp138 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp137, !tbaa !0
+ %tmp137 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 3
+ %tmp138 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp137, !tbaa !0
%tmp139 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 3
%tmp140 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp139, !tbaa !0
- %tmp141 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 4
- %tmp142 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp141, !tbaa !0
+ %tmp141 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 4
+ %tmp142 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp141, !tbaa !0
%tmp143 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 4
%tmp144 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp143, !tbaa !0
- %tmp145 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 5
- %tmp146 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp145, !tbaa !0
+ %tmp145 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 5
+ %tmp146 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp145, !tbaa !0
%tmp147 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 5
%tmp148 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp147, !tbaa !0
- %tmp149 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 6
- %tmp150 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp149, !tbaa !0
+ %tmp149 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 6
+ %tmp150 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp149, !tbaa !0
%tmp151 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 6
%tmp152 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp151, !tbaa !0
- %tmp153 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 7
- %tmp154 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp153, !tbaa !0
+ %tmp153 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 7
+ %tmp154 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp153, !tbaa !0
%tmp155 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 7
%tmp156 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp155, !tbaa !0
- %tmp157 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %arg2, i64 0, i32 8
- %tmp158 = load <32 x i8>, <32 x i8> addrspace(2)* %tmp157, !tbaa !0
+ %tmp157 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 8
+ %tmp158 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp157, !tbaa !0
%tmp159 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 8
%tmp160 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp159, !tbaa !0
%tmp161 = fcmp ugt float %arg17, 0.000000e+00
@@ -868,7 +876,8 @@ main_body:
%tmp222 = bitcast float %tmp174 to i32
%tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
%tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
- %tmp225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp224, <32 x i8> %tmp130, <16 x i8> %tmp132, i32 2)
+ %tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
+ %tmp225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp224, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp226 = extractelement <4 x float> %tmp225, i32 0
%tmp227 = extractelement <4 x float> %tmp225, i32 1
%tmp228 = extractelement <4 x float> %tmp225, i32 2
@@ -938,7 +947,8 @@ LOOP:
%tmp279 = insertelement <4 x i32> %tmp278, i32 %tmp277, i32 1
%tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
%tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
- %tmp282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp281, <32 x i8> %tmp146, <16 x i8> %tmp148, i32 2)
+ %tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
+ %tmp282 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp281, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp283 = extractelement <4 x float> %tmp282, i32 3
%tmp284 = fadd float %temp168.0, %tmp273
%tmp285 = fadd float %temp169.0, %tmp274
@@ -1001,7 +1011,8 @@ IF189:
%tmp339 = bitcast float %tmp335 to i32
%tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
%tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
- %tmp342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp341, <32 x i8> %tmp134, <16 x i8> %tmp136, i32 2)
+ %tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
+ %tmp342 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp341, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp343 = extractelement <4 x float> %tmp342, i32 0
%tmp344 = extractelement <4 x float> %tmp342, i32 1
%tmp345 = extractelement <4 x float> %tmp342, i32 2
@@ -1033,7 +1044,8 @@ IF189:
%tmp359 = bitcast float %tmp337 to i32
%tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
%tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
- %tmp362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp361, <32 x i8> %tmp150, <16 x i8> %tmp152, i32 2)
+ %tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
+ %tmp362 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp361, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp363 = extractelement <4 x float> %tmp362, i32 2
%tmp364 = fmul float %result.i40, %result.i
%tmp365 = fmul float %result.i36, %result.i44
@@ -1043,7 +1055,8 @@ IF189:
%tmp369 = bitcast float %tmp311 to i32
%tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
%tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
- %tmp372 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp371, <32 x i8> %tmp138, <16 x i8> %tmp140, i32 2)
+ %tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
+ %tmp372 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp371, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp373 = extractelement <4 x float> %tmp372, i32 0
%tmp374 = extractelement <4 x float> %tmp372, i32 1
%tmp375 = extractelement <4 x float> %tmp372, i32 2
@@ -1059,7 +1072,8 @@ IF189:
%tmp383 = bitcast float %tmp321 to i32
%tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
%tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
- %tmp386 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp385, <32 x i8> %tmp142, <16 x i8> %tmp144, i32 2)
+ %tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
+ %tmp386 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp385, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp387 = extractelement <4 x float> %tmp386, i32 0
%tmp388 = extractelement <4 x float> %tmp386, i32 1
%tmp389 = extractelement <4 x float> %tmp386, i32 2
@@ -1155,7 +1169,8 @@ ENDIF197:
%tmp467 = bitcast float %tmp220 to i32
%tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
%tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
- %tmp470 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp469, <32 x i8> %tmp158, <16 x i8> %tmp160, i32 2)
+ %tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
+ %tmp470 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp469, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -1172,7 +1187,8 @@ ENDIF197:
%tmp484 = bitcast float %tmp172 to i32
%tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
%tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
- %tmp487 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp486, <32 x i8> %tmp154, <16 x i8> %tmp156, i32 2)
+ %tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
+ %tmp487 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp486, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp488 = extractelement <4 x float> %tmp487, i32 0
%tmp489 = extractelement <4 x float> %tmp487, i32 1
%tmp490 = extractelement <4 x float> %tmp487, i32 2
@@ -1377,7 +1393,8 @@ ENDIF209:
%tmp657 = insertelement <4 x i32> %tmp656, i32 %tmp654, i32 1
%tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
%tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
- %tmp660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp659, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
+ %tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
+ %tmp660 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp659, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp661 = extractelement <4 x float> %tmp660, i32 0
%tmp662 = extractelement <4 x float> %tmp660, i32 1
%tmp663 = bitcast float %tmp646 to i32
@@ -1387,7 +1404,7 @@ ENDIF209:
%tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
%tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
%tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
- %tmp670 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %tmp669, <32 x i8> %tmp126, <16 x i8> %tmp128, i32 2)
+ %tmp670 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp669, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp671 = extractelement <4 x float> %tmp670, i32 0
%tmp672 = extractelement <4 x float> %tmp670, i32 1
%tmp673 = fsub float -0.000000e+00, %tmp662
@@ -1549,10 +1566,11 @@ ELSE214:
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+
declare float @llvm.exp2.f32(float) #2
@@ -1572,7 +1590,7 @@ declare float @ceil(float) #3
declare float @llvm.amdgcn.rsq.f32(float) #2
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
@@ -1581,7 +1599,8 @@ declare <4 x float> @llvm.AMDGPU.cube(<4
declare float @fabs(float) #1
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #2
+declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #2
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll?rev=258787&r1=258786&r2=258787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll Mon Jan 25 22:38:08 2016
@@ -67,7 +67,7 @@ bb7:
br label %bb4
bb9: ; preds = %bb2
- %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2)
+ %tmp10 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp11 = extractelement <4 x float> %tmp10, i32 1
%tmp12 = extractelement <4 x float> %tmp10, i32 3
br label %bb14
@@ -98,7 +98,7 @@ bb27:
}
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
More information about the llvm-commits
mailing list