[PATCH] D26724: AMDGPU/SI: Use float as the operand type for amdgcn.interp intrinsics
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 18:06:58 PST 2016
tstellarAMD created this revision.
tstellarAMD added reviewers: arsenm, nhaehnle.
tstellarAMD added a subscriber: llvm-commits.
Herald added subscribers: tony-tye, yaxunl, wdng, kzhuravl.
https://reviews.llvm.org/D26724
Files:
include/llvm/IR/IntrinsicsAMDGPU.td
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIInstructions.td
test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
Index: test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -6,23 +6,23 @@
;GCN: s_mov_b32 m0, s{{[0-9]+}}
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
-define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
+define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
main_body:
- %i = extractelement <2 x i32> %4, i32 0
- %j = extractelement <2 x i32> %4, i32 1
- %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3)
- %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3)
- %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3)
- %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3)
+ %i = extractelement <2 x float> %4, i32 0
+ %j = extractelement <2 x float> %4, i32 1
+ %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
+ %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
+ %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
+ %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1)
ret void
}
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -52,7 +52,7 @@
(outs VGPR_32:$dst),
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
"v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
+ [(set f32:$dst, (AMDGPUinterp_p1 f32:$i, (i32 imm:$attr_chan),
(i32 imm:$attr)))]
>;
@@ -75,7 +75,7 @@
(outs VGPR_32:$dst),
(ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
"v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
+ [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, f32:$j, (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2514,6 +2514,8 @@
DAG.getConstant(0, DL, MVT::i32));
SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
DAG.getConstant(1, DL, MVT::i32));
+ I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I);
+ J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J);
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
SDValue Glue = M0.getValue(1);
SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -476,15 +476,15 @@
def int_amdgcn_interp_p1 :
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
Intrinsic<[llvm_float_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>; // This intrinsic reads from lds, but the memory
// values are constant, so it behaves like IntrNoMem.
// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
GCCBuiltin<"__builtin_amdgcn_interp_p2">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is
// IntrNoMem.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26724.78116.patch
Type: text/x-patch
Size: 4789 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161116/76ac97e9/attachment.bin>
More information about the llvm-commits
mailing list