[llvm] r229238 - R600/SI: Use complex operand folding for div_scale
Matt Arsenault
Matthew.Arsenault at amd.com
Fri Feb 13 20:24:29 PST 2015
Author: arsenm
Date: Fri Feb 13 22:24:28 2015
New Revision: 229238
URL: http://llvm.org/viewvc/llvm-project?rev=229238&view=rev
Log:
R600/SI: Use complex operand folding for div_scale
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp?rev=229238&r1=229237&r2=229238&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp Fri Feb 13 22:24:28 2015
@@ -783,6 +783,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SU
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -792,19 +794,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SC
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
- const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
- SDValue Ops[] = {
- Zero, // src0_modifiers
- N->getOperand(0), // src0
- Zero, // src1_modifiers
- N->getOperand(1), // src1
- Zero, // src2_modifiers
- N->getOperand(2), // src2
- False, // clamp
- Zero // omod
- };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[8];
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
Modified: llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll?rev=229238&r1=229237&r2=229238&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll Fri Feb 13 22:24:28 2015
@@ -3,6 +3,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
; SI-LABEL @test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
@@ -285,3 +286,79 @@ define void @test_div_scale_f64_all_scal
store double %result0, double addrspace(1)* %out, align 8
ret void
}
+
+; SI-LABEL @test_div_scale_f32_inline_imm_num:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %a = load float addrspace(1)* %gep.0, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_inline_imm_den:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %a = load float addrspace(1)* %gep.0, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_fabs_num:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_fabs_den:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
More information about the llvm-commits
mailing list