[llvm-branch-commits] [llvm-branch] r235686 - Merging r229238:
Tom Stellard
thomas.stellard at amd.com
Thu Apr 23 18:30:56 PDT 2015
Author: tstellar
Date: Thu Apr 23 20:30:56 2015
New Revision: 235686
URL: http://llvm.org/viewvc/llvm-project?rev=235686&view=rev
Log:
Merging r229238:
------------------------------------------------------------------------
r229238 | Matthew.Arsenault | 2015-02-13 23:24:28 -0500 (Fri, 13 Feb 2015) | 2 lines
R600/SI: Use complex operand folding for div_scale
------------------------------------------------------------------------
Modified:
llvm/branches/release_36/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUISelDAGToDAG.cpp?rev=235686&r1=235685&r2=235686&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUISelDAGToDAG.cpp Thu Apr 23 20:30:56 2015
@@ -786,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SU
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -795,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SC
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
- const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
- SDValue Ops[] = {
- Zero, // src0_modifiers
- N->getOperand(0), // src0
- Zero, // src1_modifiers
- N->getOperand(1), // src1
- Zero, // src2_modifiers
- N->getOperand(2), // src2
- False, // clamp
- Zero // omod
- };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[8];
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
Modified: llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll?rev=235686&r1=235685&r2=235686&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll Thu Apr 23 20:30:56 2015
@@ -3,6 +3,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
; SI-LABEL @test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
@@ -285,3 +286,79 @@ define void @test_div_scale_f64_all_scal
store double %result0, double addrspace(1)* %out, align 8
ret void
}
+
+; SI-LABEL @test_div_scale_f32_inline_imm_num:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %a = load float addrspace(1)* %gep.0, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_inline_imm_den:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %a = load float addrspace(1)* %gep.0, align 4
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_fabs_num:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL @test_div_scale_f32_fabs_den:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+ %a = load float addrspace(1)* %gep.0, align 4
+ %b = load float addrspace(1)* %gep.1, align 4
+
+ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
+
+ %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
+ %result0 = extractvalue { float, i1 } %result, 0
+ store float %result0, float addrspace(1)* %out, align 4
+ ret void
+}
More information about the llvm-branch-commits
mailing list