[llvm-branch-commits] [llvm-branch] r236016 - Merging r229239:
Tom Stellard
thomas.stellard at amd.com
Tue Apr 28 12:12:08 PDT 2015
Author: tstellar
Date: Tue Apr 28 14:12:08 2015
New Revision: 236016
URL: http://llvm.org/viewvc/llvm-project?rev=236016&view=rev
Log:
Merging r229239:
------------------------------------------------------------------------
r229239 | Matthew.Arsenault | 2015-02-13 23:30:08 -0500 (Fri, 13 Feb 2015) | 4 lines
R600/SI: Implement correct f64 fdiv
This version passes the OpenCL conformance test.
------------------------------------------------------------------------
Added:
llvm/branches/release_36/test/CodeGen/R600/fdiv.f64.ll
Removed:
llvm/branches/release_36/test/CodeGen/R600/fdiv64.ll
Modified:
llvm/branches/release_36/lib/Target/R600/AMDGPUInstructions.td
llvm/branches/release_36/lib/Target/R600/CaymanInstructions.td
llvm/branches/release_36/lib/Target/R600/EvergreenInstructions.td
llvm/branches/release_36/lib/Target/R600/R600Instructions.td
llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp
llvm/branches/release_36/lib/Target/R600/SIInstructions.td
llvm/branches/release_36/test/CodeGen/R600/frem.ll
llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUInstructions.td?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUInstructions.td Tue Apr 28 14:12:08 2015
@@ -649,17 +649,10 @@ class RcpPat<Instruction RcpInst, ValueT
(RcpInst $src)
>;
-multiclass RsqPat<Instruction RsqInst, ValueType vt> {
- def : Pat <
- (fdiv FP_ONE, (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-
- def : Pat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
- >;
-}
+class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+>;
include "R600Instructions.td"
include "R700Instructions.td"
Modified: llvm/branches/release_36/lib/Target/R600/CaymanInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/CaymanInstructions.td?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/CaymanInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/CaymanInstructions.td Tue Apr 28 14:12:08 2015
@@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
Modified: llvm/branches/release_36/lib/Target/R600/EvergreenInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/EvergreenInstructions.td?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/EvergreenInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/EvergreenInstructions.td Tue Apr 28 14:12:08 2015
@@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
Modified: llvm/branches/release_36/lib/Target/R600/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/R600Instructions.td?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/R600Instructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/R600Instructions.td Tue Apr 28 14:12:08 2015
@@ -1193,7 +1193,7 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+ def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
Modified: llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SIISelLowering.cpp Tue Apr 28 14:12:08 2015
@@ -210,6 +210,7 @@ SITargetLowering::SITargetLowering(Targe
}
setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Custom);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
@@ -1105,7 +1106,70 @@ SDValue SITargetLowering::LowerFDIV32(SD
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue();
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return LowerFastFDIV(Op, DAG);
+
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
+
+ SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
+
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
+
+ SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
+
+ SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
+
+ SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
+
+ SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
+
+ SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
+
+ SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
+
+ SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
+ NegDivScale0, Mul, DivScale1);
+
+ SDValue Scale;
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // Workaround a hardware bug on SI where the condition output from div_scale
+ // is not usable.
+
+ const SDValue Hi = DAG.getConstant(1, MVT::i32);
+
+ // Figure out if the scale to use for div_fmas.
+ SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
+ SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
+ SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
+ SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
+
+ SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
+ SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
+
+ SDValue Scale0Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
+ SDValue Scale1Hi
+ = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
+
+ SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
+ SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
+ Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
+ } else {
+ Scale = DivScale1.getValue(1);
+ }
+
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
+ Fma4, Fma3, Mul, Scale);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
Modified: llvm/branches/release_36/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIInstructions.td?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/SIInstructions.td Tue Apr 28 14:12:08 2015
@@ -2164,9 +2164,13 @@ def : Pat <
//===----------------------------------------------------------------------===//
let Predicates = [UnsafeFPMath] in {
-def : RcpPat<V_RCP_F64_e32, f64>;
-defm : RsqPat<V_RSQ_F64_e32, f64>;
-defm : RsqPat<V_RSQ_F32_e32, f32>;
+
+//def : RcpPat<V_RCP_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F64_e32, f64>;
+//defm : RsqPat<V_RSQ_F32_e32, f32>;
+
+def : RsqPat<V_RSQ_F32_e32, f32>;
+def : RsqPat<V_RSQ_F64_e32, f64>;
}
//===----------------------------------------------------------------------===//
@@ -2683,13 +2687,6 @@ def : Pat <
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
-def : Pat<
- (fdiv f64:$src0, f64:$src1),
- (V_MUL_F64 0 /* src0_modifiers */, $src0,
- 0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
- 0 /* clamp */, 0 /* omod */)
->;
-
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,
Added: llvm/branches/release_36/test/CodeGen/R600/fdiv.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/fdiv.f64.ll?rev=236016&view=auto
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/fdiv.f64.ll (added)
+++ llvm/branches/release_36/test/CodeGen/R600/fdiv.f64.ll Tue Apr 28 14:12:08 2015
@@ -0,0 +1,96 @@
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
+
+
+; COMMON-LABEL: {{^}}fdiv_f64:
+; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0
+; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
+; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]]
+
+; Check for div_scale bug workaround on SI
+; SI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
+; SI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[NUM]], [[DEN]], [[NUM]]
+
+; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]]
+
+; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}}
+; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}
+; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc
+
+; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0
+; COMMON-DAG: v_fma_f64 [[FMA1:v\[[0-9]+:[0-9]+\]]], [[RCP_SCALE0]], [[FMA0]], [[RCP_SCALE0]]
+; COMMON-DAG: v_fma_f64 [[FMA2:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[FMA1]], 1.0
+; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]]
+; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]]
+; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]]
+; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA3]], [[FMA4]], [[MUL]]
+; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]]
+; COMMON: buffer_store_dwordx2 [[RESULT]]
+; COMMON: s_endpgm
+define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr double addrspace(1)* %in, i32 1
+ %num = load double addrspace(1)* %in
+ %den = load double addrspace(1)* %gep.1
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_s_v:
+define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
+ %den = load double addrspace(1)* %in
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_v_s:
+define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
+ %num = load double addrspace(1)* %in
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}fdiv_f64_s_s:
+define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) nounwind {
+ %result = fdiv double %num, %den
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}v_fdiv_v2f64:
+define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1
+ %num = load <2 x double> addrspace(1)* %in
+ %den = load <2 x double> addrspace(1)* %gep.1
+ %result = fdiv <2 x double> %num, %den
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}s_fdiv_v2f64:
+define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2 x double> %den) {
+ %result = fdiv <2 x double> %num, %den
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}v_fdiv_v4f64:
+define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
+ %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1
+ %num = load <4 x double> addrspace(1)* %in
+ %den = load <4 x double> addrspace(1)* %gep.1
+ %result = fdiv <4 x double> %num, %den
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; COMMON-LABEL: {{^}}s_fdiv_v4f64:
+define void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) {
+ %result = fdiv <4 x double> %num, %den
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
Removed: llvm/branches/release_36/test/CodeGen/R600/fdiv64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/fdiv64.ll?rev=236015&view=auto
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/fdiv64.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/fdiv64.ll (removed)
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; CHECK: {{^}}fdiv_f64:
-; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
-
-define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
- double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = fdiv double %r0, %r1
- store double %r2, double addrspace(1)* %out
- ret void
-}
Modified: llvm/branches/release_36/test/CodeGen/R600/frem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/frem.ll?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/frem.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/frem.ll Tue Apr 28 14:12:08 2015
@@ -40,10 +40,14 @@ define void @unsafe_frem_f32(float addrs
ret void
}
-; TODO: This should check something when f64 fdiv is implemented
-; correctly
-
; FUNC-LABEL: {{^}}frem_f64:
+; SI: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; SI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; SI-DAG: v_div_fmas_f64
+; SI-DAG: v_div_scale_f64
+; SI-DAG: v_mul_f64
+; SI: v_add_f64
+; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
Modified: llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll?rev=236016&r1=236015&r2=236016&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll Tue Apr 28 14:12:08 2015
@@ -23,6 +23,8 @@ define void @rcp_pat_f64(double addrspac
; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
; SI-UNSAFE: v_rsq_f64_e32
; SI-SAFE-NOT: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64
+; SI-SAFE: v_rcp_f64
define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
%sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
%rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
More information about the llvm-branch-commits
mailing list