[llvm] r272056 - Revert "Differential Revision: http://reviews.llvm.org/D20557"
Eric Christopher via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 7 13:27:12 PDT 2016
Author: echristo
Date: Tue Jun 7 15:27:12 2016
New Revision: 272056
URL: http://llvm.org/viewvc/llvm-project?rev=272056&view=rev
Log:
Revert "Differential Revision: http://reviews.llvm.org/D20557"
Author: Wei Ding <wei.ding2 at amd.com>
Date: Tue Jun 7 19:04:44 2016 +0000
Differential Revision: http://reviews.llvm.org/D20557
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272044
91177308-0d34-0410-b5e6-96231b3b80d8
as it was breaking the bots.
This reverts commit r272044.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=272056&r1=272055&r2=272056&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jun 7 15:27:12 2016
@@ -36,12 +36,6 @@
using namespace llvm;
-// -amdgpu-fast-fdiv - Command line option to enable faster 2.5 ulp fdiv.
-static cl::opt<bool> EnableAMDGPUFastFDIV(
- "amdgpu-fast-fdiv",
- cl::desc("Enable faster 2.5 ulp fdiv"),
- cl::init(false));
-
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -1947,11 +1941,8 @@ SDValue SITargetLowering::LowerFastFDIV(
}
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
- const SDNodeFlags *Flags = Op->getFlags();
- if (Flags->hasAllowReciprocal()) {
- if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
- return FastLowered;
- }
+ if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
+ return FastLowered;
// This uses v_rcp_f32 which does not handle denormals. Let this hit a
// selection error for now rather than do something incorrect.
@@ -1962,61 +1953,32 @@ SDValue SITargetLowering::LowerFDIV32(SD
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- // faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag
- if (EnableAMDGPUFastFDIV) {
- SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+ SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
- const APFloat K0Val(BitsToFloat(0x6f800000));
- const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
+ const APFloat K0Val(BitsToFloat(0x6f800000));
+ const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
- const APFloat K1Val(BitsToFloat(0x2f800000));
- const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
+ const APFloat K1Val(BitsToFloat(0x2f800000));
+ const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
+ const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
- SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
+ SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
- SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
- // TODO: Should this propagate fast-math-flags?
+ // TODO: Should this propagate fast-math-flags?
- r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+ r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
- SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+ SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
- return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
- }
-
- // Generates more precise fpdiv32.
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
-
- SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
-
- SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
- SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);
-
- SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
-
- SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);
-
- SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One);
- SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp);
-
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, NumeratorScaled, Fma1);
-
- SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled);
- SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);
- SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled);
-
- SDValue Scale = NumeratorScaled.getValue(1);
- SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);
-
- return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
+ return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
Modified: llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll?rev=272056&r1=272055&r2=272056&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fdiv.ll Tue Jun 7 15:27:12 2016
@@ -1,33 +1,19 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.
-; These test check that fdiv using unsafe_fp_math, coarse fp div, and IEEE754 fp div.
-
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
-
-; I754-DAG: v_div_scale_f32
-; I754-DAG: v_rcp_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_mul_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_div_fixup_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
%0 = fdiv float %a, %b
@@ -35,55 +21,7 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754-DAG: v_div_scale_f32
-; I754-DAG: v_rcp_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_mul_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_div_fixup_f32
-define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fdiv fast float %a, %b
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754-DAG: v_div_scale_f32
-; I754-DAG: v_rcp_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_mul_f32
-; I754-DAG: v_fma_f32
-; I754-DAG: v_div_fixup_f32
-define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
-entry:
- %0 = fdiv arcp float %a, %b
- store float %0, float addrspace(1)* %out
- ret void
-}
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
@@ -91,22 +29,10 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
%0 = fdiv <2 x float> %a, %b
@@ -114,65 +40,6 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
- %0 = fdiv fast <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
-
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_rcp_f32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
- %0 = fdiv arcp <2 x float> %a, %b
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -183,15 +50,6 @@ entry:
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
@@ -200,19 +58,6 @@ entry:
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float>, <4 x float> addrspace(1) * %in
@@ -221,101 +66,3 @@ define void @fdiv_v4f32(<4 x float> addr
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
-
-; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float>, <4 x float> addrspace(1) * %in
- %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
- %result = fdiv fast <4 x float> %a, %b
- store <4 x float> %result, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
-
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_rcp_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-; UNSAFE-FP: v_mul_f32_e32
-
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-; SI-DAG: v_rcp_f32
-; SI-DAG: v_mul_f32
-
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_scale_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-; I754: v_div_fixup_f32
-define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float>, <4 x float> addrspace(1) * %in
- %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
- %result = fdiv arcp <4 x float> %a, %b
- store <4 x float> %result, <4 x float> addrspace(1)* %out
- ret void
-}
More information about the llvm-commits
mailing list