<div dir="ltr"><div>I believe this is causing a fatal error. See this bug:</div><a href="https://bugs.llvm.org/show_bug.cgi?id=35631">https://bugs.llvm.org/show_bug.cgi?id=35631</a><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Nov 20, 2017 at 2:33 PM, Fedor Sergeev via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Author: fedor.sergeev<br>
Date: Mon Nov 20 14:33:58 2017<br>
New Revision: 318704<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=318704&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=318704&view=rev</a><br>
Log:<br>
[Sparc] efficient pattern for UINT_TO_FP conversion<br>
<br>
Summary:<br>
While investigating a performance degradation of the imagick benchmark,<br>
an inefficient pattern for UINT_TO_FP conversion was found.<br>
That pattern causes a RAW hazard in the assembly code. Specifically,<br>
the uitofp IR operator results in poor assembly:<br>
<br>
st %i0, [%fp - 952]<br>
ldd [%fp - 952], %f0<br>
<br>
It stores a 32-bit integer register into a memory location and then<br>
loads 64-bit floating point data from that location.<br>
That is exactly the RAW hazard case. To optimize that case, it is<br>
possible to use SPISD::ITOF and SPISD::XTOF for conversion from<br>
integer to floating point data type and to use ISD::BITCAST to<br>
copy from an integer register into a floating point register.<br>
The fix is to write a custom UINT_TO_FP pattern using SPISD::ITOF,<br>
SPISD::XTOF, and ISD::BITCAST.<br>
<br>
Patch by Alexey Lapshin<br>
<br>
Reviewers: fedor.sergeev, jyknight, dcederman, lero_chris<br>
<br>
Reviewed By: jyknight<br>
<br>
Subscribers: llvm-commits<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D36875" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D36875</a><br>
<br>
Modified:<br>
llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.cpp<br>
llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.h<br>
llvm/trunk/lib/Target/Sparc/<wbr>SparcInstrVIS.td<br>
llvm/trunk/test/CodeGen/SPARC/<wbr>float.ll<br>
<br>
Modified: llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp?rev=318704&r1=318703&r2=318704&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Sparc/SparcISelLowering.cpp?<wbr>rev=318704&r1=318703&r2=<wbr>318704&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.cpp Mon Nov 20 14:33:58 2017<br>
@@ -1559,9 +1559,6 @@ SparcTargetLowering::<wbr>SparcTargetLowering<br>
setOperationAction(ISD::FP_TO_<wbr>UINT, MVT::i64, Custom);<br>
setOperationAction(ISD::UINT_<wbr>TO_FP, MVT::i64, Custom);<br>
<br>
- setOperationAction(ISD::<wbr>BITCAST, MVT::f32, Expand);<br>
- setOperationAction(ISD::<wbr>BITCAST, MVT::i32, Expand);<br>
-<br>
// Sparc has no select or setcc: expand to SELECT_CC.<br>
setOperationAction(ISD::<wbr>SELECT, MVT::i32, Expand);<br>
setOperationAction(ISD::<wbr>SELECT, MVT::f32, Expand);<br>
@@ -1590,13 +1587,14 @@ SparcTargetLowering::<wbr>SparcTargetLowering<br>
setOperationAction(ISD::EH_<wbr>SJLJ_SETJMP, MVT::i32, Custom);<br>
setOperationAction(ISD::EH_<wbr>SJLJ_LONGJMP, MVT::Other, Custom);<br>
<br>
+ setOperationAction(ISD::<wbr>BITCAST, MVT::i32, Custom);<br>
+ setOperationAction(ISD::<wbr>BITCAST, MVT::f32, Custom);<br>
+<br>
if (Subtarget->is64Bit()) {<br>
setOperationAction(ISD::ADDC, MVT::i64, Custom);<br>
setOperationAction(ISD::ADDE, MVT::i64, Custom);<br>
setOperationAction(ISD::SUBC, MVT::i64, Custom);<br>
setOperationAction(ISD::SUBE, MVT::i64, Custom);<br>
- setOperationAction(ISD::<wbr>BITCAST, MVT::f64, Expand);<br>
- setOperationAction(ISD::<wbr>BITCAST, MVT::i64, Expand);<br>
setOperationAction(ISD::<wbr>SELECT, MVT::i64, Expand);<br>
setOperationAction(ISD::SETCC, MVT::i64, Expand);<br>
setOperationAction(ISD::BR_CC, MVT::i64, Custom);<br>
@@ -1610,6 +1608,9 @@ SparcTargetLowering::<wbr>SparcTargetLowering<br>
setOperationAction(ISD::ROTL , MVT::i64, Expand);<br>
setOperationAction(ISD::ROTR , MVT::i64, Expand);<br>
setOperationAction(ISD::<wbr>DYNAMIC_STACKALLOC, MVT::i64, Custom);<br>
+<br>
+ setOperationAction(ISD::<wbr>BITCAST, MVT::i64, Custom);<br>
+ setOperationAction(ISD::<wbr>BITCAST, MVT::f64, Custom);<br>
}<br>
<br>
// ATOMICs.<br>
@@ -2425,23 +2426,76 @@ static SDValue LowerFP_TO_UINT(SDValue O<br>
1);<br>
}<br>
<br>
-static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,<br>
- const SparcTargetLowering &TLI,<br>
- bool hasHardQuad) {<br>
+SDValue SparcTargetLowering::<wbr>LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {<br>
+ SDLoc dl(Op);<br>
+ EVT SrcVT = Op.getOperand(0).getValueType(<wbr>);<br>
+<br>
+ EVT DstVT = Op.getValueType();<br>
+<br>
+ if (Subtarget->isVIS3()) {<br>
+ if (DstVT == MVT::f32 && SrcVT == MVT::i32) {<br>
+ return Op; // Legal<br>
+ } else if (DstVT == MVT::f64 && SrcVT == MVT::i64) {<br>
+ return (Subtarget->is64Bit())<br>
+ ? Op<br>
+ : SDValue(); // Legal on 64 bit, otherwise Expand<br>
+ } else if (DstVT == MVT::i64 && SrcVT == MVT::f64) {<br>
+ return (Subtarget->is64Bit())<br>
+ ? Op<br>
+ : SDValue(); // Legal on 64 bit, otherwise Expand<br>
+ }<br>
+ }<br>
+<br>
+ // Expand<br>
+ return SDValue();<br>
+}<br>
+<br>
+SDValue SparcTargetLowering::<wbr>LowerUINT_TO_FP(SDValue Op,<br>
+ SelectionDAG &DAG) const {<br>
SDLoc dl(Op);<br>
EVT OpVT = Op.getOperand(0).getValueType(<wbr>);<br>
assert(OpVT == MVT::i32 || OpVT == MVT::i64);<br>
<br>
- // Expand if it does not involve f128 or the target has support for<br>
- // quad floating point instructions and the operand type is legal.<br>
- if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))<br>
- return SDValue();<br>
+ // Expand f128 operations to fp128 ABI calls.<br>
+ if (Op.getValueType() == MVT::f128 &&<br>
+ (!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) {<br>
+ return LowerF128Op(Op, DAG,<br>
+ getLibcallName(OpVT == MVT::i32<br>
+ ? RTLIB::UINTTOFP_I32_F128<br>
+ : RTLIB::UINTTOFP_I64_F128),<br>
+ 1);<br>
+ }<br>
+<br>
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't<br>
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform<br>
+ // the optimization here.<br>
+ if (DAG.SignBitIsZero(Op.<wbr>getOperand(0))) {<br>
+<br>
+ EVT floatVT = MVT::f32;<br>
+ unsigned IntToFloatOpcode = SPISD::ITOF;<br>
+<br>
+ if (OpVT == MVT::i64) {<br>
+ floatVT = MVT::f64;<br>
+ IntToFloatOpcode = SPISD::XTOF;<br>
+ }<br>
<br>
- return TLI.LowerF128Op(Op, DAG,<br>
- TLI.getLibcallName(OpVT == MVT::i32<br>
- ? RTLIB::UINTTOFP_I32_F128<br>
- : RTLIB::UINTTOFP_I64_F128),<br>
- 1);<br>
+ // Convert the int value to FP in an FP register.<br>
+ SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));<br>
+<br>
+ return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(), FloatTmp);<br>
+ }<br>
+<br>
+ if (OpVT == MVT::i32 && Subtarget->is64Bit()) {<br>
+<br>
+ SDValue Int64Tmp =<br>
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0));<br>
+<br>
+ SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Int64Tmp);<br>
+<br>
+ return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp);<br>
+ }<br>
+<br>
+ return SDValue();<br>
}<br>
<br>
static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,<br>
@@ -3059,8 +3113,7 @@ LowerOperation(SDValue Op, SelectionDAG<br>
hasHardQuad);<br>
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,<br>
hasHardQuad);<br>
- case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,<br>
- hasHardQuad);<br>
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);<br>
case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,<br>
hasHardQuad);<br>
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,<br>
@@ -3097,6 +3150,7 @@ LowerOperation(SDValue Op, SelectionDAG<br>
case ISD::ATOMIC_LOAD:<br>
case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG);<br>
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);<br>
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);<br>
}<br>
}<br>
<br>
<br>
Modified: llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcISelLowering.h?rev=318704&r1=318703&r2=318704&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Sparc/SparcISelLowering.h?rev=<wbr>318704&r1=318703&r2=318704&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.h (original)<br>
+++ llvm/trunk/lib/Target/Sparc/<wbr>SparcISelLowering.h Mon Nov 20 14:33:58 2017<br>
@@ -192,6 +192,10 @@ namespace llvm {<br>
<br>
SDValue LowerINTRINSIC_WO_CHAIN(<wbr>SDValue Op, SelectionDAG &DAG) const;<br>
<br>
+ SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;<br>
+<br>
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;<br>
+<br>
bool ShouldShrinkFPConstant(EVT VT) const override {<br>
// Do not shrink FP constpool if VT == MVT::f128.<br>
// (ldd, call _Q_fdtoq) is more expensive than two ldds.<br>
<br>
Modified: llvm/trunk/lib/Target/Sparc/<wbr>SparcInstrVIS.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td?rev=318704&r1=318703&r2=318704&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>Sparc/SparcInstrVIS.td?rev=<wbr>318704&r1=318703&r2=318704&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/Sparc/<wbr>SparcInstrVIS.td (original)<br>
+++ llvm/trunk/lib/Target/Sparc/<wbr>SparcInstrVIS.td Mon Nov 20 14:33:58 2017<br>
@@ -243,16 +243,21 @@ def LZCNT : VISInstFormat<0b00001011<br>
(ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;<br>
<br>
let rs1 = 0 in {<br>
-def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),<br>
- (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;<br>
-def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),<br>
- (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;<br>
-def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd),<br>
- (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;<br>
-def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs:$rd),<br>
- (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;<br>
-def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd),<br>
- (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;<br>
+def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins FPRegs:$rs2),<br>
+ "movstosw $rs2, $rd",<br>
+ [(set I64Regs:$rd, (sext (i32 (bitconvert FPRegs:$rs2))))]>;<br>
+def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins FPRegs:$rs2),<br>
+ "movstouw $rs2, $rd",<br>
+ [(set I64Regs:$rd, (zext (i32 (bitconvert FPRegs:$rs2))))]>;<br>
+def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins DFPRegs:$rs2),<br>
+ "movdtox $rs2, $rd",<br>
+ [(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>;<br>
+def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins IntRegs:$rs2),<br>
+ "movwtos $rs2, $rd",<br>
+ [(set FPRegs:$rd, (bitconvert i32:$rs2))]>;<br>
+def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins I64Regs:$rs2),<br>
+ "movxtod $rs2, $rd",<br>
+ [(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>;<br>
}<br>
<br>
def PDISTN : VISInst<0b000111111, "pdistn">;<br>
<br>
Modified: llvm/trunk/test/CodeGen/SPARC/<wbr>float.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SPARC/float.ll?rev=318704&r1=318703&r2=318704&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/SPARC/float.ll?rev=<wbr>318704&r1=318703&r2=318704&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/SPARC/<wbr>float.ll (original)<br>
+++ llvm/trunk/test/CodeGen/SPARC/<wbr>float.ll Mon Nov 20 14:33:58 2017<br>
@@ -3,6 +3,8 @@<br>
; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT<br>
; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9<br>
; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s -check-prefix=SPARC64<br>
+; RUN: llc -march=sparc -mcpu=niagara4 < %s | FileCheck %s -check-prefix=VIS3<br>
+; RUN: llc -march=sparcv9 -mcpu=niagara4 < %s | FileCheck %s -check-prefix=VIS3-64<br>
<br>
; V8-LABEL: test_neg:<br>
; V8: call get_double<br>
@@ -194,7 +196,7 @@ entry:<br>
; V9: fstoi<br>
<br>
; SPARC64-LABEL: test_utos_stou<br>
-; SPARC64: fdtos<br>
+; SPARC64: fxtos<br>
; SPARC64: fstoi<br>
<br>
define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) {<br>
@@ -240,6 +242,9 @@ entry:<br>
; SPARC64-NOT: fitod<br>
; SPARC64: fdtoi<br>
<br>
+; VIS3-64-LABEL: test_utod_dtou<br>
+; VIS3-64: movxtod<br>
+<br>
define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double* %ptr1) {<br>
entry:<br>
%0 = uitofp i32 %a to double<br>
@@ -248,3 +253,49 @@ entry:<br>
store i32 %1, i32* %ptr0, align 8<br>
ret void<br>
}<br>
+<br>
+; V8-LABEL: test_ustod<br>
+; V8: fitod<br>
+<br>
+; VIS3-LABEL: test_ustod<br>
+; VIS3: movwtos<br>
+<br>
+define double @test_ustod(i16 zeroext) {<br>
+ %2 = uitofp i16 %0 to double<br>
+ ret double %2<br>
+}<br>
+<br>
+; V8-LABEL: test_ustos<br>
+; V8: fitos<br>
+<br>
+; VIS3-LABEL: test_ustos<br>
+; VIS3: movwtos<br>
+<br>
+define float @test_ustos(i16 zeroext) {<br>
+ %2 = uitofp i16 %0 to float<br>
+ ret float %2<br>
+}<br>
+<br>
+; check for movwtos used for bitcast<br>
+;<br>
+; VIS3-LABEL: test_bitcast_utos<br>
+; VIS3:movwtos<br>
+<br>
+define float @test_bitcast_utos(i32 ) {<br>
+ %2 = bitcast i32 %0 to float<br>
+ ret float %2<br>
+}<br>
+<br>
+<br>
+; check for movxtod used for bitcast<br>
+;<br>
+; VIS3-64-LABEL: test_bitcast_uxtod<br>
+; VIS3-64:movxtod<br>
+<br>
+define double @test_bitcast_uxtod(i64 ) {<br>
+ %2 = bitcast i64 %0 to double<br>
+ ret double %2<br>
+}<br>
+<br>
+<br>
+<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>