[llvm] r318704 - [Sparc] efficient pattern for UINT_TO_FP conversion

Mon Dec 11 13:26:19 PST 2017

I believe this is causing a fatal error.  See this bug:
https://bugs.llvm.org/show_bug.cgi?id=35631

On Mon, Nov 20, 2017 at 2:33 PM, Fedor Sergeev via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: fedor.sergeev
> Date: Mon Nov 20 14:33:58 2017
> New Revision: 318704
>
> URL: http://llvm.org/viewvc/llvm-project?rev=318704&view=rev
> Log:
> [Sparc] efficient pattern for UINT_TO_FP conversion
>
> Summary:
>         While investigating a performance degradation in the imagick
>         benchmark, an inefficient pattern for UINT_TO_FP conversion was
>         found. That pattern causes a RAW (read-after-write) hazard in the
>         generated assembly. Specifically, the uitofp IR operator results in
>         poor assembly:
>
>         st          %i0, [%fp - 952]
>         ldd         [%fp - 952], %f0
>
>         It stores a 32-bit integer register into a memory location and then
>         loads 64-bit floating point data from that location.
>         That is exactly the RAW hazard case. To optimize that case it is
>         possible to use SPISD::ITOF and SPISD::XTOF for conversion from
>         integer to floating point data type and to use ISD::BITCAST to
>         copy from an integer register into a floating point register.
>         The fix is to write a custom UINT_TO_FP pattern using SPISD::ITOF,
>         SPISD::XTOF, and ISD::BITCAST.
>
> Patch by Alexey Lapshin
>
> Reviewers: fedor.sergeev, jyknight, dcederman, lero_chris
>
> Reviewed By: jyknight
>
> Subscribers: llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D36875
>
> Modified:
>     llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
>     llvm/trunk/lib/Target/Sparc/SparcISelLowering.h
>     llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td
>     llvm/trunk/test/CodeGen/SPARC/float.ll
>
> Modified: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> Sparc/SparcISelLowering.cpp?rev=318704&r1=318703&r2=318704&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp Mon Nov 20 14:33:58
> 2017
> @@ -1559,9 +1559,6 @@ SparcTargetLowering::SparcTargetLowering
>    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
>    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
>
> -  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
> -  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
> -
>    // Sparc has no select or setcc: expand to SELECT_CC.
>    setOperationAction(ISD::SELECT, MVT::i32, Expand);
>    setOperationAction(ISD::SELECT, MVT::f32, Expand);
> @@ -1590,13 +1587,14 @@ SparcTargetLowering::SparcTargetLowering
>    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
>    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
>
> +  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
> +  setOperationAction(ISD::BITCAST, MVT::f32, Custom);
> +
>    if (Subtarget->is64Bit()) {
>      setOperationAction(ISD::ADDC, MVT::i64, Custom);
>      setOperationAction(ISD::ADDE, MVT::i64, Custom);
>      setOperationAction(ISD::SUBC, MVT::i64, Custom);
>      setOperationAction(ISD::SUBE, MVT::i64, Custom);
> -    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
> -    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
>      setOperationAction(ISD::SELECT, MVT::i64, Expand);
>      setOperationAction(ISD::SETCC, MVT::i64, Expand);
>      setOperationAction(ISD::BR_CC, MVT::i64, Custom);
> @@ -1610,6 +1608,9 @@ SparcTargetLowering::SparcTargetLowering
>      setOperationAction(ISD::ROTL , MVT::i64, Expand);
>      setOperationAction(ISD::ROTR , MVT::i64, Expand);
>      setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
> +
> +    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
> +    setOperationAction(ISD::BITCAST, MVT::f64, Custom);
>    }
>
>    // ATOMICs.
> @@ -2425,23 +2426,76 @@ static SDValue LowerFP_TO_UINT(SDValue O
>                           1);
>  }
>
> -static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
> -                               const SparcTargetLowering &TLI,
> -                               bool hasHardQuad) {
> +SDValue SparcTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG)
> const {
> +  SDLoc dl(Op);
> +  EVT SrcVT = Op.getOperand(0).getValueType();
> +
> +  EVT DstVT = Op.getValueType();
> +
> +  if (Subtarget->isVIS3()) {
> +    if (DstVT == MVT::f32 && SrcVT == MVT::i32) {
> +      return Op; // Legal
> +    } else if (DstVT == MVT::f64 && SrcVT == MVT::i64) {
> +      return (Subtarget->is64Bit())
> +                 ? Op
> +                 : SDValue(); // Legal on 64 bit, otherwise Expand
> +    } else if (DstVT == MVT::i64 && SrcVT == MVT::f64) {
> +      return (Subtarget->is64Bit())
> +                 ? Op
> +                 : SDValue(); // Legal on 64 bit, otherwise Expand
> +    }
> +  }
> +
> +  // Expand
> +  return SDValue();
> +}
> +
> +SDValue SparcTargetLowering::LowerUINT_TO_FP(SDValue Op,
> +                                             SelectionDAG &DAG) const {
>    SDLoc dl(Op);
>    EVT OpVT = Op.getOperand(0).getValueType();
>    assert(OpVT == MVT::i32 || OpVT == MVT::i64);
>
> -  // Expand if it does not involve f128 or the target has support for
> -  // quad floating point instructions and the operand type is legal.
> -  if (Op.getValueType() != MVT::f128 || (hasHardQuad &&
> TLI.isTypeLegal(OpVT)))
> -    return SDValue();
> +  // Expand f128 operations to fp128 ABI calls.
> +  if (Op.getValueType() == MVT::f128 &&
> +      (!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) {
> +    return LowerF128Op(Op, DAG,
> +                       getLibcallName(OpVT == MVT::i32
> +                                          ? RTLIB::UINTTOFP_I32_F128
> +                                          : RTLIB::UINTTOFP_I64_F128),
> +                       1);
> +  }
> +
> +  // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
> +  // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
> +  // the optimization here.
> +  if (DAG.SignBitIsZero(Op.getOperand(0))) {
> +
> +    EVT floatVT = MVT::f32;
> +    unsigned IntToFloatOpcode = SPISD::ITOF;
> +
> +    if (OpVT == MVT::i64) {
> +      floatVT = MVT::f64;
> +      IntToFloatOpcode = SPISD::XTOF;
> +    }
>
> -  return TLI.LowerF128Op(Op, DAG,
> -                         TLI.getLibcallName(OpVT == MVT::i32
> -                                            ? RTLIB::UINTTOFP_I32_F128
> -                                            : RTLIB::UINTTOFP_I64_F128),
> -                         1);
> +    // Convert the int value to FP in an FP register.
> +    SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT,
> Op.getOperand(0));
> +
> +    return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(), FloatTmp);
> +  }
> +
> +  if (OpVT == MVT::i32 && Subtarget->is64Bit()) {
> +
> +    SDValue Int64Tmp =
> +        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0));
> +
> +    SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64,
> Int64Tmp);
> +
> +    return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp);
> +  }
> +
> +  return SDValue();
>  }
>
>  static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
> @@ -3059,8 +3113,7 @@ LowerOperation(SDValue Op, SelectionDAG
>                                                         hasHardQuad);
>    case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG, *this,
>                                                         hasHardQuad);
> -  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG, *this,
> -                                                       hasHardQuad);
> +  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
>    case ISD::BR_CC:              return LowerBR_CC(Op, DAG, *this,
>                                                    hasHardQuad);
>    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG, *this,
> @@ -3097,6 +3150,7 @@ LowerOperation(SDValue Op, SelectionDAG
>    case ISD::ATOMIC_LOAD:
>    case ISD::ATOMIC_STORE:       return LowerATOMIC_LOAD_STORE(Op, DAG);
>    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
> +  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
>    }
>  }
>
>
> Modified: llvm/trunk/lib/Target/Sparc/SparcISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> Sparc/SparcISelLowering.h?rev=318704&r1=318703&r2=318704&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.h (original)
> +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.h Mon Nov 20 14:33:58
> 2017
> @@ -192,6 +192,10 @@ namespace llvm {
>
>      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
>
> +    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
> +
> +    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
> +
>      bool ShouldShrinkFPConstant(EVT VT) const override {
>        // Do not shrink FP constpool if VT == MVT::f128.
>        // (ldd, call _Q_fdtoq) is more expensive than two ldds.
>
> Modified: llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/
> Sparc/SparcInstrVIS.td?rev=318704&r1=318703&r2=318704&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td (original)
> +++ llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td Mon Nov 20 14:33:58 2017
> @@ -243,16 +243,21 @@ def LZCNT     : VISInstFormat<0b00001011
>                     (ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;
>
>  let rs1 = 0 in {
> -def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
> -                   (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
> -def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
> -                   (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
> -def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
> -                   (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
> -def MOVWTOS  :  VISInstFormat<0b100011001, (outs DFPRegs:$rd),
> -                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
> -def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd),
> -                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
> +def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins
> FPRegs:$rs2),
> +                   "movstosw $rs2, $rd",
> +                   [(set I64Regs:$rd, (sext (i32 (bitconvert
> FPRegs:$rs2))))]>;
> +def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins
> FPRegs:$rs2),
> +                   "movstouw $rs2, $rd",
> +                   [(set I64Regs:$rd, (zext (i32 (bitconvert
> FPRegs:$rs2))))]>;
> +def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins
> DFPRegs:$rs2),
> +                   "movdtox $rs2, $rd",
> +                   [(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>;
> +def MOVWTOS  :  VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins
> IntRegs:$rs2),
> +                   "movwtos $rs2, $rd",
> +                   [(set FPRegs:$rd, (bitconvert i32:$rs2))]>;
> +def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins
> I64Regs:$rs2),
> +                   "movxtod $rs2, $rd",
> +                   [(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>;
>  }
>
>  def PDISTN   : VISInst<0b000111111, "pdistn">;
>
> Modified: llvm/trunk/test/CodeGen/SPARC/float.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/SPARC/float.ll?rev=318704&r1=318703&r2=318704&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/SPARC/float.ll (original)
> +++ llvm/trunk/test/CodeGen/SPARC/float.ll Mon Nov 20 14:33:58 2017
> @@ -3,6 +3,8 @@
>  ; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT
>  ; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
>  ; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s
> -check-prefix=SPARC64
> +; RUN: llc -march=sparc -mcpu=niagara4 < %s  | FileCheck %s
> -check-prefix=VIS3
> +; RUN: llc -march=sparcv9 -mcpu=niagara4 < %s | FileCheck %s
> -check-prefix=VIS3-64
>
>  ; V8-LABEL:     test_neg:
>  ; V8:     call get_double
> @@ -194,7 +196,7 @@ entry:
>  ; V9:          fstoi
>
>  ; SPARC64-LABEL:    test_utos_stou
> -; SPARC64:     fdtos
> +; SPARC64:     fxtos
>  ; SPARC64:     fstoi
>
>  define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) {
> @@ -240,6 +242,9 @@ entry:
>  ; SPARC64-NOT:      fitod
>  ; SPARC64:          fdtoi
>
> +; VIS3-64-LABEL:  test_utod_dtou
> +; VIS3-64:        movxtod
> +
>  define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double* %ptr1)
> {
>  entry:
>    %0 = uitofp i32 %a to double
> @@ -248,3 +253,49 @@ entry:
>    store i32 %1, i32* %ptr0, align 8
>    ret void
>  }
> +
> +; V8-LABEL:    test_ustod
> +; V8:          fitod
> +
> +; VIS3-LABEL:  test_ustod
> +; VIS3:        movwtos
> +
> +define double @test_ustod(i16 zeroext) {
> +  %2 = uitofp i16 %0 to double
> +  ret double %2
> +}
> +
> +; V8-LABEL:    test_ustos
> +; V8:          fitos
> +
> +; VIS3-LABEL:  test_ustos
> +; VIS3:        movwtos
> +
> +define float @test_ustos(i16 zeroext) {
> +  %2 = uitofp i16 %0 to float
> +  ret float %2
> +}
> +
> +; check for movwtos used for bitcast
> +;
> +; VIS3-LABEL:  test_bitcast_utos
> +; VIS3:movwtos
> +
> +define float @test_bitcast_utos(i32 ) {
> +  %2 = bitcast i32 %0 to float
> +  ret float %2
> +}
> +
> +
> +; check for movxtod used for bitcast
> +;
> +; VIS3-64-LABEL:  test_bitcast_uxtod
> +; VIS3-64:movxtod
> +
> +define double @test_bitcast_uxtod(i64 ) {
> +  %2 = bitcast i64 %0 to double
> +  ret double %2
> +}
> +
> +
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171211/fc12d0e7/attachment.html>