[llvm] r318704 - [Sparc] efficient pattern for UINT_TO_FP conversion

Mon Dec 11 14:27:41 PST 2017

Reverted in r320429.

On Mon, Dec 11, 2017 at 1:26 PM, Richard Trieu <rtrieu at google.com> wrote:

> I believe this is causing a fatal error.  See this bug:
> https://bugs.llvm.org/show_bug.cgi?id=35631
>
>
> On Mon, Nov 20, 2017 at 2:33 PM, Fedor Sergeev via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: fedor.sergeev
>> Date: Mon Nov 20 14:33:58 2017
>> New Revision: 318704
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=318704&view=rev
>> Log:
>> [Sparc] efficient pattern for UINT_TO_FP conversion
>>
>> Summary:
>>         while investigating performance degradation of imagick benchmark
>>         there were found inefficient pattern for UINT_TO_FP conversion.
>>         That pattern causes RAW hazard in assembly code. Specifically,
>>         uitofp IR operator results in poor assembler :
>>
>>         st          %i0, [%fp - 952]
>>         ldd         [%fp - 952], %f0
>>
>>         it stores 32-bit integer register into memory location and then
>>         loads 64-bit floating point data from that location.
>>         That is exactly RAW hazard case. To optimize that case it is
>>         possible to use SPISD::ITOF and SPISD::XTOF for conversion from
>>         integer to floating point data type and to use ISD::BITCAST to
>>         copy from integer register into floating point register.
>>         The fix is to write custom UINT_TO_FP pattern using SPISD::ITOF,
>>         SPISD::XTOF, ISD::BITCAST.
>>
>> Patch by Alexey Lapshin
>>
>> Reviewers: fedor.sergeev, jyknight, dcederman, lero_chris
>>
>> Reviewed By: jyknight
>>
>> Subscribers: llvm-commits
>>
>> Differential Revision: https://reviews.llvm.org/D36875
>>
>> Modified:
>>     llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
>>     llvm/trunk/lib/Target/Sparc/SparcISelLowering.h
>>     llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td
>>     llvm/trunk/test/CodeGen/SPARC/float.ll
>>
>> Modified: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sp
>> arc/SparcISelLowering.cpp?rev=318704&r1=318703&r2=318704&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp Mon Nov 20
>> 14:33:58 2017
>> @@ -1559,9 +1559,6 @@ SparcTargetLowering::SparcTargetLowering
>>    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
>>    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
>>
>> -  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
>> -  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
>> -
>>    // Sparc has no select or setcc: expand to SELECT_CC.
>>    setOperationAction(ISD::SELECT, MVT::i32, Expand);
>>    setOperationAction(ISD::SELECT, MVT::f32, Expand);
>> @@ -1590,13 +1587,14 @@ SparcTargetLowering::SparcTargetLowering
>>    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
>>    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
>>
>> +  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
>> +  setOperationAction(ISD::BITCAST, MVT::f32, Custom);
>> +
>>    if (Subtarget->is64Bit()) {
>>      setOperationAction(ISD::ADDC, MVT::i64, Custom);
>>      setOperationAction(ISD::ADDE, MVT::i64, Custom);
>>      setOperationAction(ISD::SUBC, MVT::i64, Custom);
>>      setOperationAction(ISD::SUBE, MVT::i64, Custom);
>> -    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
>> -    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
>>      setOperationAction(ISD::SELECT, MVT::i64, Expand);
>>      setOperationAction(ISD::SETCC, MVT::i64, Expand);
>>      setOperationAction(ISD::BR_CC, MVT::i64, Custom);
>> @@ -1610,6 +1608,9 @@ SparcTargetLowering::SparcTargetLowering
>>      setOperationAction(ISD::ROTL , MVT::i64, Expand);
>>      setOperationAction(ISD::ROTR , MVT::i64, Expand);
>>      setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
>> +
>> +    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
>> +    setOperationAction(ISD::BITCAST, MVT::f64, Custom);
>>    }
>>
>>    // ATOMICs.
>> @@ -2425,23 +2426,76 @@ static SDValue LowerFP_TO_UINT(SDValue O
>>                           1);
>>  }
>>
>> -static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
>> -                               const SparcTargetLowering &TLI,
>> -                               bool hasHardQuad) {
>> +SDValue SparcTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG
>> &DAG) const {
>> +  SDLoc dl(Op);
>> +  EVT SrcVT = Op.getOperand(0).getValueType();
>> +
>> +  EVT DstVT = Op.getValueType();
>> +
>> +  if (Subtarget->isVIS3()) {
>> +    if (DstVT == MVT::f32 && SrcVT == MVT::i32) {
>> +      return Op; // Legal
>> +    } else if (DstVT == MVT::f64 && SrcVT == MVT::i64) {
>> +      return (Subtarget->is64Bit())
>> +                 ? Op
>> +                 : SDValue(); // Legal on 64 bit, otherwise Expand
>> +    } else if (DstVT == MVT::i64 && SrcVT == MVT::f64) {
>> +      return (Subtarget->is64Bit())
>> +                 ? Op
>> +                 : SDValue(); // Legal on 64 bit, otherwise Expand
>> +    }
>> +  }
>> +
>> +  // Expand
>> +  return SDValue();
>> +}
>> +
>> +SDValue SparcTargetLowering::LowerUINT_TO_FP(SDValue Op,
>> +                                             SelectionDAG &DAG) const {
>>    SDLoc dl(Op);
>>    EVT OpVT = Op.getOperand(0).getValueType();
>>    assert(OpVT == MVT::i32 || OpVT == MVT::i64);
>>
>> -  // Expand if it does not involve f128 or the target has support for
>> -  // quad floating point instructions and the operand type is legal.
>> -  if (Op.getValueType() != MVT::f128 || (hasHardQuad &&
>> TLI.isTypeLegal(OpVT)))
>> -    return SDValue();
>> +  // Expand f128 operations to fp128 ABI calls.
>> +  if (Op.getValueType() == MVT::f128 &&
>> +      (!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) {
>> +    return LowerF128Op(Op, DAG,
>> +                       getLibcallName(OpVT == MVT::i32
>> +                                          ? RTLIB::UINTTOFP_I32_F128
>> +                                          : RTLIB::UINTTOFP_I64_F128),
>> +                       1);
>> +  }
>> +
>> +  // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
>> +  // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
>> +  // the optimization here.
>> +  if (DAG.SignBitIsZero(Op.getOperand(0))) {
>> +
>> +    EVT floatVT = MVT::f32;
>> +    unsigned IntToFloatOpcode = SPISD::ITOF;
>> +
>> +    if (OpVT == MVT::i64) {
>> +      floatVT = MVT::f64;
>> +      IntToFloatOpcode = SPISD::XTOF;
>> +    }
>>
>> -  return TLI.LowerF128Op(Op, DAG,
>> -                         TLI.getLibcallName(OpVT == MVT::i32
>> -                                            ? RTLIB::UINTTOFP_I32_F128
>> -                                            : RTLIB::UINTTOFP_I64_F128),
>> -                         1);
>> +    // Convert the int value to FP in an FP register.
>> +    SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT,
>> Op.getOperand(0));
>> +
>> +    return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(),
>> FloatTmp);
>> +  }
>> +
>> +  if (OpVT == MVT::i32 && Subtarget->is64Bit()) {
>> +
>> +    SDValue Int64Tmp =
>> +        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0));
>> +
>> +    SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64,
>> Int64Tmp);
>> +
>> +    return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp);
>> +  }
>> +
>> +  return SDValue();
>>  }
>>
>>  static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
>> @@ -3059,8 +3113,7 @@ LowerOperation(SDValue Op, SelectionDAG
>>                                                         hasHardQuad);
>>    case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG, *this,
>>                                                         hasHardQuad);
>> -  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG, *this,
>> -                                                       hasHardQuad);
>> +  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
>>    case ISD::BR_CC:              return LowerBR_CC(Op, DAG, *this,
>>                                                    hasHardQuad);
>>    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG, *this,
>> @@ -3097,6 +3150,7 @@ LowerOperation(SDValue Op, SelectionDAG
>>    case ISD::ATOMIC_LOAD:
>>    case ISD::ATOMIC_STORE:       return LowerATOMIC_LOAD_STORE(Op, DAG);
>>    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
>> +  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
>>    }
>>  }
>>
>>
>> Modified: llvm/trunk/lib/Target/Sparc/SparcISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sp
>> arc/SparcISelLowering.h?rev=318704&r1=318703&r2=318704&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.h Mon Nov 20 14:33:58
>> 2017
>> @@ -192,6 +192,10 @@ namespace llvm {
>>
>>      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
>> const;
>>
>> +    SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
>> +
>> +    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
>> +
>>      bool ShouldShrinkFPConstant(EVT VT) const override {
>>        // Do not shrink FP constpool if VT == MVT::f128.
>>        // (ldd, call _Q_fdtoq) is more expensive than two ldds.
>>
>> Modified: llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sp
>> arc/SparcInstrVIS.td?rev=318704&r1=318703&r2=318704&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td (original)
>> +++ llvm/trunk/lib/Target/Sparc/SparcInstrVIS.td Mon Nov 20 14:33:58 2017
>> @@ -243,16 +243,21 @@ def LZCNT     : VISInstFormat<0b00001011
>>                     (ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>;
>>
>>  let rs1 = 0 in {
>> -def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
>> -                   (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>;
>> -def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
>> -                   (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>;
>> -def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
>> -                   (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>;
>> -def MOVWTOS  :  VISInstFormat<0b100011001, (outs DFPRegs:$rd),
>> -                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
>> -def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd),
>> -                   (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>;
>> +def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins
>> FPRegs:$rs2),
>> +                   "movstosw $rs2, $rd",
>> +                   [(set I64Regs:$rd, (sext (i32 (bitconvert
>> FPRegs:$rs2))))]>;
>> +def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins
>> FPRegs:$rs2),
>> +                   "movstouw $rs2, $rd",
>> +                   [(set I64Regs:$rd, (zext (i32 (bitconvert
>> FPRegs:$rs2))))]>;
>> +def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins
>> DFPRegs:$rs2),
>> +                   "movdtox $rs2, $rd",
>> +                   [(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>;
>> +def MOVWTOS  :  VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins
>> IntRegs:$rs2),
>> +                   "movwtos $rs2, $rd",
>> +                   [(set FPRegs:$rd, (bitconvert i32:$rs2))]>;
>> +def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins
>> I64Regs:$rs2),
>> +                   "movxtod $rs2, $rd",
>> +                   [(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>;
>>  }
>>
>>  def PDISTN   : VISInst<0b000111111, "pdistn">;
>>
>> Modified: llvm/trunk/test/CodeGen/SPARC/float.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> SPARC/float.ll?rev=318704&r1=318703&r2=318704&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/SPARC/float.ll (original)
>> +++ llvm/trunk/test/CodeGen/SPARC/float.ll Mon Nov 20 14:33:58 2017
>> @@ -3,6 +3,8 @@
>>  ; RUN: llc -march=sparc -O0 < %s | FileCheck %s -check-prefix=V8-UNOPT
>>  ; RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9
>>  ; RUN: llc -mtriple=sparc64-unknown-linux < %s | FileCheck %s
>> -check-prefix=SPARC64
>> +; RUN: llc -march=sparc -mcpu=niagara4 < %s  | FileCheck %s
>> -check-prefix=VIS3
>> +; RUN: llc -march=sparcv9 -mcpu=niagara4 < %s | FileCheck %s
>> -check-prefix=VIS3-64
>>
>>  ; V8-LABEL:     test_neg:
>>  ; V8:     call get_double
>> @@ -194,7 +196,7 @@ entry:
>>  ; V9:          fstoi
>>
>>  ; SPARC64-LABEL:    test_utos_stou
>> -; SPARC64:     fdtos
>> +; SPARC64:     fxtos
>>  ; SPARC64:     fstoi
>>
>>  define void @test_utos_stou(i32 %a, i32* %ptr0, float* %ptr1) {
>> @@ -240,6 +242,9 @@ entry:
>>  ; SPARC64-NOT:      fitod
>>  ; SPARC64:          fdtoi
>>
>> +; VIS3-64-LABEL:  test_utod_dtou
>> +; VIS3-64:        movxtod
>> +
>>  define void @test_utod_dtou(i32 %a, double %b, i32* %ptr0, double*
>> %ptr1) {
>>  entry:
>>    %0 = uitofp i32 %a to double
>> @@ -248,3 +253,49 @@ entry:
>>    store i32 %1, i32* %ptr0, align 8
>>    ret void
>>  }
>> +
>> +; V8-LABEL:    test_ustod
>> +; V8:          fitod
>> +
>> +; VIS3-LABEL:  test_ustod
>> +; VIS3:        movwtos
>> +
>> +define double @test_ustod(i16 zeroext) {
>> +  %2 = uitofp i16 %0 to double
>> +  ret double %2
>> +}
>> +
>> +; V8-LABEL:    test_ustos
>> +; V8:          fitos
>> +
>> +; VIS3-LABEL:  test_ustos
>> +; VIS3:        movwtos
>> +
>> +define float @test_ustos(i16 zeroext) {
>> +  %2 = uitofp i16 %0 to float
>> +  ret float %2
>> +}
>> +
>> +; check for movwtos used for bitcast
>> +;
>> +; VIS3-LABEL:  test_bitcast_utos
>> +; VIS3:movwtos
>> +
>> +define float @test_bitcast_utos(i32 ) {
>> +  %2 = bitcast i32 %0 to float
>> +  ret float %2
>> +}
>> +
>> +
>> +; check for movxtod used for bitcast
>> +;
>> +; VIS3-64-LABEL:  test_bitcast_uxtod
>> +; VIS3-64:movxtod
>> +
>> +define double @test_bitcast_uxtod(i64 ) {
>> +  %2 = bitcast i64 %0 to double
>> +  ret double %2
>> +}
>> +
>> +
>> +
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171211/3dc3e117/attachment-0001.html>