[llvm] r178006 - PowerPC: Simplify FADD in round-to-zero mode.

Tue Mar 26 07:40:08 PDT 2013

Hi Uli,

Just a note about the sequence (which I realize you just copied).
Saving and restoring the entire FPSCR will lose any effect of the FADD
on the sticky bits of the FPSCR with exceptions masked.  It's very
unlikely ever to make a difference, though, so I'm not sure I'd suggest
changing it.  Just for your awareness.

Bill

On Tue, 2013-03-26 at 10:56 +0000, Ulrich Weigand wrote:
> Author: uweigand
> Date: Tue Mar 26 05:56:22 2013
> New Revision: 178006
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=178006&view=rev
> Log:
> PowerPC: Simplify FADD in round-to-zero mode.
> 
> As part of the sequence generated to implement long double -> int
> conversions, we need to perform an FADD in round-to-zero mode.  This is
> problematical since the FPSCR is not at all modeled at the SelectionDAG
> level, and thus there is a risk of getting floating point instructions
> generated out of sequence with the instructions to modify FPSCR.
> 
> The current code handles this by somewhat "special" patterns that in part
> have dummy operands, and/or duplicate existing instructions, making them
> awkward to handle in the asm parser.
> 
> This commit changes this by leaving the "FADD in round-to-zero mode"
> as an atomic operation on the SelectionDAG level, and only split it up into
> real instructions at the MI level (via custom inserter).  Since at *this*
> level the FPSCR *is* modeled (via the "RM" hard register), much of the
> "special" stuff can just go away, and the resulting patterns can be used by
> the asm parser.
> 
> No significant change in generated code expected.
> 
> 
> Modified:
>     llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
>     llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
>     llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td
>     llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=178006&r1=178005&r2=178006&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Mar 26 05:56:22 2013
> @@ -584,10 +584,7 @@ const char *PPCTargetLowering::getTarget
>    case PPCISD::STCX:            return "PPCISD::STCX";
>    case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
>    case PPCISD::MFFS:            return "PPCISD::MFFS";
> -  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
> -  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
>    case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
> -  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
>    case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
>    case PPCISD::CR6SET:          return "PPCISD::CR6SET";
>    case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
> @@ -5667,50 +5664,8 @@ void PPCTargetLowering::ReplaceNodeResul
>                               MVT::f64, N->getOperand(0),
>                               DAG.getIntPtrConstant(1));
> 
> -    // This sequence changes FPSCR to do round-to-zero, adds the two halves
> -    // of the long double, and puts FPSCR back the way it was.  We do not
> -    // actually model FPSCR.
> -    std::vector<EVT> NodeTys;
> -    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
> -
> -    NodeTys.push_back(MVT::f64);   // Return register
> -    NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
> -    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
> -    MFFSreg = Result.getValue(0);
> -    InFlag = Result.getValue(1);
> -
> -    NodeTys.clear();
> -    NodeTys.push_back(MVT::Glue);   // Returns a flag
> -    Ops[0] = DAG.getConstant(31, MVT::i32);
> -    Ops[1] = InFlag;
> -    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
> -    InFlag = Result.getValue(0);
> -
> -    NodeTys.clear();
> -    NodeTys.push_back(MVT::Glue);   // Returns a flag
> -    Ops[0] = DAG.getConstant(30, MVT::i32);
> -    Ops[1] = InFlag;
> -    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
> -    InFlag = Result.getValue(0);
> -
> -    NodeTys.clear();
> -    NodeTys.push_back(MVT::f64);    // result of add
> -    NodeTys.push_back(MVT::Glue);   // Returns a flag
> -    Ops[0] = Lo;
> -    Ops[1] = Hi;
> -    Ops[2] = InFlag;
> -    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
> -    FPreg = Result.getValue(0);
> -    InFlag = Result.getValue(1);
> -
> -    NodeTys.clear();
> -    NodeTys.push_back(MVT::f64);
> -    Ops[0] = DAG.getConstant(1, MVT::i32);
> -    Ops[1] = MFFSreg;
> -    Ops[2] = FPreg;
> -    Ops[3] = InFlag;
> -    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
> -    FPreg = Result.getValue(0);
> +    // Add the two halves of the long double in round-to-zero mode.
> +    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
> 
>      // We know the low half is about to be thrown away, so just use something
>      // convenient.
> @@ -6523,6 +6478,30 @@ PPCTargetLowering::EmitInstrWithCustomIn
>      BB = exitMBB;
>      BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
>        .addReg(ShiftReg);
> +  } else if (MI->getOpcode() == PPC::FADDrtz) {
> +    // This pseudo performs an FADD with rounding mode temporarily forced
> +    // to round-to-zero.  We emit this via custom inserter since the FPSCR
> +    // is not modeled at the SelectionDAG level.
> +    unsigned Dest = MI->getOperand(0).getReg();
> +    unsigned Src1 = MI->getOperand(1).getReg();
> +    unsigned Src2 = MI->getOperand(2).getReg();
> +    DebugLoc dl   = MI->getDebugLoc();
> +
> +    MachineRegisterInfo &RegInfo = F->getRegInfo();
> +    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
> +
> +    // Save FPSCR value.
> +    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
> +
> +    // Set rounding mode to round-to-zero.
> +    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
> +    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
> +
> +    // Perform addition.
> +    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
> +
> +    // Restore FPSCR value.
> +    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
>    } else {
>      llvm_unreachable("Unexpected instr type to insert");
>    }
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=178006&r1=178005&r2=178006&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Mar 26 05:56:22 2013
> @@ -142,26 +142,13 @@ namespace llvm {
>        /// an optional input flag argument.
>        COND_BRANCH,
> 
> -      // The following 5 instructions are used only as part of the
> -      // long double-to-int conversion sequence.
> -
> -      /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
> -      /// register.
> -      MFFS,
> -
> -      /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
> -      MTFSB0,
> -
> -      /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
> -      MTFSB1,
> -
> -      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
> -      /// rounding towards zero.  It has flags added so it won't move past the
> -      /// FPSCR-setting instructions.
> +      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
> +      /// towards zero.  Used only as part of the long double-to-int
> +      /// conversion sequence.
>        FADDRTZ,
> 
> -      /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
> -      MTFSF,
> +      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
> +      MFFS,
> 
>        /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
>        /// reserve indexed. This is used to implement atomic operations.
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td?rev=178006&r1=178005&r2=178006&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrFormats.td Tue Mar 26 05:56:22 2013
> @@ -676,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits
>  // This is probably 1.7.9, but I don't have the reference that uses this
>  // numbering scheme...
>  class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
> -                      string cstr, InstrItinClass itin, list<dag>pattern>
> +              InstrItinClass itin, list<dag>pattern>
>    : I<opcode, OOL, IOL, asmstr, itin> {
>    bits<8> FM;
>    bits<5> rT;
> 
>    bit RC = 0;    // set by isDOT
>    let Pattern = pattern;
> -  let Constraints = cstr;
> 
>    let Inst{6} = 0;
>    let Inst{7-14}  = FM;
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=178006&r1=178005&r2=178006&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Mar 26 05:56:22 2013
> @@ -64,20 +64,13 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ",
>  def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
>                         [SDNPHasChain, SDNPMayStore]>;
> 
> -// This sequence is used for long double->int conversions.  It changes the
> -// bits in the FPSCR which is not modelled.  
> -def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
> -                        [SDNPOutGlue]>;
> -def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
> -                       [SDNPInGlue, SDNPOutGlue]>;
> -def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
> -                       [SDNPInGlue, SDNPOutGlue]>;
> -def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
> -                       [SDNPInGlue, SDNPOutGlue]>;
> -def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
> -                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
> -                        SDTCisVT<3, f64>]>,
> -                       [SDNPInGlue]>;
> +// Extract FPSCR (not modeled at the DAG level).
> +def PPCmffs   : SDNode<"PPCISD::MFFS",
> +                       SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
> +
> +// Perform FADD in round-to-zero mode.
> +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
> +
> 
>  def PPCfsel   : SDNode<"PPCISD::FSEL",  
>     // Type constraint for fsel.
> @@ -1288,27 +1281,23 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPR
>                         "mfocrf $rT, $FXM", SprMFCR>,
>              PPC970_DGroup_First, PPC970_Unit_CRU;
> 
> -// Instructions to manipulate FPSCR.  Only long double handling uses these.
> -// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
> +// Pseudo instruction to perform FADD in round-to-zero mode.
> +let usesCustomInserter = 1, Uses = [RM] in {
> +  def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
> +                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
> +}
> 
> +// The above pseudo gets expanded to make use of the following instructions
> +// to manipulate FPSCR.  Note that FPSCR is not modeled at the DAG level.
>  let Uses = [RM], Defs = [RM] in { 
>    def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
> -                         "mtfsb0 $FM", IntMTFSB0,
> -                        [(PPCmtfsb0 (i32 imm:$FM))]>,
> +                        "mtfsb0 $FM", IntMTFSB0, []>,
>                 PPC970_DGroup_Single, PPC970_Unit_FPU;
>    def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
> -                         "mtfsb1 $FM", IntMTFSB0,
> -                        [(PPCmtfsb1 (i32 imm:$FM))]>,
> +                        "mtfsb1 $FM", IntMTFSB0, []>,
>                 PPC970_DGroup_Single, PPC970_Unit_FPU;
> -  // MTFSF does not actually produce an FP result.  We pretend it copies
> -  // input reg B to the output.  If we didn't do this it would look like the
> -  // instruction had no outputs (because we aren't modelling the FPSCR) and
> -  // it would be deleted.
> -  def MTFSF  : XFLForm<63, 711, (outs F8RC:$FRA),
> -                                (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
> -                         "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
> -                         [(set f64:$FRA, (PPCmtfsf (i32 imm:$FM),
> -                                                    f64:$rT, f64:$FRB))]>,
> +  def MTFSF  : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
> +                       "mtfsf $FM, $rT", IntMTFSB0, []>,
>                 PPC970_DGroup_Single, PPC970_Unit_FPU;
>  }
>  let Uses = [RM] in {
> @@ -1316,11 +1305,6 @@ let Uses = [RM] in {
>                           "mffs $rT", IntMFFS,
>                           [(set f64:$rT, (PPCmffs))]>,
>                 PPC970_DGroup_Single, PPC970_Unit_FPU;
> -  def FADDrtz: AForm_2<63, 21,
> -                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
> -                      "fadd $FRT, $FRA, $FRB", FPAddSub,
> -                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>,
> -               PPC970_DGroup_Single, PPC970_Unit_FPU;
>  }
> 
> 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>