[llvm] 705271d - [PowerPC] Expand constrained ppc_fp128 to i32 conversion

Qiu Chaofan via llvm-commits <llvm-commits@lists.llvm.org>
Fri Sep 4 22:42:35 PDT 2020


Author: Qiu Chaofan
Date: 2020-09-05T13:16:20+08:00
New Revision: 705271d9cd0e981b2df41cf2802880dcd5925281

URL: https://github.com/llvm/llvm-project/commit/705271d9cd0e981b2df41cf2802880dcd5925281
DIFF: https://github.com/llvm/llvm-project/commit/705271d9cd0e981b2df41cf2802880dcd5925281.diff

LOG: [PowerPC] Expand constrained ppc_fp128 to i32 conversion

Libcall __gcc_qtou is not available, which breaks some tests needing
it. On PowerPC, we have code to manually expand the operation, this
patch applies it to constrained conversion. To keep it strict-safe,
it's using the algorithm similar to expandFP_TO_UINT.

For constrained operations that mark FP exception behavior as 'ignore',
we should set the NoFPExcept flag. However, in some custom lowerings
the flag is missed; this should be fixed by future patches.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D86605

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
    llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4fe29f7f2994..b213abb57aa8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -262,6 +262,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // PPC (the libcall is not available).
   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
 
   // We do not currently implement these libm ops for PowerPC.
   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
@@ -1505,6 +1507,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
+  case PPCISD::STRICT_FADDRTZ:
+    return "PPCISD::STRICT_FADDRTZ";
   case PPCISD::STRICT_FCTIDZ:
     return "PPCISD::STRICT_FCTIDZ";
   case PPCISD::STRICT_FCTIWZ:
@@ -8164,38 +8168,86 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+  EVT SrcVT = Src.getValueType();
+  EVT DstVT = Op.getValueType();
+
   // FP to INT conversions are legal for f128.
-  if (Src.getValueType() == MVT::f128)
+  if (SrcVT == MVT::f128)
     return Op;
 
   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
   // PPC (the libcall is not available).
-  if (Src.getValueType() == MVT::ppcf128 && !IsStrict) {
-    if (Op.getValueType() == MVT::i32) {
+  if (SrcVT == MVT::ppcf128) {
+    if (DstVT == MVT::i32) {
+      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
+      // set other fast-math flags to FP operations in both strict and
+      // non-strict cases. (FP_TO_SINT, FSUB)
+      SDNodeFlags Flags;
+      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
       if (IsSigned) {
         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                  DAG.getIntPtrConstant(0, dl));
         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                  DAG.getIntPtrConstant(1, dl));
 
-        // Add the two halves of the long double in round-to-zero mode.
-        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
-
-        // Now use a smaller FP_TO_SINT.
-        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+        // Add the two halves of the long double in round-to-zero mode, and use
+        // a smaller FP_TO_SINT.
+        if (IsStrict) {
+          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
+                                    DAG.getVTList(MVT::f64, MVT::Other),
+                                    {Op.getOperand(0), Lo, Hi}, Flags);
+          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+                             DAG.getVTList(MVT::i32, MVT::Other),
+                             {Res.getValue(1), Res}, Flags);
+        } else {
+          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+        }
       } else {
         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
-        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
-        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
-        // FIXME: generated code sucks.
-        // TODO: Are there fast-math-flags to propagate to this FSUB?
-        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp);
-        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
-        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
-                           DAG.getConstant(0x80000000, dl, MVT::i32));
-        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
-        return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE);
+        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
+        if (IsStrict) {
+          // Sel = Src < 0x80000000
+          // FltOfs = select Sel, 0.0, 0x80000000
+          // IntOfs = select Sel, 0, 0x80000000
+          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
+          SDValue Chain = Op.getOperand(0);
+          EVT SetCCVT =
+              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+          EVT DstSetCCVT =
+              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
+          SDValue Sel =
+              DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true);
+          Chain = Sel.getValue(1);
+
+          SDValue FltOfs = DAG.getSelect(
+              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+
+          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
+                                    DAG.getVTList(SrcVT, MVT::Other),
+                                    {Chain, Src, FltOfs}, Flags);
+          Chain = Val.getValue(1);
+          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+                                     DAG.getVTList(DstVT, MVT::Other),
+                                     {Chain, Val}, Flags);
+          Chain = SInt.getValue(1);
+          SDValue IntOfs = DAG.getSelect(
+              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
+          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
+          return DAG.getMergeValues({Result, Chain}, dl);
+        } else {
+          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+          // FIXME: generated code sucks.
+          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
+          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
+          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
+          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
+        }
       }
     }
 
@@ -12170,7 +12222,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addReg(PPC::RM, RegState::ImplicitDefine);
 
     // Perform addition.
-    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+    auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
+                   .addReg(Src1)
+                   .addReg(Src2);
+    if (MI.getFlag(MachineInstr::NoFPExcept))
+      MIB.setMIFlag(MachineInstr::NoFPExcept);
 
     // Restore FPSCR value.
     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index f51cd2823fcd..05c9a5d31413 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -453,6 +453,9 @@ namespace llvm {
     STRICT_FCFIDS,
     STRICT_FCFIDUS,
 
+    /// Constrained floating point add in round-to-zero mode.
+    STRICT_FADDRTZ,
+
     /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
     /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
     /// the GPRC input, then stores it through Ptr.  Type can be either i16 or

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 04ecb72a5ccd..a6932005d5ad 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -182,7 +182,12 @@ def PPCmffs   : SDNode<"PPCISD::MFFS",
 
 // Perform FADD in round-to-zero mode.
 def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp,
+                              [SDNPHasChain]>;
 
+def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
+                             [(PPCfaddrtz node:$lhs, node:$rhs),
+                              (PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
 
 def PPCfsel   : SDNode<"PPCISD::FSEL",
    // Type constraint for fsel.
@@ -2960,9 +2965,9 @@ def : InstAlias<"mtcr $rA", (MTCRF 255, gprc:$rA)>;
 
 let Predicates = [HasFPU] in {
 // Custom inserter instruction to perform FADD in round-to-zero mode.
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
   def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
-                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+                      [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>;
 }
 
 // The above pseudo gets expanded to make use of the following instructions

diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
index d8ef98c149f6..b4927f3da063 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
@@ -403,47 +403,39 @@ entry:
 define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_i32:
 ; P8:       # %bb.0: # %entry
-; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    .cfi_def_cfa_offset 112
-; P8-NEXT:    .cfi_offset lr, 16
-; P8-NEXT:    bl __gcc_qtou
-; P8-NEXT:    nop
+; P8-NEXT:    mffs f0
+; P8-NEXT:    mtfsb1 31
+; P8-NEXT:    mtfsb0 30
+; P8-NEXT:    fadd f1, f2, f1
+; P8-NEXT:    mtfsf 1, f0
+; P8-NEXT:    xscvdpsxws f0, f1
+; P8-NEXT:    mffprwz r3, f0
 ; P8-NEXT:    extsw r3, r3
-; P8-NEXT:    addi r1, r1, 112
-; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
 ; P9-LABEL: ppcq_to_i32:
 ; P9:       # %bb.0: # %entry
-; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
-; P9-NEXT:    stdu r1, -32(r1)
-; P9-NEXT:    .cfi_def_cfa_offset 32
-; P9-NEXT:    .cfi_offset lr, 16
-; P9-NEXT:    bl __gcc_qtou
-; P9-NEXT:    nop
+; P9-NEXT:    mffs f0
+; P9-NEXT:    mtfsb1 31
+; P9-NEXT:    mtfsb0 30
+; P9-NEXT:    fadd f1, f2, f1
+; P9-NEXT:    mtfsf 1, f0
+; P9-NEXT:    xscvdpsxws f0, f1
+; P9-NEXT:    mffprwz r3, f0
 ; P9-NEXT:    extsw r3, r3
-; P9-NEXT:    addi r1, r1, 32
-; P9-NEXT:    ld r0, 16(r1)
-; P9-NEXT:    mtlr r0
 ; P9-NEXT:    blr
 ;
 ; NOVSX-LABEL: ppcq_to_i32:
 ; NOVSX:       # %bb.0: # %entry
-; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
-; NOVSX-NEXT:    stdu r1, -32(r1)
-; NOVSX-NEXT:    .cfi_def_cfa_offset 32
-; NOVSX-NEXT:    .cfi_offset lr, 16
-; NOVSX-NEXT:    bl __gcc_qtou
-; NOVSX-NEXT:    nop
-; NOVSX-NEXT:    extsw r3, r3
-; NOVSX-NEXT:    addi r1, r1, 32
-; NOVSX-NEXT:    ld r0, 16(r1)
-; NOVSX-NEXT:    mtlr r0
+; NOVSX-NEXT:    mffs f0
+; NOVSX-NEXT:    mtfsb1 31
+; NOVSX-NEXT:    addi r3, r1, -4
+; NOVSX-NEXT:    mtfsb0 30
+; NOVSX-NEXT:    fadd f1, f2, f1
+; NOVSX-NEXT:    mtfsf 1, f0
+; NOVSX-NEXT:    fctiwz f0, f1
+; NOVSX-NEXT:    stfiwx f0, 0, r3
+; NOVSX-NEXT:    lwa r3, -4(r1)
 ; NOVSX-NEXT:    blr
 entry:
   %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0
@@ -549,12 +541,40 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r0, 16(r1)
-; P8-NEXT:    stdu r1, -112(r1)
-; P8-NEXT:    .cfi_def_cfa_offset 112
+; P8-NEXT:    stdu r1, -128(r1)
+; P8-NEXT:    .cfi_def_cfa_offset 128
 ; P8-NEXT:    .cfi_offset lr, 16
-; P8-NEXT:    bl __fixunstfsi
+; P8-NEXT:    .cfi_offset r30, -16
+; P8-NEXT:    addis r3, r2, .LCPI11_0@toc@ha
+; P8-NEXT:    xxlxor f3, f3, f3
+; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8-NEXT:    lfs f0, .LCPI11_0@toc@l(r3)
+; P8-NEXT:    fcmpo cr0, f2, f3
+; P8-NEXT:    lis r3, -32768
+; P8-NEXT:    xxlxor f3, f3, f3
+; P8-NEXT:    fcmpo cr1, f1, f0
+; P8-NEXT:    crand 4*cr5+lt, 4*cr1+eq, lt
+; P8-NEXT:    crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
+; P8-NEXT:    cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
+; P8-NEXT:    isel r30, 0, r3, 4*cr5+lt
+; P8-NEXT:    bc 12, 4*cr5+lt, .LBB11_2
+; P8-NEXT:  # %bb.1: # %entry
+; P8-NEXT:    fmr f3, f0
+; P8-NEXT:  .LBB11_2: # %entry
+; P8-NEXT:    xxlxor f4, f4, f4
+; P8-NEXT:    bl __gcc_qsub
 ; P8-NEXT:    nop
-; P8-NEXT:    addi r1, r1, 112
+; P8-NEXT:    mffs f0
+; P8-NEXT:    mtfsb1 31
+; P8-NEXT:    mtfsb0 30
+; P8-NEXT:    fadd f1, f2, f1
+; P8-NEXT:    mtfsf 1, f0
+; P8-NEXT:    xscvdpsxws f0, f1
+; P8-NEXT:    mffprwz r3, f0
+; P8-NEXT:    xor r3, r3, r30
+; P8-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
+; P8-NEXT:    clrldi r3, r3, 32
+; P8-NEXT:    addi r1, r1, 128
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
@@ -562,28 +582,88 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; P9-LABEL: ppcq_to_u32:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
-; P9-NEXT:    stdu r1, -32(r1)
-; P9-NEXT:    .cfi_def_cfa_offset 32
+; P9-NEXT:    .cfi_def_cfa_offset 48
 ; P9-NEXT:    .cfi_offset lr, 16
-; P9-NEXT:    bl __fixunstfsi
+; P9-NEXT:    .cfi_offset r30, -16
+; P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; P9-NEXT:    std r0, 16(r1)
+; P9-NEXT:    stdu r1, -48(r1)
+; P9-NEXT:    addis r3, r2, .LCPI11_0@toc@ha
+; P9-NEXT:    xxlxor f3, f3, f3
+; P9-NEXT:    lfs f0, .LCPI11_0@toc@l(r3)
+; P9-NEXT:    fcmpo cr1, f2, f3
+; P9-NEXT:    lis r3, -32768
+; P9-NEXT:    fcmpo cr0, f1, f0
+; P9-NEXT:    xxlxor f3, f3, f3
+; P9-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
+; P9-NEXT:    crandc 4*cr5+gt, lt, eq
+; P9-NEXT:    cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
+; P9-NEXT:    isel r30, 0, r3, 4*cr5+lt
+; P9-NEXT:    bc 12, 4*cr5+lt, .LBB11_2
+; P9-NEXT:  # %bb.1: # %entry
+; P9-NEXT:    fmr f3, f0
+; P9-NEXT:  .LBB11_2: # %entry
+; P9-NEXT:    xxlxor f4, f4, f4
+; P9-NEXT:    bl __gcc_qsub
 ; P9-NEXT:    nop
-; P9-NEXT:    addi r1, r1, 32
+; P9-NEXT:    mffs f0
+; P9-NEXT:    mtfsb1 31
+; P9-NEXT:    mtfsb0 30
+; P9-NEXT:    fadd f1, f2, f1
+; P9-NEXT:    mtfsf 1, f0
+; P9-NEXT:    xscvdpsxws f0, f1
+; P9-NEXT:    mffprwz r3, f0
+; P9-NEXT:    xor r3, r3, r30
+; P9-NEXT:    clrldi r3, r3, 32
+; P9-NEXT:    addi r1, r1, 48
 ; P9-NEXT:    ld r0, 16(r1)
+; P9-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; P9-NEXT:    mtlr r0
 ; P9-NEXT:    blr
 ;
 ; NOVSX-LABEL: ppcq_to_u32:
 ; NOVSX:       # %bb.0: # %entry
+; NOVSX-NEXT:    mfocrf r12, 32
 ; NOVSX-NEXT:    mflr r0
 ; NOVSX-NEXT:    std r0, 16(r1)
-; NOVSX-NEXT:    stdu r1, -32(r1)
-; NOVSX-NEXT:    .cfi_def_cfa_offset 32
+; NOVSX-NEXT:    stw r12, 8(r1)
+; NOVSX-NEXT:    stdu r1, -48(r1)
+; NOVSX-NEXT:    .cfi_def_cfa_offset 48
 ; NOVSX-NEXT:    .cfi_offset lr, 16
-; NOVSX-NEXT:    bl __fixunstfsi
+; NOVSX-NEXT:    .cfi_offset cr2, 8
+; NOVSX-NEXT:    addis r3, r2, .LCPI11_0@toc@ha
+; NOVSX-NEXT:    addis r4, r2, .LCPI11_1@toc@ha
+; NOVSX-NEXT:    lfs f0, .LCPI11_0@toc@l(r3)
+; NOVSX-NEXT:    lfs f4, .LCPI11_1@toc@l(r4)
+; NOVSX-NEXT:    fcmpo cr0, f1, f0
+; NOVSX-NEXT:    fcmpo cr1, f2, f4
+; NOVSX-NEXT:    fmr f3, f4
+; NOVSX-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
+; NOVSX-NEXT:    crandc 4*cr5+gt, lt, eq
+; NOVSX-NEXT:    cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt
+; NOVSX-NEXT:    bc 12, 4*cr2+lt, .LBB11_2
+; NOVSX-NEXT:  # %bb.1: # %entry
+; NOVSX-NEXT:    fmr f3, f0
+; NOVSX-NEXT:  .LBB11_2: # %entry
+; NOVSX-NEXT:    bl __gcc_qsub
 ; NOVSX-NEXT:    nop
-; NOVSX-NEXT:    addi r1, r1, 32
+; NOVSX-NEXT:    mffs f0
+; NOVSX-NEXT:    mtfsb1 31
+; NOVSX-NEXT:    addi r3, r1, 44
+; NOVSX-NEXT:    mtfsb0 30
+; NOVSX-NEXT:    fadd f1, f2, f1
+; NOVSX-NEXT:    mtfsf 1, f0
+; NOVSX-NEXT:    fctiwz f0, f1
+; NOVSX-NEXT:    stfiwx f0, 0, r3
+; NOVSX-NEXT:    lis r3, -32768
+; NOVSX-NEXT:    lwz r4, 44(r1)
+; NOVSX-NEXT:    isel r3, 0, r3, 4*cr2+lt
+; NOVSX-NEXT:    xor r3, r4, r3
+; NOVSX-NEXT:    clrldi r3, r3, 32
+; NOVSX-NEXT:    addi r1, r1, 48
 ; NOVSX-NEXT:    ld r0, 16(r1)
+; NOVSX-NEXT:    lwz r12, 8(r1)
+; NOVSX-NEXT:    mtocrf 32, r12
 ; NOVSX-NEXT:    mtlr r0
 ; NOVSX-NEXT:    blr
 entry:
@@ -747,12 +827,17 @@ entry:
   ret fp128 %conv
 }
 
-define void @fptoint_nofpexcept(fp128 %m, i32* %addr1, i64* %addr2) {
+define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2) {
 ; MIR-LABEL: name: fptoint_nofpexcept
 ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSWZ
 ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUWZ
 ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSDZ
 ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUDZ
+;
+; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD
+; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS
+; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD
+; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS
 entry:
   %conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0
   store volatile i32 %conv1, i32* %addr1, align 4
@@ -762,6 +847,11 @@ entry:
   store volatile i64 %conv3, i64* %addr2, align 8
   %conv4 = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0
   store volatile i64 %conv4, i64* %addr2, align 8
+
+  %conv5 = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0
+  store volatile i32 %conv5, i32* %addr1, align 4
+  %conv6 = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0
+  store volatile i32 %conv6, i32* %addr1, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index 72c6a137b9af..5ab12093954f 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -1202,38 +1202,36 @@ entry:
 define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    bl __gcc_qtou
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    mffs 0
+; PC64LE-NEXT:    mtfsb1 31
+; PC64LE-NEXT:    mtfsb0 30
+; PC64LE-NEXT:    fadd 1, 2, 1
+; PC64LE-NEXT:    mtfsf 1, 0
+; PC64LE-NEXT:    xscvdpsxws 0, 1
+; PC64LE-NEXT:    mffprwz 3, 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    bl __gcc_qtou
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    mffs 0
+; PC64LE9-NEXT:    mtfsb1 31
+; PC64LE9-NEXT:    mtfsb0 30
+; PC64LE9-NEXT:    fadd 1, 2, 1
+; PC64LE9-NEXT:    mtfsf 1, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 1
+; PC64LE9-NEXT:    mffprwz 3, 0
 ; PC64LE9-NEXT:    blr
 ;
 ; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    bl __gcc_qtou
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    mffs 0
+; PC64-NEXT:    mtfsb1 31
+; PC64-NEXT:    mtfsb0 30
+; PC64-NEXT:    fadd 1, 2, 1
+; PC64-NEXT:    mtfsf 1, 0
+; PC64-NEXT:    fctiwz 0, 1
+; PC64-NEXT:    stfd 0, -8(1)
+; PC64-NEXT:    lwz 3, -4(1)
 ; PC64-NEXT:    blr
 entry:
   %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(
@@ -1289,24 +1287,76 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    bl __fixunstfsi
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
+; PC64LE-NEXT:    fcmpo 0, 2, 3
+; PC64LE-NEXT:    lis 3, -32768
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    fcmpo 1, 1, 0
+; PC64LE-NEXT:    crand 20, 6, 0
+; PC64LE-NEXT:    crandc 21, 4, 6
+; PC64LE-NEXT:    cror 20, 21, 20
+; PC64LE-NEXT:    isel 30, 0, 3, 20
+; PC64LE-NEXT:    bc 12, 20, .LBB31_2
+; PC64LE-NEXT:  # %bb.1: # %entry
+; PC64LE-NEXT:    fmr 3, 0
+; PC64LE-NEXT:  .LBB31_2: # %entry
+; PC64LE-NEXT:    xxlxor 4, 4, 4
+; PC64LE-NEXT:    bl __gcc_qsub
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    mffs 0
+; PC64LE-NEXT:    mtfsb1 31
+; PC64LE-NEXT:    mtfsb0 30
+; PC64LE-NEXT:    fadd 1, 2, 1
+; PC64LE-NEXT:    mtfsf 1, 0
+; PC64LE-NEXT:    xscvdpsxws 0, 1
+; PC64LE-NEXT:    mffprwz 3, 0
+; PC64LE-NEXT:    xor 3, 3, 30
+; PC64LE-NEXT:    addi 1, 1, 48
 ; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 30, -16(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    bl __fixunstfsi
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE9-NEXT:    xxlxor 3, 3, 3
+; PC64LE9-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
+; PC64LE9-NEXT:    fcmpo 1, 2, 3
+; PC64LE9-NEXT:    lis 3, -32768
+; PC64LE9-NEXT:    fcmpo 0, 1, 0
+; PC64LE9-NEXT:    xxlxor 3, 3, 3
+; PC64LE9-NEXT:    crand 20, 2, 4
+; PC64LE9-NEXT:    crandc 21, 0, 2
+; PC64LE9-NEXT:    cror 20, 21, 20
+; PC64LE9-NEXT:    isel 30, 0, 3, 20
+; PC64LE9-NEXT:    bc 12, 20, .LBB31_2
+; PC64LE9-NEXT:  # %bb.1: # %entry
+; PC64LE9-NEXT:    fmr 3, 0
+; PC64LE9-NEXT:  .LBB31_2: # %entry
+; PC64LE9-NEXT:    xxlxor 4, 4, 4
+; PC64LE9-NEXT:    bl __gcc_qsub
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    mffs 0
+; PC64LE9-NEXT:    mtfsb1 31
+; PC64LE9-NEXT:    mtfsb0 30
+; PC64LE9-NEXT:    fadd 1, 2, 1
+; PC64LE9-NEXT:    mtfsf 1, 0
+; PC64LE9-NEXT:    xscvdpsxws 0, 1
+; PC64LE9-NEXT:    mffprwz 3, 0
+; PC64LE9-NEXT:    xor 3, 3, 30
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 ;
@@ -1314,12 +1364,45 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
 ; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    bl __fixunstfsi
+; PC64-NEXT:    mfcr 12
+; PC64-NEXT:    stw 12, 8(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
+; PC64-NEXT:    lfs 4, .LCPI31_1@toc@l(3)
+; PC64-NEXT:    fcmpo 0, 1, 0
+; PC64-NEXT:    crandc 21, 0, 2
+; PC64-NEXT:    fcmpo 1, 2, 4
+; PC64-NEXT:    crand 20, 2, 4
+; PC64-NEXT:    cror 8, 21, 20
+; PC64-NEXT:    fmr 3, 4
+; PC64-NEXT:    bc 12, 8, .LBB31_2
+; PC64-NEXT:  # %bb.1: # %entry
+; PC64-NEXT:    fmr 3, 0
+; PC64-NEXT:  .LBB31_2: # %entry
+; PC64-NEXT:    bl __gcc_qsub
 ; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    mffs 0
+; PC64-NEXT:    mtfsb1 31
+; PC64-NEXT:    lis 4, -32768
+; PC64-NEXT:    bc 12, 8, .LBB31_3
+; PC64-NEXT:    b .LBB31_4
+; PC64-NEXT:  .LBB31_3: # %entry
+; PC64-NEXT:    li 4, 0
+; PC64-NEXT:  .LBB31_4: # %entry
+; PC64-NEXT:    mtfsb0 30
+; PC64-NEXT:    fadd 1, 2, 1
+; PC64-NEXT:    mtfsf 1, 0
+; PC64-NEXT:    fctiwz 0, 1
+; PC64-NEXT:    stfd 0, 120(1)
+; PC64-NEXT:    lwz 3, 124(1)
+; PC64-NEXT:    xor 3, 3, 4
+; PC64-NEXT:    addi 1, 1, 128
 ; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    lwz 12, 8(1)
 ; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    mtcrf 32, 12 # cr2
 ; PC64-NEXT:    blr
 entry:
   %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(


        


More information about the llvm-commits mailing list