[llvm] r368012 - [Strict FP] Allow custom operation actions

Ulrich Weigand via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 03:43:13 PDT 2019


Author: uweigand
Date: Tue Aug  6 03:43:13 2019
New Revision: 368012

URL: http://llvm.org/viewvc/llvm-project?rev=368012&view=rev
Log:
[Strict FP] Allow custom operation actions

This patch changes the DAG legalizer to respect the operation actions
set by the target for strict floating-point operations. (Currently, the
legalizer will usually fall back to mutating to the non-strict operation
(which is assumed to be legal), and only skips the mutation if the strict
operation is marked legal.)

With this patch, whenever a strict operation is marked as Legal or
Custom, it is passed to the target as usual. Only if it is marked as
Expand will the legalizer attempt to mutate to the non-strict operation.
Note that this will now fail if the non-strict operation is itself
marked as Custom -- the target will have to provide a Custom definition
for the strict operation then as well.

Reviewed By: hfinkel

Differential Revision: https://reviews.llvm.org/D65226


Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=368012&r1=368011&r2=368012&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Tue Aug  6 03:43:13 2019
@@ -930,6 +930,8 @@ public:
     return Supported ? Action : Expand;
   }
 
+  // If Op is a strict floating-point operation, return the result
+  // of getOperationAction for the equivalent non-strict operation.
   LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
     unsigned EqOpc;
     switch (Op) {
@@ -962,14 +964,7 @@ public:
       case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
     }
 
-    auto Action = getOperationAction(EqOpc, VT);
-
-    // We don't currently handle Custom or Promote for strict FP pseudo-ops.
-    // For now, we just expand for those cases.
-    if (Action != Legal)
-      Action = Expand;
-
-    return Action;
+    return getOperationAction(EqOpc, VT);
   }
 
   /// Return true if the specified operation is legal on this target or can be

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=368012&r1=368011&r2=368012&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue Aug  6 03:43:13 2019
@@ -1097,39 +1097,6 @@ void SelectionDAGLegalize::LegalizeOp(SD
       return;
     }
     break;
-  case ISD::STRICT_FADD:
-  case ISD::STRICT_FSUB:
-  case ISD::STRICT_FMUL:
-  case ISD::STRICT_FDIV:
-  case ISD::STRICT_FREM:
-  case ISD::STRICT_FSQRT:
-  case ISD::STRICT_FMA:
-  case ISD::STRICT_FPOW:
-  case ISD::STRICT_FPOWI:
-  case ISD::STRICT_FSIN:
-  case ISD::STRICT_FCOS:
-  case ISD::STRICT_FEXP:
-  case ISD::STRICT_FEXP2:
-  case ISD::STRICT_FLOG:
-  case ISD::STRICT_FLOG10:
-  case ISD::STRICT_FLOG2:
-  case ISD::STRICT_FRINT:
-  case ISD::STRICT_FNEARBYINT:
-  case ISD::STRICT_FMAXNUM:
-  case ISD::STRICT_FMINNUM:
-  case ISD::STRICT_FCEIL:
-  case ISD::STRICT_FFLOOR:
-  case ISD::STRICT_FROUND:
-  case ISD::STRICT_FTRUNC:
-  case ISD::STRICT_FP_ROUND:
-  case ISD::STRICT_FP_EXTEND:
-    // These pseudo-ops get legalized as if they were their non-strict
-    // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
-    // is also legal, but if ISD::FSQRT requires expansion then so does
-    // ISD::STRICT_FSQRT.
-    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
-                                            Node->getValueType(0));
-    break;
   case ISD::SADDSAT:
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
@@ -2815,6 +2782,12 @@ bool SelectionDAGLegalize::ExpandNode(SD
     break;
   }
   case ISD::STRICT_FP_ROUND:
+    // This expansion does not honor the "strict" properties anyway,
+    // so prefer falling back to the non-strict operation if legal.
+    if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                       Node->getValueType(0))
+        == TargetLowering::Legal)
+      break;
     Tmp1 = EmitStackConvert(Node->getOperand(1), 
                             Node->getValueType(0),
                             Node->getValueType(0), dl, Node->getOperand(0));
@@ -2829,6 +2802,12 @@ bool SelectionDAGLegalize::ExpandNode(SD
     Results.push_back(Tmp1);
     break;
   case ISD::STRICT_FP_EXTEND:
+    // This expansion does not honor the "strict" properties anyway,
+    // so prefer falling back to the non-strict operation if legal.
+    if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                       Node->getValueType(0))
+        == TargetLowering::Legal)
+      break;
     Tmp1 = EmitStackConvert(Node->getOperand(1),
                             Node->getOperand(1).getValueType(),
                             Node->getValueType(0), dl, Node->getOperand(0));
@@ -3715,6 +3694,18 @@ bool SelectionDAGLegalize::ExpandNode(SD
     break;
   }
 
+  if (Results.empty() && Node->isStrictFPOpcode()) {
+    // FIXME: We were asked to expand a strict floating-point operation,
+    // but there is currently no expansion implemented that would preserve
+    // the "strict" properties.  For now, we just fall back to the non-strict
+    // version if that is legal on the target.  The actual mutation of the
+    // operation will happen in SelectionDAGISel::DoInstructionSelection.
+    if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                       Node->getValueType(0))
+        == TargetLowering::Legal)
+      return true;
+  }
+
   // Replace the original node with the legalized result.
   if (Results.empty()) {
     LLVM_DEBUG(dbgs() << "Cannot expand node\n");

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=368012&r1=368011&r2=368012&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Aug  6 03:43:13 2019
@@ -335,12 +335,23 @@ SDValue VectorLegalizer::LegalizeOp(SDVa
   case ISD::STRICT_FTRUNC:
   case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_EXTEND:
-    // These pseudo-ops get legalized as if they were their non-strict
-    // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
-    // is also legal, but if ISD::FSQRT requires expansion then so does
-    // ISD::STRICT_FSQRT.
-    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
-                                            Node->getValueType(0));
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+    // If we're asked to expand a strict vector floating-point operation,
+    // by default we're going to simply unroll it.  That is usually the
+    // best approach, except in the case where the resulting strict (scalar)
+    // operations would themselves use the fallback mutation to non-strict.
+    // In that specific case, just do the fallback on the vector op.
+    if (Action == TargetLowering::Expand &&
+        TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                       Node->getValueType(0))
+        == TargetLowering::Legal) {
+      EVT EltVT = Node->getValueType(0).getVectorElementType();
+      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+          == TargetLowering::Expand &&
+          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+          == TargetLowering::Legal)
+        Action = TargetLowering::Legal;
+    }
     break;
   case ISD::ADD:
   case ISD::SUB:

Modified: llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll?rev=368012&r1=368011&r2=368012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll Tue Aug  6 03:43:13 2019
@@ -5447,19 +5447,54 @@ entry:
 define <2 x double> @constrained_vector_nearbyint_v2f64() {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
 ; PC64LE-NEXT:    addis 3, 2, .LCPI81_0 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI81_0 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    lfd 1, .LCPI81_0 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI81_1 at toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI81_1 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI81_0 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI81_0 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI81_1 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI81_1 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
@@ -5568,23 +5603,37 @@ define <3 x double> @constrained_vector_
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
 ; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI83_0 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_1 at toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI83_1 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_2 at toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI83_2 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI83_1 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpic 2, 0
-; PC64LE-NEXT:    xxswapd 0, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -5593,22 +5642,33 @@ define <3 x double> @constrained_vector_
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
 ; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0 at toc@ha
 ; PC64LE9-NEXT:    lfd 1, .LCPI83_0 at toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 2, 0
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI83_1 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_2 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI83_2 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -5623,28 +5683,92 @@ entry:
 define <4 x double> @constrained_vector_nearbyint_v4f64() {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_0 at toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI84_1 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI84_0 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 4, .LCPI84_1 at toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvrdpic 34, 0
-; PC64LE-NEXT:    xvrdpic 35, 1
+; PC64LE-NEXT:    lfd 1, .LCPI84_0 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_1 at toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI84_1 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_2 at toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI84_2 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_3 at toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI84_3 at toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_0 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfd 1, .LCPI84_0 at toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1 at toc@l
-; PC64LE9-NEXT:    xvrdpic 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 35, 0
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI84_1 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_2 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI84_2 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_3 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI84_3 at toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
@@ -5706,26 +5830,62 @@ entry:
 define <2 x double> @constrained_vector_maxnum_v2f64() {
 ; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
 ; PC64LE-NEXT:    addis 3, 2, .LCPI86_0 at toc@ha
 ; PC64LE-NEXT:    addis 4, 2, .LCPI86_1 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI86_0 at toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI86_1 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvmaxdp 34, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI86_0 at toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI86_1 at toc@l(4)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI86_3 at toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI86_2 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI86_3 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI86_2 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI86_0 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI86_0 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfs 1, .LCPI86_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI86_1 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI86_1 at toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmaxdp 34, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI86_1 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_2 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI86_2 at toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_3 at toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI86_3 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
@@ -5854,29 +6014,43 @@ define <3 x double> @constrained_vector_
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
 ; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_0 at toc@ha
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    addis 4, 2, .LCPI88_1 at toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI88_0 at toc@l(3)
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_0 at toc@ha
 ; PC64LE-NEXT:    lfs 2, .LCPI88_1 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_0 at toc@l(3)
 ; PC64LE-NEXT:    bl fmax
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_2 at toc@ha
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    addis 4, 2, .LCPI88_3 at toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_2 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_3 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_2 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_5 at toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_4 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_5 at toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI88_4 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI88_2 at toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI88_3 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 2, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvmaxdp 2, 2, 0
-; PC64LE-NEXT:    xxswapd 0, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -5885,27 +6059,39 @@ define <3 x double> @constrained_vector_
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
 ; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_0 at toc@ha
 ; PC64LE9-NEXT:    lfs 1, .LCPI88_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 2, .LCPI88_1 at toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl fmax
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_2 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI88_2 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI88_2 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI88_3 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI88_3 at toc@l
+; PC64LE9-NEXT:    lfs 2, .LCPI88_3 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_4 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI88_4 at toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_5 at toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_5 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmaxdp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -5921,42 +6107,108 @@ entry:
 define <4 x double> @constrained_vector_maxnum_v4f64() {
 ; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_0 at toc@ha
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    addis 4, 2, .LCPI89_1 at toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI89_2 at toc@ha
-; PC64LE-NEXT:    addis 6, 2, .LCPI89_3 at toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI89_0 at toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI89_1 at toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addi 3, 5, .LCPI89_2 at toc@l
-; PC64LE-NEXT:    addi 4, 6, .LCPI89_3 at toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    lxvd2x 3, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xxswapd 3, 3
-; PC64LE-NEXT:    xvmaxdp 34, 1, 0
-; PC64LE-NEXT:    xvmaxdp 35, 3, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_0 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_1 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_0 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_3 at toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_2 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_3 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_2 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_5 at toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_4 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_5 at toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI89_4 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_7 at toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_6 at toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_7 at toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_6 at toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_0 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI89_0 at toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfs 1, .LCPI89_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_1 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI89_1 at toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    lfs 2, .LCPI89_1 at toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_2 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI89_2 at toc@l
-; PC64LE9-NEXT:    xvmaxdp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_2 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI89_3 at toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI89_3 at toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmaxdp 35, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI89_3 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_4 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI89_4 at toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_5 at toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_5 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_6 at toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_6 at toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_7 at toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_7 at toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
@@ -6020,26 +6272,62 @@ define <1 x float> @constrained_vector_m
 define <2 x double> @constrained_vector_minnum_v2f64() {
 ; PC64LE-LABEL: constrained_vector_minnum_v2f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
 ; PC64LE-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
 ; PC64LE-NEXT:    addis 4, 2, .LCPI91_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI91_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI91_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvmindp 34, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI91_1@toc@l(4)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI91_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI91_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI91_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI91_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmindp 34, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
@@ -6168,29 +6456,43 @@ define <3 x double> @constrained_vector_
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
 ; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
 ; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
 ; PC64LE-NEXT:    bl fmin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI93_2@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI93_3@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 2, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvmindp 2, 2, 0
-; PC64LE-NEXT:    xxswapd 0, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -6199,27 +6501,39 @@ define <3 x double> @constrained_vector_
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
 ; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
 ; PC64LE9-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
 ; PC64LE9-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl fmin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI93_2@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI93_3@toc@l
+; PC64LE9-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmindp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -6235,42 +6549,108 @@ entry:
 define <4 x double> @constrained_vector_minnum_v4f64() {
 ; PC64LE-LABEL: constrained_vector_minnum_v4f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI94_2@toc@ha
-; PC64LE-NEXT:    addis 6, 2, .LCPI94_3@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI94_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI94_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addi 3, 5, .LCPI94_2@toc@l
-; PC64LE-NEXT:    addi 4, 6, .LCPI94_3@toc@l
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    lxvd2x 3, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xxswapd 3, 3
-; PC64LE-NEXT:    xvmindp 34, 1, 0
-; PC64LE-NEXT:    xvmindp 35, 3, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_7@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_7@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI94_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI94_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI94_2@toc@l
-; PC64LE9-NEXT:    xvmindp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI94_3@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmindp 35, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(

Modified: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll?rev=368012&r1=368011&r2=368012&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll Tue Aug  6 03:43:13 2019
@@ -544,14 +544,19 @@ entry:
 define <2 x double> @constrained_vector_fadd_v2f64() {
 ; CHECK-LABEL: constrained_vector_fadd_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm0, %xmm1
+; CHECK-NEXT:    addsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fadd_v2f64:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
 entry:
   %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
@@ -598,22 +603,24 @@ entry:
 define <3 x double> @constrained_vector_fadd_v3f64() {
 ; CHECK-LABEL: constrained_vector_fadd_v3f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    xorpd %xmm1, %xmm1
+; CHECK-NEXT:    xorpd %xmm2, %xmm2
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm1, %xmm2
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm1, %xmm0
 ; CHECK-NEXT:    addsd {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fadd_v3f64:
 ; AVX:       # %bb.0: # %entry
 ; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm1, %xmm2
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
 entry:
@@ -629,16 +636,28 @@ entry:
 define <4 x double> @constrained_vector_fadd_v4f64() {
 ; CHECK-LABEL: constrained_vector_fadd_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308]
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1]
-; CHECK-NEXT:    addpd %xmm1, %xmm0
-; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm1, %xmm2
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm1, %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    addsd %xmm1, %xmm2
+; CHECK-NEXT:    addsd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fadd_v4f64:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308]
-; AVX-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 entry:
   %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
@@ -675,14 +694,19 @@ entry:
 define <2 x double> @constrained_vector_fsub_v2f64() {
 ; CHECK-LABEL: constrained_vector_fsub_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
-; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movapd %xmm0, %xmm1
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fsub_v2f64:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
-; AVX-NEXT:    vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
 entry:
   %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
@@ -732,12 +756,12 @@ define <3 x double> @constrained_vector_
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xorpd %xmm0, %xmm0
 ; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    subsd %xmm0, %xmm1
-; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
-; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movsd %xmm1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; CHECK-NEXT:    movapd %xmm1, %xmm2
+; CHECK-NEXT:    subsd %xmm0, %xmm2
+; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movsd %xmm2, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
 ;
@@ -746,8 +770,9 @@ define <3 x double> @constrained_vector_
 ; AVX-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
-; AVX-NEXT:    vsubpd {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm1, %xmm2
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
 entry:
@@ -763,16 +788,28 @@ entry:
 define <4 x double> @constrained_vector_fsub_v4f64() {
 ; CHECK-LABEL: constrained_vector_fsub_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308]
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    movapd %xmm1, %xmm2
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm2
 ; CHECK-NEXT:    movapd %xmm1, %xmm0
-; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    movapd %xmm1, %xmm2
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm2
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; CHECK-NEXT:    retq
 ;
 ; AVX-LABEL: constrained_vector_fsub_v4f64:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308]
-; AVX-NEXT:    vsubpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm2
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 entry:
   %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(




More information about the llvm-commits mailing list