[llvm] 0148bf5 - [PowerPC] Use correct node to get a super register from a subreg

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 13 17:52:33 PDT 2021


Author: Nemanja Ivanovic
Date: 2021-04-13T19:52:21-05:00
New Revision: 0148bf53f0a0b533eb742a7c8005a40328c48d66

URL: https://github.com/llvm/llvm-project/commit/0148bf53f0a0b533eb742a7c8005a40328c48d66
DIFF: https://github.com/llvm/llvm-project/commit/0148bf53f0a0b533eb742a7c8005a40328c48d66.diff

LOG: [PowerPC] Use correct node to get a super register from a subreg

The VSX tablegen file has some rather eggregious uses of
COPY_TO_REGCLASS even in situations where it needs to use
SUBREG_TO_REG. While this produces correct code, it often doesn't
allow the register coalescer to coalesce copies and the resulting
code ends up being suboptimal. This patch just changes over
patterns that should use SUBREG_TO_REG.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/fp-strict-round.ll
    llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 475098efedd71..8673d6a7bc6cc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2198,7 +2198,7 @@ def VectorExtractions {
 
 def AlignValues {
   dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
-  dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
+  dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64);
 }
 
 // Integer extend helper dags 32 -> 64
@@ -2349,14 +2349,14 @@ def LoadFP {
 
 // FP merge dags (for f32 -> v4f32)
 def MrgFP {
-  dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
-  dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
-  dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
-  dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
-  dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
-                               (COPY_TO_REGCLASS $C, VSRC), 0));
-  dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
-                               (COPY_TO_REGCLASS $D, VSRC), 0));
+  dag LD32A = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$A), sub_64);
+  dag LD32B = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$B), sub_64);
+  dag LD32C = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$C), sub_64);
+  dag LD32D = (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$D), sub_64);
+  dag AC = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+                               (SUBREG_TO_REG (i64 1), $C, sub_64), 0));
+  dag BD = (XVCVDPSP (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64),
+                               (SUBREG_TO_REG (i64 1), $D, sub_64), 0));
   dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
   dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
   dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
@@ -2383,10 +2383,10 @@ def MrgWords {
 
   // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
   // then merge.
-  dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
-                            (COPY_TO_REGCLASS f64:$C, VSRC), 0));
-  dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
-                            (COPY_TO_REGCLASS f64:$D, VSRC), 0));
+  dag AC = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$A, sub_64),
+                            (SUBREG_TO_REG (i64 1), f64:$C, sub_64), 0));
+  dag BD = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$B, sub_64),
+                            (SUBREG_TO_REG (i64 1), f64:$D, sub_64), 0));
   dag CVACS = (v4i32 (XVCVDPSXWS AC));
   dag CVBDS = (v4i32 (XVCVDPSXWS BD));
   dag CVACU = (v4i32 (XVCVDPUXWS AC));
@@ -2394,10 +2394,10 @@ def MrgWords {
 
   // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
   // then merge.
-  dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
-                            (COPY_TO_REGCLASS f64:$B, VSRC), 0));
-  dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
-                            (COPY_TO_REGCLASS f64:$A, VSRC), 0));
+  dag DB = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$D, sub_64),
+                            (SUBREG_TO_REG (i64 1), f64:$B, sub_64), 0));
+  dag CA = (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), f64:$C, sub_64),
+                            (SUBREG_TO_REG (i64 1), f64:$A, sub_64), 0));
   dag CVDBS = (v4i32 (XVCVDPSXWS DB));
   dag CVCAS = (v4i32 (XVCVDPSXWS CA));
   dag CVDBU = (v4i32 (XVCVDPUXWS DB));
@@ -2762,24 +2762,24 @@ def : Pat<(v2i64 immAllZerosV),
 // Build vectors of floating point converted to i32.
 def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
                                DblToInt.A, DblToInt.A)),
-          (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+          (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS $A), sub_64), 1))>;
 def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
                                DblToUInt.A, DblToUInt.A)),
-          (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+          (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS $A), sub_64), 1))>;
 def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
-          (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
-                           (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+          (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64),
+                           (SUBREG_TO_REG (i64 1), (XSCVDPSXDS $A), sub_64), 0))>;
 def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
-          (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
-                           (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+          (v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
+                           (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
 defm : ScalToVecWPermute<
   v4i32, FltToIntLoad.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
-  (COPY_TO_REGCLASS (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1),
+  (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), sub_64)>;
 defm : ScalToVecWPermute<
   v4i32, FltToUIntLoad.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1),
-  (COPY_TO_REGCLASS (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC)>;
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64), 1),
+  (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), sub_64)>;
 def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
           (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
 def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
@@ -2825,8 +2825,8 @@ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
 
 def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
           (v2f64 (XXPERMDI
-                    (COPY_TO_REGCLASS $A, VSRC),
-                    (COPY_TO_REGCLASS $B, VSRC), 0))>;
+                    (SUBREG_TO_REG (i64 1), $A, sub_64),
+                    (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
 // Using VMRGEW to assemble the final vector would be a lower latency
 // solution. However, we choose to go with the slightly higher latency
 // XXPERMDI for 2 reasons:
@@ -2930,8 +2930,8 @@ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
 // Little endian, available on all targets with VSX
 def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
           (v2f64 (XXPERMDI
-                    (COPY_TO_REGCLASS $B, VSRC),
-                    (COPY_TO_REGCLASS $A, VSRC), 0))>;
+                    (SUBREG_TO_REG (i64 1), $B, sub_64),
+                    (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
 // Using VMRGEW to assemble the final vector would be a lower latency
 // solution. However, we choose to go with the slightly higher latency
 // XXPERMDI for 2 reasons:
@@ -3012,12 +3012,12 @@ def : Pat<(PPCstore_scal_int_from_vsr
 // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
 defm : ScalToVecWPermute<
   v4i32, DblToIntLoad.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
-  (COPY_TO_REGCLASS (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64), 1),
+  (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), sub_64)>;
 defm : ScalToVecWPermute<
   v4i32, DblToUIntLoad.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1),
-  (COPY_TO_REGCLASS (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC)>;
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64), 1),
+  (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), sub_64)>;
 defm : ScalToVecWPermute<
   v2i64, FltToLongLoad.A,
   (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$A), VSFRC)), 0),
@@ -3193,15 +3193,13 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
 // LIWZX - This instruction will be emitted for i32, f32, and when
 //         zero-extending i32 to i64 (zext i32 -> i64).
 def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
-          (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+          (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64))>;
 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
-          (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+          (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), VSRC))>;
 def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
-          (v4i32 (XXSLDWIs
-          (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+          (v4i32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>;
 def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
-          (v4f32 (XXSLDWIs
-          (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+          (v4f32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>;
 
 def : Pat<DWToSPExtractConv.BVU,
           (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
@@ -3281,22 +3279,22 @@ def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
 //         zero-extending i32 to i64 (zext i32 -> i64).
 defm : ScalToVecWPermute<
   v2i64, (i64 (sextloadi32 xoaddr:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2),
+  (XXPERMDIs (LIWAX xoaddr:$src), 2),
   (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64)>;
 
 defm : ScalToVecWPermute<
   v2i64, (i64 (zextloadi32 xoaddr:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+  (XXPERMDIs (LIWZX xoaddr:$src), 2),
   (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
 
 defm : ScalToVecWPermute<
   v4i32, (i32 (load xoaddr:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+  (XXPERMDIs (LIWZX xoaddr:$src), 2),
   (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
 
 defm : ScalToVecWPermute<
   v4f32, (f32 (load xoaddr:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2),
+  (XXPERMDIs (LIWZX xoaddr:$src), 2),
   (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>;
 
 def : Pat<DWToSPExtractConv.BVU,
@@ -3595,16 +3593,16 @@ let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in {
 // Big endian integer vectors using direct moves.
 def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
           (v2i64 (XXPERMDI
-                    (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
-                    (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+                    (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64),
+                    (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64), 0))>;
 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
           (XXPERMDI
-            (COPY_TO_REGCLASS
-              (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
-            (COPY_TO_REGCLASS
-              (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
+            (SUBREG_TO_REG (i64 1),
+              (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), sub_64),
+            (SUBREG_TO_REG (i64 1),
+              (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), sub_64), 0)>;
 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
-          (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+          (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>;
 } // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64
 
 // Little endian pre-Power9 VSX subtarget that has direct moves.
@@ -3612,16 +3610,16 @@ let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
 // Little endian integer vectors using direct moves.
 def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
           (v2i64 (XXPERMDI
-                    (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
-                    (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+                    (SUBREG_TO_REG (i64 1), (MTVSRD $B), sub_64),
+                    (SUBREG_TO_REG (i64 1), (MTVSRD $A), sub_64), 0))>;
 def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
           (XXPERMDI
-            (COPY_TO_REGCLASS
-              (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
-            (COPY_TO_REGCLASS
-              (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
+            (SUBREG_TO_REG (i64 1),
+              (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), sub_64),
+            (SUBREG_TO_REG (i64 1),
+              (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), sub_64), 0)>;
 def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
-          (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+          (XXSPLTW (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64), 1)>;
 }
 
 // Any Power9 VSX subtarget.
@@ -3797,9 +3795,9 @@ def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
           (f32 (DFLOADf32 iaddrX4:$src))>;
 
 def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
-          (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+          (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddr:$src), sub_64)>;
 def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
-          (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
+          (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
 
 // Convert (Un)Signed DWord in memory -> QP
 def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
@@ -3928,11 +3926,11 @@ defm : ScalToVecWPermute<v4i32, FltToUIntLoad.A,
                          (XVCVSPUXWS (LXVWSX xoaddr:$A))>;
 defm : ScalToVecWPermute<
   v4i32, DblToIntLoadP9.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1),
   (SUBREG_TO_REG (i64 1), (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
 defm : ScalToVecWPermute<
   v4i32, DblToUIntLoadP9.A,
-  (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1),
+  (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64), 1),
   (SUBREG_TO_REG (i64 1), (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), sub_64)>;
 defm : ScalToVecWPermute<
   v2i64, FltToLongLoadP9.A,
@@ -4072,14 +4070,14 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
           (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
 
 def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
-          (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+          (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>;
 def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
-          (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+          (v2i64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>;
 
 def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
-          (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+          (v2f64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>;
 def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
-          (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+          (v2f64 (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64))>;
 def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
           (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                        sub_64), xaddrX4:$src)>;
@@ -4259,19 +4257,19 @@ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
 
 defm : ScalToVecWPermute<
   v2i64, (i64 (load iaddrX4:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+  (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2),
   (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
 defm : ScalToVecWPermute<
   v2i64, (i64 (load xaddrX4:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+  (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2),
   (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
 defm : ScalToVecWPermute<
   v2f64, (f64 (load iaddrX4:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2),
+  (XXPERMDIs (DFLOADf64 iaddrX4:$src), 2),
   (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64)>;
 defm : ScalToVecWPermute<
   v2f64, (f64 (load xaddrX4:$src)),
-  (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2),
+  (XXPERMDIs (XFLOADf64 xaddrX4:$src), 2),
   (SUBREG_TO_REG (i64 1), (XFLOADf64 xaddrX4:$src), sub_64)>;
 
 def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index fa36f244d6239..2e226a386549d 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -210,49 +210,46 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
 ; P8-NEXT:    stdu r1, -176(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 176
 ; P8-NEXT:    .cfi_offset lr, 16
+; P8-NEXT:    .cfi_offset v29, -48
 ; P8-NEXT:    .cfi_offset v30, -32
 ; P8-NEXT:    .cfi_offset v31, -16
 ; P8-NEXT:    xxsldwi vs0, v2, v2, 3
+; P8-NEXT:    li r3, 128
+; P8-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 160
+; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    vmr v31, v2
-; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
 ; P8-NEXT:    xxsldwi vs0, v31, v31, 1
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    li r3, 128
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    xxlor v30, f1, f1
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 128
 ; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    xxmrghd vs0, vs1, vs0
+; P8-NEXT:    xxmrghd vs0, vs1, v30
 ; P8-NEXT:    xscvspdpn f1, v31
-; P8-NEXT:    xvcvdpsp v30, vs0
+; P8-NEXT:    xvcvdpsp v29, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
 ; P8-NEXT:    xxswapd vs0, v31
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    li r3, 128
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    xxlor v30, f1, f1
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 128
 ; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    xxmrghd vs0, v30, vs1
 ; P8-NEXT:    li r3, 160
 ; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 144
-; P8-NEXT:    xxmrghd vs0, vs0, vs1
-; P8-NEXT:    xvcvdpsp v2, vs0
-; P8-NEXT:    vmrgew v2, v2, v30
 ; P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    li r3, 128
+; P8-NEXT:    xvcvdpsp v2, vs0
+; P8-NEXT:    vmrgew v2, v2, v29
+; P8-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 176
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -265,41 +262,40 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
 ; P9-NEXT:    stdu r1, -80(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 80
 ; P9-NEXT:    .cfi_offset lr, 16
+; P9-NEXT:    .cfi_offset v29, -48
 ; P9-NEXT:    .cfi_offset v30, -32
 ; P9-NEXT:    .cfi_offset v31, -16
 ; P9-NEXT:    xxsldwi vs0, v2, v2, 3
-; P9-NEXT:    stxv v30, 48(r1) # 16-byte Folded Spill
+; P9-NEXT:    stxv v29, 32(r1) # 16-byte Folded Spill
 ; P9-NEXT:    xscvspdpn f1, vs0
+; P9-NEXT:    stxv v30, 48(r1) # 16-byte Folded Spill
 ; P9-NEXT:    stxv v31, 64(r1) # 16-byte Folded Spill
 ; P9-NEXT:    vmr v31, v2
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
 ; P9-NEXT:    xxsldwi vs0, v31, v31, 1
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P9-NEXT:    stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT:    xscpsgndp v30, f1, f1
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
-; P9-NEXT:    lxv vs0, 32(r1) # 16-byte Folded Reload
 ; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P9-NEXT:    xxmrghd vs0, vs1, vs0
+; P9-NEXT:    xxmrghd vs0, vs1, v30
 ; P9-NEXT:    xscvspdpn f1, v31
-; P9-NEXT:    xvcvdpsp v30, vs0
+; P9-NEXT:    xvcvdpsp v29, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
 ; P9-NEXT:    xxswapd vs0, v31
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P9-NEXT:    stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT:    xscpsgndp v30, f1, f1
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
-; P9-NEXT:    lxv vs0, 32(r1) # 16-byte Folded Reload
 ; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT:    xxmrghd vs0, v30, vs1
 ; P9-NEXT:    lxv v31, 64(r1) # 16-byte Folded Reload
-; P9-NEXT:    xxmrghd vs0, vs0, vs1
-; P9-NEXT:    xvcvdpsp v2, vs0
-; P9-NEXT:    vmrgew v2, v2, v30
 ; P9-NEXT:    lxv v30, 48(r1) # 16-byte Folded Reload
+; P9-NEXT:    xvcvdpsp v2, vs0
+; P9-NEXT:    vmrgew v2, v2, v29
+; P9-NEXT:    lxv v29, 32(r1) # 16-byte Folded Reload
 ; P9-NEXT:    addi r1, r1, 80
 ; P9-NEXT:    ld r0, 16(r1)
 ; P9-NEXT:    mtlr r0
@@ -319,26 +315,27 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
 ; P8-NEXT:    stdu r1, -160(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 160
 ; P8-NEXT:    .cfi_offset lr, 16
+; P8-NEXT:    .cfi_offset v30, -32
 ; P8-NEXT:    .cfi_offset v31, -16
+; P8-NEXT:    li r3, 128
+; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
 ; P8-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    vmr v31, v2
 ; P8-NEXT:    xxlor f1, v31, v31
 ; P8-NEXT:    bl nearbyint
 ; P8-NEXT:    nop
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    li r3, 128
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    xxlor v30, f1, f1
 ; P8-NEXT:    xxswapd vs1, v31
 ; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    bl nearbyint
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 128
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 144
+; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT:    xxmrghd v2, v30, vs1
 ; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    xxmrghd v2, vs0, vs1
+; P8-NEXT:    li r3, 128
+; P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 160
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -351,22 +348,23 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
 ; P9-NEXT:    stdu r1, -64(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 64
 ; P9-NEXT:    .cfi_offset lr, 16
+; P9-NEXT:    .cfi_offset v30, -32
 ; P9-NEXT:    .cfi_offset v31, -16
 ; P9-NEXT:    stxv v31, 48(r1) # 16-byte Folded Spill
 ; P9-NEXT:    vmr v31, v2
 ; P9-NEXT:    xscpsgndp f1, v31, v31
+; P9-NEXT:    stxv v30, 32(r1) # 16-byte Folded Spill
 ; P9-NEXT:    bl nearbyint
 ; P9-NEXT:    nop
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P9-NEXT:    stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT:    xscpsgndp v30, f1, f1
 ; P9-NEXT:    xxswapd vs1, v31
 ; P9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P9-NEXT:    bl nearbyint
 ; P9-NEXT:    nop
-; P9-NEXT:    lxv vs0, 32(r1) # 16-byte Folded Reload
-; P9-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
 ; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P9-NEXT:    xxmrghd v2, vs0, vs1
+; P9-NEXT:    xxmrghd v2, v30, vs1
+; P9-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
+; P9-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
 ; P9-NEXT:    addi r1, r1, 64
 ; P9-NEXT:    ld r0, 16(r1)
 ; P9-NEXT:    mtlr r0

diff  --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
index ab19afa2beb56..d7edb0e6b5128 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -640,46 +640,46 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
 ; P8-LABEL: test_extend32_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    lhz r3, 6(r3)
+; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 64
+; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 80
+; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 2(r30)
+; P8-NEXT:    xxlor vs63, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 4(r30)
+; P8-NEXT:    xxlor vs62, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 0(r30)
+; P8-NEXT:    xxlor vs61, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
 ; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    xxmrghd vs0, vs63, vs62
+; P8-NEXT:    li r3, 80
+; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    xxmrghd vs1, vs61, vs1
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 64
-; P8-NEXT:    lxvd2x vs2, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 48
-; P8-NEXT:    xxmrghd vs0, vs0, vs2
-; P8-NEXT:    lxvd2x vs2, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    xxmrghd vs1, vs2, vs1
 ; P8-NEXT:    xvcvdpsp vs34, vs0
+; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    xvcvdpsp vs35, vs1
 ; P8-NEXT:    vmrgew v2, v2, v3
 ; P8-NEXT:    addi r1, r1, 112
 ; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;
@@ -749,43 +749,43 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
 ; P8-LABEL: test_extend64_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    li r4, 48
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    lhz r3, 6(r3)
+; P8-NEXT:    stxvd2x vs61, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 64
+; P8-NEXT:    stxvd2x vs62, r1, r4 # 16-byte Folded Spill
+; P8-NEXT:    li r4, 80
+; P8-NEXT:    stxvd2x vs63, r1, r4 # 16-byte Folded Spill
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 80
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 4(r30)
+; P8-NEXT:    xxlor vs63, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 64
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 2(r30)
+; P8-NEXT:    xxlor vs62, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
-; P8-NEXT:    li r3, 48
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    stxvd2x vs1, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    lhz r3, 0(r30)
+; P8-NEXT:    xxlor vs61, f1, f1
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 80
+; P8-NEXT:    xxmrghd vs35, vs63, vs62
 ; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    xxmrghd vs34, vs61, vs1
+; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
+; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 64
-; P8-NEXT:    lxvd2x vs2, r1, r3 # 16-byte Folded Reload
+; P8-NEXT:    lxvd2x vs62, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 48
-; P8-NEXT:    xxmrghd vs35, vs0, vs2
-; P8-NEXT:    lxvd2x vs0, r1, r3 # 16-byte Folded Reload
-; P8-NEXT:    xxmrghd vs34, vs0, vs1
+; P8-NEXT:    lxvd2x vs61, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    addi r1, r1, 112
 ; P8-NEXT:    ld r0, 16(r1)
-; P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; P8-NEXT:    mtlr r0
 ; P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 93d0d296e51a1..42d9a8bd6f1b5 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -67,14 +67,13 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
 ;
 ; P9BE-LABEL: test_pre_inc_disable_1:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lfd f0, 0(r5)
+; P9BE-NEXT:    lxsd v5, 0(r5)
 ; P9BE-NEXT:    addis r5, r2, .LCPI0_0 at toc@ha
 ; P9BE-NEXT:    xxlxor v3, v3, v3
 ; P9BE-NEXT:    li r6, 0
 ; P9BE-NEXT:    addi r5, r5, .LCPI0_0 at toc@l
 ; P9BE-NEXT:    lxvx v2, 0, r5
 ; P9BE-NEXT:    addis r5, r2, .LCPI0_1 at toc@ha
-; P9BE-NEXT:    xxlor v5, vs0, vs0
 ; P9BE-NEXT:    addi r5, r5, .LCPI0_1 at toc@l
 ; P9BE-NEXT:    lxvx v4, 0, r5
 ; P9BE-NEXT:    li r5, 4
@@ -87,10 +86,8 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
 ; P9BE-NEXT:    .p2align 4
 ; P9BE-NEXT:  .LBB0_1: # %for.cond1.preheader
 ; P9BE-NEXT:    #
-; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    lxsd v1, 0(r3)
 ; P9BE-NEXT:    add r7, r3, r4
-; P9BE-NEXT:    xxlor v1, vs0, vs0
-; P9BE-NEXT:    lfdx f0, r3, r4
 ; P9BE-NEXT:    vperm v6, v3, v1, v4
 ; P9BE-NEXT:    vperm v1, v3, v1, v2
 ; P9BE-NEXT:    xvnegsp v1, v1
@@ -102,14 +99,14 @@ define signext i32 @test_pre_inc_disable_1(i8* nocapture readonly %pix1, i32 sig
 ; P9BE-NEXT:    vadduwm v1, v1, v6
 ; P9BE-NEXT:    xxspltw v6, v1, 1
 ; P9BE-NEXT:    vadduwm v1, v1, v6
-; P9BE-NEXT:    xxlor v6, vs0, vs0
+; P9BE-NEXT:    lxsdx v6, r3, r4
+; P9BE-NEXT:    vextuwlx r3, r5, v1
 ; P9BE-NEXT:    vperm v7, v3, v6, v4
 ; P9BE-NEXT:    vperm v6, v3, v6, v2
-; P9BE-NEXT:    vextuwlx r3, r5, v1
-; P9BE-NEXT:    xvnegsp v6, v6
 ; P9BE-NEXT:    add r6, r3, r6
-; P9BE-NEXT:    xvnegsp v1, v7
 ; P9BE-NEXT:    add r3, r7, r4
+; P9BE-NEXT:    xvnegsp v6, v6
+; P9BE-NEXT:    xvnegsp v1, v7
 ; P9BE-NEXT:    vabsduw v6, v6, v5
 ; P9BE-NEXT:    vabsduw v1, v1, v0
 ; P9BE-NEXT:    vadduwm v1, v1, v6
@@ -205,17 +202,15 @@ define signext i32 @test_pre_inc_disable_2(i8* nocapture readonly %pix1, i8* noc
 ;
 ; P9BE-LABEL: test_pre_inc_disable_2:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    lfd f0, 0(r3)
+; P9BE-NEXT:    lxsd v2, 0(r3)
 ; P9BE-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
+; P9BE-NEXT:    lxsd v1, 0(r4)
 ; P9BE-NEXT:    xxlxor v3, v3, v3
 ; P9BE-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
 ; P9BE-NEXT:    lxvx v4, 0, r3
 ; P9BE-NEXT:    addis r3, r2, .LCPI1_1 at toc@ha
 ; P9BE-NEXT:    addi r3, r3, .LCPI1_1 at toc@l
-; P9BE-NEXT:    xxlor v2, vs0, vs0
-; P9BE-NEXT:    lfd f0, 0(r4)
 ; P9BE-NEXT:    lxvx v0, 0, r3
-; P9BE-NEXT:    xxlor v1, vs0, vs0
 ; P9BE-NEXT:    li r3, 0
 ; P9BE-NEXT:    vperm v5, v3, v2, v4
 ; P9BE-NEXT:    vperm v2, v3, v2, v0

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 21fc855aa8547..dc543aaa6132e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -261,41 +261,42 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 {
 ; PC64LE-LABEL: constrained_vector_frem_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI6_1 at toc@ha
+; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
 ; PC64LE-NEXT:    lfs 31, .LCPI6_1 at toc@l(4)
 ; PC64LE-NEXT:    lfs 1, .LCPI6_0 at toc@l(3)
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI6_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI6_2 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 34, 1, 63
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_frem_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 1, .LCPI6_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI6_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 31, .LCPI6_1 at toc@l(3)
@@ -303,18 +304,17 @@ define <2 x double> @constrained_vector_frem_v2f64() #0 {
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI6_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfs 1, .LCPI6_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    xxmrghd 34, 1, 63
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -432,10 +432,10 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI8_1 at toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI8_0 at toc@ha
 ; PC64LE-NEXT:    lfs 31, .LCPI8_1 at toc@l(4)
@@ -443,31 +443,27 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI8_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI8_2 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_3 at toc@ha
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI8_3 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfs 1, .LCPI8_3 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -476,10 +472,10 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI8_0 at toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 1, .LCPI8_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI8_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 31, .LCPI8_1 at toc@l(3)
@@ -487,27 +483,25 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 {
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI8_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfs 1, .LCPI8_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI8_3 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE9-NEXT:    lfs 1, .LCPI8_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -528,47 +522,43 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 {
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI9_1 at toc@ha
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfs 31, .LCPI9_1 at toc@l(4)
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI9_0 at toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI9_1 at toc@l(4)
 ; PC64LE-NEXT:    lfs 1, .LCPI9_0 at toc@l(3)
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI9_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI9_2 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_3 at toc@ha
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI9_3 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfs 1, .LCPI9_3 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI9_4 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI9_4 at toc@l(3)
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 35, 1, 62
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -581,41 +571,39 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 {
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI9_0 at toc@ha
 ; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 1, .LCPI9_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI9_1 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 31, .LCPI9_1 at toc@l(3)
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI9_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfs 1, .LCPI9_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI9_3 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE9-NEXT:    lfs 1, .LCPI9_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI9_4 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfs 1, .LCPI9_4 at toc@l(3)
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE9-NEXT:    vmr 2, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -1490,41 +1478,42 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 {
 ; PC64LE-LABEL: constrained_vector_pow_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI31_1 at toc@ha
+; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI31_0 at toc@ha
 ; PC64LE-NEXT:    lfs 31, .LCPI31_1 at toc@l(4)
 ; PC64LE-NEXT:    lfd 1, .LCPI31_0 at toc@l(3)
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI31_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI31_2 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 34, 1, 63
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_pow_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0 at toc@ha
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI31_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI31_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 31, .LCPI31_1 at toc@l(3)
@@ -1532,18 +1521,17 @@ define <2 x double> @constrained_vector_pow_v2f64() #0 {
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI31_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfd 1, .LCPI31_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    xxmrghd 34, 1, 63
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
@@ -1661,10 +1649,10 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI33_1 at toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI33_0 at toc@ha
 ; PC64LE-NEXT:    lfs 31, .LCPI33_1 at toc@l(4)
@@ -1672,31 +1660,27 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI33_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI33_2 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_3 at toc@ha
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI33_3 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI33_3 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -1705,10 +1689,10 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI33_0 at toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 1, .LCPI33_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI33_1 at toc@ha
 ; PC64LE9-NEXT:    lfs 31, .LCPI33_1 at toc@l(3)
@@ -1716,27 +1700,25 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 {
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI33_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfd 1, .LCPI33_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI33_3 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE9-NEXT:    lfd 1, .LCPI33_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -1757,47 +1739,43 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 {
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    addis 4, 2, .LCPI34_1 at toc@ha
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lfs 31, .LCPI34_1 at toc@l(4)
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI34_0 at toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI34_1 at toc@l(4)
 ; PC64LE-NEXT:    lfd 1, .LCPI34_0 at toc@l(3)
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI34_2 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI34_2 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_3 at toc@ha
 ; PC64LE-NEXT:    fmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI34_3 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI34_3 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI34_4 at toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI34_4 at toc@l(3)
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 35, 1, 62
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1810,41 +1788,39 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 {
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI34_0 at toc@ha
 ; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI34_0 at toc@l(3)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI34_1 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfs 31, .LCPI34_1 at toc@l(3)
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI34_2 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfd 1, .LCPI34_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI34_3 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE9-NEXT:    lfd 1, .LCPI34_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI34_4 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    fmr 2, 31
 ; PC64LE9-NEXT:    lfd 1, .LCPI34_4 at toc@l(3)
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE9-NEXT:    vmr 2, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
 ; PC64LE9-NEXT:    addi 1, 1, 80
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -1905,23 +1881,23 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI36_0 at toc@ha
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI36_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI36_0 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI36_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
+; PC64LE-NEXT:    li 4, 3
 ; PC64LE-NEXT:    lfd 1, .LCPI36_1 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 34, 1, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1934,19 +1910,19 @@ define <2 x double> @constrained_vector_powi_v2f64() #0 {
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI36_0 at toc@ha
 ; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI36_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI36_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    li 4, 3
 ; PC64LE9-NEXT:    lfd 1, .LCPI36_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    xxmrghd 34, 1, 63
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2060,38 +2036,34 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    li 4, 3
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI38_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI38_0 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI38_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
+; PC64LE-NEXT:    li 4, 3
 ; PC64LE-NEXT:    lfs 1, .LCPI38_1 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_2 at toc@ha
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI38_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI38_2 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -2100,34 +2072,32 @@ define <3 x double> @constrained_vector_powi_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI38_0 at toc@ha
 ; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI38_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI38_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    li 4, 3
 ; PC64LE9-NEXT:    lfs 1, .LCPI38_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI38_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI38_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -2146,45 +2116,41 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI39_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI39_0 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI39_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
+; PC64LE-NEXT:    li 4, 3
 ; PC64LE-NEXT:    lfd 1, .LCPI39_1 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_2 at toc@ha
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI39_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE-NEXT:    lfd 1, .LCPI39_2 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI39_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
+; PC64LE-NEXT:    li 4, 3
 ; PC64LE-NEXT:    lfd 1, .LCPI39_3 at toc@l(3)
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2197,37 +2163,35 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 {
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI39_0 at toc@ha
 ; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI39_0 at toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI39_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    li 4, 3
 ; PC64LE9-NEXT:    lfd 1, .LCPI39_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI39_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
 ; PC64LE9-NEXT:    lfd 1, .LCPI39_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI39_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    li 4, 3
 ; PC64LE9-NEXT:    lfd 1, .LCPI39_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE9-NEXT:    vmr 2, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2284,21 +2248,21 @@ define <2 x double> @constrained_vector_sin_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI41_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI41_0 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI41_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI41_1 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2310,18 +2274,18 @@ define <2 x double> @constrained_vector_sin_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI41_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI41_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI41_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI41_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2425,35 +2389,31 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI43_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI43_0 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI43_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI43_1 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI43_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI43_2 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -2462,31 +2422,29 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI43_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI43_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI43_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI43_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI43_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI43_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -2504,41 +2462,37 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI44_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI44_0 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI44_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI44_1 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI44_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI44_2 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI44_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI44_3 at toc@l(3)
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2550,34 +2504,32 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI44_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI44_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI44_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI44_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI44_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI44_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI44_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI44_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2633,21 +2585,21 @@ define <2 x double> @constrained_vector_cos_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI46_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI46_0 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI46_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI46_1 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2659,18 +2611,18 @@ define <2 x double> @constrained_vector_cos_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI46_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI46_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI46_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI46_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2774,35 +2726,31 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI48_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI48_0 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI48_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI48_1 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI48_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI48_2 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -2811,31 +2759,29 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI48_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI48_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI48_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI48_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI48_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI48_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -2853,41 +2799,37 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI49_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI49_0 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI49_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI49_1 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI49_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI49_2 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI49_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI49_3 at toc@l(3)
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -2899,34 +2841,32 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI49_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI49_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI49_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI49_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI49_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI49_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI49_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI49_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -2982,21 +2922,21 @@ define <2 x double> @constrained_vector_exp_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI51_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI51_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI51_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI51_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3008,18 +2948,18 @@ define <2 x double> @constrained_vector_exp_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI51_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI51_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI51_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI51_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3123,35 +3063,31 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI53_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI53_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI53_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI53_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI53_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI53_2 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -3160,31 +3096,29 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI53_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI53_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI53_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI53_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI53_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI53_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -3202,41 +3136,37 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI54_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI54_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI54_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI54_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI54_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI54_2 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI54_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI54_3 at toc@l(3)
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3248,34 +3178,32 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI54_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI54_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI54_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI54_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI54_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI54_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI54_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI54_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3331,21 +3259,21 @@ define <2 x double> @constrained_vector_exp2_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI56_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI56_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI56_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI56_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 34, 1, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3357,18 +3285,18 @@ define <2 x double> @constrained_vector_exp2_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI56_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI56_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI56_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI56_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    xxmrghd 34, 1, 63
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3472,35 +3400,31 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI58_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI58_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI58_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI58_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI58_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI58_2 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -3509,31 +3433,29 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI58_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI58_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI58_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI58_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI58_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI58_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -3551,41 +3473,37 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI59_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI59_0 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI59_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI59_1 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI59_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI59_2 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI59_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI59_3 at toc@l(3)
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3597,34 +3515,32 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI59_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI59_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI59_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI59_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI59_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI59_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI59_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI59_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE9-NEXT:    vmr 2, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3680,21 +3596,21 @@ define <2 x double> @constrained_vector_log_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI61_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI61_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI61_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI61_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3706,18 +3622,18 @@ define <2 x double> @constrained_vector_log_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI61_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI61_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI61_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI61_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -3821,35 +3737,31 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI63_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI63_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI63_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI63_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI63_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI63_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -3858,31 +3770,29 @@ define <3 x double> @constrained_vector_log_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI63_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI63_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI63_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI63_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI63_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI63_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -3900,41 +3810,37 @@ define <4 x double> @constrained_vector_log_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI64_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI64_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI64_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI64_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI64_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI64_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI64_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI64_3 at toc@l(3)
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -3946,34 +3852,32 @@ define <4 x double> @constrained_vector_log_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI64_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI64_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI64_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI64_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI64_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI64_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI64_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI64_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4029,21 +3933,21 @@ define <2 x double> @constrained_vector_log10_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI66_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI66_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI66_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI66_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4055,18 +3959,18 @@ define <2 x double> @constrained_vector_log10_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI66_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI66_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI66_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI66_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4170,35 +4074,31 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI68_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI68_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI68_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI68_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI68_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI68_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -4207,31 +4107,29 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI68_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI68_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI68_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI68_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI68_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI68_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -4249,41 +4147,37 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI69_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI69_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI69_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI69_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI69_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI69_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI69_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI69_3 at toc@l(3)
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4295,34 +4189,32 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI69_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI69_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI69_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI69_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI69_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI69_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI69_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI69_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4378,21 +4270,21 @@ define <2 x double> @constrained_vector_log2_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI71_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI71_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI71_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI71_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    xxmrghd 34, 63, 1
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4404,18 +4296,18 @@ define <2 x double> @constrained_vector_log2_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI71_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI71_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI71_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI71_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    xxmrghd 34, 63, 1
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4519,35 +4411,31 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI73_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI73_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI73_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI73_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI73_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI73_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -4556,31 +4444,29 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI73_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI73_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI73_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI73_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI73_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI73_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -4598,41 +4484,37 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI74_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI74_0 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI74_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI74_1 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI74_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI74_2 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI74_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI74_3 at toc@l(3)
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 3, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4644,34 +4526,32 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI74_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI74_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI74_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI74_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI74_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI74_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI74_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI74_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    vmr 3, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -4902,21 +4782,21 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI81_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI81_0 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI81_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI81_1 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 34, 1, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -4928,18 +4808,18 @@ define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0 at toc@ha
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI81_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI81_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI81_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    xxmrghd 34, 1, 63
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5043,35 +4923,31 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI83_0 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfs 1, .LCPI83_1 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI83_2 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    addi 1, 1, 64
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
@@ -5080,31 +4956,29 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0 at toc@ha
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI83_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI83_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    xxmrghd 63, 63, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI83_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    fmr 3, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
@@ -5122,41 +4996,37 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_0 at toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI84_0 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_1 at toc@ha
+; PC64LE-NEXT:    xxlor 63, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI84_1 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_2 at toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    lfd 1, .LCPI84_2 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_3 at toc@ha
+; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    lfd 1, .LCPI84_3 at toc@l(3)
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    vmr 2, 31
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    lxvd2x 62, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -5168,34 +5038,32 @@ define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
 ; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0 at toc@ha
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    lfd 1, .LCPI84_0 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 63, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI84_1 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_2 at toc@ha
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    lfd 1, .LCPI84_2 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_3 at toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    lfd 1, .LCPI84_3 at toc@l(3)
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 62
 ; PC64LE9-NEXT:    vmr 2, 31
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0


        


More information about the llvm-commits mailing list