[llvm] 52a774f - [PowerPC] remove XXSWAPD after load from CP which is a splat value
Ting Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 16 16:21:55 PST 2023
Author: Ting Wang
Date: 2023-02-16T19:21:35-05:00
New Revision: 52a774fd4c18abaa9bbe23944843dbfde26c4369
URL: https://github.com/llvm/llvm-project/commit/52a774fd4c18abaa9bbe23944843dbfde26c4369
DIFF: https://github.com/llvm/llvm-project/commit/52a774fd4c18abaa9bbe23944843dbfde26c4369.diff
LOG: [PowerPC] remove XXSWAPD after load from CP which is a splat value
If the value loaded from the constant pool is a splat value of vector type,
the swap after the load from the constant pool is not needed.
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D139491
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
llvm/test/CodeGen/PowerPC/build-vector-tests.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/combine-fneg.ll
llvm/test/CodeGen/PowerPC/fma-combine.ll
llvm/test/CodeGen/PowerPC/fp-classify.ll
llvm/test/CodeGen/PowerPC/mul-const-vector.ll
llvm/test/CodeGen/PowerPC/pr25080.ll
llvm/test/CodeGen/PowerPC/pr47891.ll
llvm/test/CodeGen/PowerPC/recipest.ll
llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
llvm/test/CodeGen/PowerPC/sat-add.ll
llvm/test/CodeGen/PowerPC/signbit-shift.ll
llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
llvm/test/CodeGen/PowerPC/vec-itofp.ll
llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
llvm/test/CodeGen/PowerPC/vsx.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7c95f3ca2b4cf..667b6c23f8edd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -279,11 +279,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
- bool isLoadFromConstantPool(MachineInstr *I) const;
Register
generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty,
SmallVectorImpl<MachineInstr *> &InsInstrs) const;
- const Constant *getConstantFromConstantPool(MachineInstr *I) const;
virtual void anchor();
protected:
@@ -304,6 +302,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
public:
explicit PPCInstrInfo(PPCSubtarget &STI);
+ bool isLoadFromConstantPool(MachineInstr *I) const;
+ const Constant *getConstantFromConstantPool(MachineInstr *I) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index af35669d0fab6..b9cb0a29a9511 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -653,6 +653,20 @@ bool PPCMIPeephole::simplifyCode() {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
.add(MI.getOperand(1));
+ } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
+ TII->isLoadFromConstantPool(DefMI)) {
+ const Constant *C = TII->getConstantFromConstantPool(DefMI);
+ if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs()
+ << "Optimizing swap(splat pattern from constant-pool) "
+ "=> copy(splat pattern from constant-pool): ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index fd41c430ede70..691738c298d8a 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -3804,8 +3804,7 @@ define <2 x i64> @spltConst1ll() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI65_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI65_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 1, i64 1>
@@ -3837,8 +3836,7 @@ define <2 x i64> @spltConst16kll() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI66_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI66_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 32767, i64 32767>
@@ -3870,8 +3868,7 @@ define <2 x i64> @spltConst32kll() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI67_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI67_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 65535, i64 65535>
@@ -4267,8 +4264,7 @@ define <2 x i64> @spltCnstConvftoll() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI78_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI78_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -4629,8 +4625,7 @@ define <2 x i64> @spltCnstConvdtoll() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI87_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI87_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -4991,8 +4986,7 @@ define <2 x i64> @spltConst1ull() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI97_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI97_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 1, i64 1>
@@ -5024,8 +5018,7 @@ define <2 x i64> @spltConst16kull() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI98_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI98_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 32767, i64 32767>
@@ -5057,8 +5050,7 @@ define <2 x i64> @spltConst32kull() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI99_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI99_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 65535, i64 65535>
@@ -5454,8 +5446,7 @@ define <2 x i64> @spltCnstConvftoull() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI110_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI110_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
@@ -5816,8 +5807,7 @@ define <2 x i64> @spltCnstConvdtoull() {
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r3, r2, .LCPI119_0 at toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI119_0 at toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r3
-; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: lxvd2x v2, 0, r3
; P8LE-NEXT: blr
entry:
ret <2 x i64> <i64 4, i64 4>
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index f5881c37e0333..070d058dceb0b 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -731,8 +731,7 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0 at toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0 at toc@l
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: lxvd2x v3, 0, r3
; CHECK-P8-NEXT: vmrgow v2, v3, v2
; CHECK-P8-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 771c05f184a04..8c9e1c21474a0 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -5,20 +5,19 @@
define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
; CHECK-LABEL: fneg_fdiv_splat:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxspltd 0, 1, 0
+; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0 at toc@l
-; CHECK-NEXT: lxvd2x 1, 0, 3
-; CHECK-NEXT: xvredp 2, 0
-; CHECK-NEXT: xxswapd 1, 1
-; CHECK-NEXT: xxlor 3, 1, 1
-; CHECK-NEXT: xvnmsubadp 3, 0, 2
-; CHECK-NEXT: xvmaddadp 2, 2, 3
-; CHECK-NEXT: xvnmsubadp 1, 0, 2
-; CHECK-NEXT: xvnmaddadp 2, 2, 1
-; CHECK-NEXT: xvmuldp 34, 34, 2
-; CHECK-NEXT: xvmuldp 35, 35, 2
+; CHECK-NEXT: lxvd2x 2, 0, 3
+; CHECK-NEXT: xvredp 1, 0
+; CHECK-NEXT: xxlor 3, 2, 2
+; CHECK-NEXT: xvnmsubadp 3, 0, 1
+; CHECK-NEXT: xvmaddadp 1, 1, 3
+; CHECK-NEXT: xvnmsubadp 2, 0, 1
+; CHECK-NEXT: xvnmaddadp 1, 1, 2
+; CHECK-NEXT: xvmuldp 34, 34, 1
+; CHECK-NEXT: xvmuldp 35, 35, 1
; CHECK-NEXT: blr
entry:
%splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 08cd940ae4445..5423ecf798266 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -291,7 +291,6 @@ define <2 x double> @vec_neg_fma_flag_propagation(<2 x double> %a) {
; CHECK-FAST-NEXT: addis 3, 2, .LCPI8_0 at toc@ha
; CHECK-FAST-NEXT: addi 3, 3, .LCPI8_0 at toc@l
; CHECK-FAST-NEXT: lxvd2x 0, 0, 3
-; CHECK-FAST-NEXT: xxswapd 0, 0
; CHECK-FAST-NEXT: xvmaddadp 34, 34, 0
; CHECK-FAST-NEXT: blr
;
@@ -307,7 +306,6 @@ define <2 x double> @vec_neg_fma_flag_propagation(<2 x double> %a) {
; CHECK-NEXT: addis 3, 2, .LCPI8_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI8_0 at toc@l
; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 0, 0
; CHECK-NEXT: xvmaddadp 34, 34, 0
; CHECK-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/PowerPC/fp-classify.ll b/llvm/test/CodeGen/PowerPC/fp-classify.ll
index d518b29949242..796a9be06bf9c 100644
--- a/llvm/test/CodeGen/PowerPC/fp-classify.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-classify.ll
@@ -109,11 +109,10 @@ entry:
define <4 x i1> @abs_isinfv4f32(<4 x float> %x) {
; P8-LABEL: abs_isinfv4f32:
; P8: # %bb.0: # %entry
-; P8-NEXT: addis 3, 2, .LCPI3_0 at toc@ha
; P8-NEXT: xvabssp 0, 34
+; P8-NEXT: addis 3, 2, .LCPI3_0 at toc@ha
; P8-NEXT: addi 3, 3, .LCPI3_0 at toc@l
; P8-NEXT: lxvd2x 1, 0, 3
-; P8-NEXT: xxswapd 1, 1
; P8-NEXT: xvcmpeqsp 34, 0, 1
; P8-NEXT: blr
;
@@ -134,11 +133,10 @@ entry:
define <2 x i1> @abs_isinfv2f64(<2 x double> %x) {
; P8-LABEL: abs_isinfv2f64:
; P8: # %bb.0: # %entry
-; P8-NEXT: addis 3, 2, .LCPI4_0 at toc@ha
; P8-NEXT: xvabsdp 0, 34
+; P8-NEXT: addis 3, 2, .LCPI4_0 at toc@ha
; P8-NEXT: addi 3, 3, .LCPI4_0 at toc@l
; P8-NEXT: lxvd2x 1, 0, 3
-; P8-NEXT: xxswapd 1, 1
; P8-NEXT: xvcmpeqdp 34, 0, 1
; P8-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
index 3b1f463821920..e2ddef8b49758 100644
--- a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
@@ -275,8 +275,7 @@ define <2 x i64> @test1_v2i64(<2 x i64> %a) {
ret <2 x i64> %tmp.1
}
; CHECK-LABEL: test1_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
@@ -287,8 +286,7 @@ define <2 x i64> @test2_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test2_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
@@ -300,8 +298,7 @@ define <2 x i64> @test3_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test3_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
@@ -315,8 +312,7 @@ define <2 x i64> @test4_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test4_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
@@ -330,8 +326,7 @@ define <2 x i64> @test5_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test5_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
@@ -346,8 +341,7 @@ define <2 x i64> @test6_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test6_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
@@ -362,8 +356,7 @@ define <2 x i64> @test7_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test7_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG4:[0-9]+]], v2, v[[REG2]]
@@ -374,8 +367,7 @@ define <2 x i64> @test8_v2i64(<2 x i64> %a) {
}
; CHECK-LABEL: test8_v2i64:
-; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
-; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
; CHECK-NOT: vmul
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index 0f78f80866137..4d9dc128a0c6d 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -6,14 +6,13 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
; LE-LABEL: pr25080:
; LE: # %bb.0: # %entry
; LE-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
-; LE-NEXT: xxlxor 37, 37, 37
+; LE-NEXT: xxlxor 36, 36, 36
; LE-NEXT: addi 3, 3, .LCPI0_0 at toc@l
; LE-NEXT: lxvd2x 0, 0, 3
-; LE-NEXT: xxswapd 36, 0
-; LE-NEXT: xxland 34, 34, 36
-; LE-NEXT: xxland 35, 35, 36
-; LE-NEXT: vcmpequw 2, 2, 5
-; LE-NEXT: vcmpequw 3, 3, 5
+; LE-NEXT: xxland 34, 34, 0
+; LE-NEXT: xxland 35, 35, 0
+; LE-NEXT: vcmpequw 2, 2, 4
+; LE-NEXT: vcmpequw 3, 3, 4
; LE-NEXT: xxswapd 0, 34
; LE-NEXT: mfvsrwz 3, 34
; LE-NEXT: xxsldwi 1, 34, 34, 1
diff --git a/llvm/test/CodeGen/PowerPC/pr47891.ll b/llvm/test/CodeGen/PowerPC/pr47891.ll
index 17ddf8d6cbd5a..13626638639a5 100644
--- a/llvm/test/CodeGen/PowerPC/pr47891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47891.ll
@@ -8,56 +8,54 @@ define dso_local void @poly2_lshift1(ptr nocapture %p) local_unnamed_addr #0 {
; CHECK-LABEL: poly2_lshift1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r4, 72
-; CHECK-NEXT: addis r5, r2, .LCPI0_0 at toc@ha
-; CHECK-NEXT: addis r6, r2, .LCPI0_1 at toc@ha
-; CHECK-NEXT: ld r7, 64(r3)
-; CHECK-NEXT: ld r8, 16(r3)
+; CHECK-NEXT: ld r5, 64(r3)
+; CHECK-NEXT: addis r6, r2, .LCPI0_0 at toc@ha
+; CHECK-NEXT: addis r7, r2, .LCPI0_1 at toc@ha
+; CHECK-NEXT: ld r8, 0(r3)
; CHECK-NEXT: ld r10, 24(r3)
; CHECK-NEXT: ld r11, 32(r3)
; CHECK-NEXT: lxvd2x vs0, r3, r4
-; CHECK-NEXT: addi r5, r5, .LCPI0_0 at toc@l
-; CHECK-NEXT: addi r6, r6, .LCPI0_1 at toc@l
+; CHECK-NEXT: addi r6, r6, .LCPI0_0 at toc@l
+; CHECK-NEXT: addi r7, r7, .LCPI0_1 at toc@l
; CHECK-NEXT: ld r12, 56(r3)
-; CHECK-NEXT: lxvd2x vs1, 0, r5
-; CHECK-NEXT: mtfprd f2, r7
-; CHECK-NEXT: ld r5, 0(r3)
-; CHECK-NEXT: xxswapd v2, vs0
-; CHECK-NEXT: lxvd2x vs0, 0, r6
+; CHECK-NEXT: lxvd2x v3, 0, r6
+; CHECK-NEXT: lxvd2x v5, 0, r7
; CHECK-NEXT: ld r6, 8(r3)
-; CHECK-NEXT: rotldi r9, r5, 1
-; CHECK-NEXT: sldi r5, r5, 1
-; CHECK-NEXT: xxswapd v3, vs1
-; CHECK-NEXT: std r5, 0(r3)
-; CHECK-NEXT: rotldi r5, r10, 1
+; CHECK-NEXT: ld r7, 16(r3)
+; CHECK-NEXT: rotldi r9, r8, 1
+; CHECK-NEXT: sldi r8, r8, 1
+; CHECK-NEXT: std r8, 0(r3)
+; CHECK-NEXT: rotldi r8, r10, 1
+; CHECK-NEXT: xxswapd v2, vs0
+; CHECK-NEXT: mtfprd f0, r5
; CHECK-NEXT: rldimi r9, r6, 1, 0
; CHECK-NEXT: rotldi r6, r6, 1
-; CHECK-NEXT: xxpermdi v4, v2, vs2, 2
-; CHECK-NEXT: xxswapd v5, vs0
-; CHECK-NEXT: rldimi r6, r8, 1, 0
-; CHECK-NEXT: rotldi r8, r8, 1
+; CHECK-NEXT: rldimi r6, r7, 1, 0
+; CHECK-NEXT: rotldi r7, r7, 1
; CHECK-NEXT: std r9, 8(r3)
; CHECK-NEXT: ld r9, 40(r3)
-; CHECK-NEXT: rldimi r8, r10, 1, 0
-; CHECK-NEXT: rldimi r5, r11, 1, 0
+; CHECK-NEXT: rldimi r7, r10, 1, 0
+; CHECK-NEXT: rldimi r8, r11, 1, 0
; CHECK-NEXT: std r6, 16(r3)
+; CHECK-NEXT: xxpermdi v4, v2, vs0, 2
+; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: rotldi r10, r11, 1
; CHECK-NEXT: ld r11, 48(r3)
-; CHECK-NEXT: std r5, 32(r3)
-; CHECK-NEXT: rotldi r6, r12, 1
-; CHECK-NEXT: vsrd v3, v4, v3
+; CHECK-NEXT: std r7, 24(r3)
+; CHECK-NEXT: rotldi r7, r12, 1
; CHECK-NEXT: rldimi r10, r9, 1, 0
; CHECK-NEXT: rotldi r9, r9, 1
-; CHECK-NEXT: std r8, 24(r3)
-; CHECK-NEXT: vsld v2, v2, v5
-; CHECK-NEXT: rotldi r5, r11, 1
+; CHECK-NEXT: std r8, 32(r3)
+; CHECK-NEXT: rotldi r6, r11, 1
; CHECK-NEXT: rldimi r9, r11, 1, 0
; CHECK-NEXT: std r10, 40(r3)
-; CHECK-NEXT: rldimi r5, r12, 1, 0
-; CHECK-NEXT: rldimi r6, r7, 1, 0
+; CHECK-NEXT: vsrd v3, v4, v3
+; CHECK-NEXT: rldimi r6, r12, 1, 0
+; CHECK-NEXT: rldimi r7, r5, 1, 0
; CHECK-NEXT: std r9, 48(r3)
+; CHECK-NEXT: std r6, 56(r3)
+; CHECK-NEXT: std r7, 64(r3)
; CHECK-NEXT: xxlor vs0, v2, v3
-; CHECK-NEXT: std r5, 56(r3)
-; CHECK-NEXT: std r6, 64(r3)
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: stxvd2x vs0, r3, r4
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index ca13ce0216723..63f9c73a9ff77 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -487,15 +487,13 @@ define <4 x float> @hoo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-P8-NEXT: addis 3, 2, .LCPI12_0 at toc@ha
; CHECK-P8-NEXT: addis 4, 2, .LCPI12_1 at toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI12_0 at toc@l
-; CHECK-P8-NEXT: lxvd2x 1, 0, 3
+; CHECK-P8-NEXT: lxvd2x 2, 0, 3
; CHECK-P8-NEXT: addi 3, 4, .LCPI12_1 at toc@l
; CHECK-P8-NEXT: lxvd2x 3, 0, 3
-; CHECK-P8-NEXT: xxswapd 1, 1
-; CHECK-P8-NEXT: xvmulsp 2, 35, 0
-; CHECK-P8-NEXT: xxswapd 35, 3
-; CHECK-P8-NEXT: xvmaddasp 1, 2, 0
-; CHECK-P8-NEXT: xvmulsp 0, 0, 35
-; CHECK-P8-NEXT: xvmulsp 0, 0, 1
+; CHECK-P8-NEXT: xvmulsp 1, 35, 0
+; CHECK-P8-NEXT: xvmaddasp 2, 1, 0
+; CHECK-P8-NEXT: xvmulsp 0, 0, 3
+; CHECK-P8-NEXT: xvmulsp 0, 0, 2
; CHECK-P8-NEXT: xvmulsp 34, 34, 0
; CHECK-P8-NEXT: blr
;
@@ -1046,15 +1044,13 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
; CHECK-P8-NEXT: addis 3, 2, .LCPI25_0 at toc@ha
; CHECK-P8-NEXT: addis 4, 2, .LCPI25_1 at toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI25_0 at toc@l
-; CHECK-P8-NEXT: lxvd2x 1, 0, 3
+; CHECK-P8-NEXT: lxvd2x 2, 0, 3
; CHECK-P8-NEXT: addi 3, 4, .LCPI25_1 at toc@l
; CHECK-P8-NEXT: lxvd2x 3, 0, 3
-; CHECK-P8-NEXT: xxswapd 1, 1
-; CHECK-P8-NEXT: xvmulsp 2, 34, 0
-; CHECK-P8-NEXT: xxswapd 34, 3
-; CHECK-P8-NEXT: xvmaddasp 1, 2, 0
-; CHECK-P8-NEXT: xvmulsp 0, 2, 34
-; CHECK-P8-NEXT: xvmulsp 34, 0, 1
+; CHECK-P8-NEXT: xvmulsp 1, 34, 0
+; CHECK-P8-NEXT: xvmaddasp 2, 1, 0
+; CHECK-P8-NEXT: xvmulsp 0, 1, 3
+; CHECK-P8-NEXT: xvmulsp 34, 0, 2
; CHECK-P8-NEXT: blr
; CHECK-P8-NEXT: .LBB25_2:
; CHECK-P8-NEXT: xvsqrtsp 34, 34
@@ -1166,21 +1162,19 @@ define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
; CHECK-P8-NEXT: xvrsqrtedp 0, 34
; CHECK-P8-NEXT: addis 3, 2, .LCPI27_0 at toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI27_0 at toc@l
-; CHECK-P8-NEXT: lxvd2x 1, 0, 3
+; CHECK-P8-NEXT: lxvd2x 2, 0, 3
; CHECK-P8-NEXT: addis 3, 2, .LCPI27_1 at toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI27_1 at toc@l
; CHECK-P8-NEXT: lxvd2x 3, 0, 3
-; CHECK-P8-NEXT: xxswapd 1, 1
-; CHECK-P8-NEXT: xvmuldp 2, 34, 0
-; CHECK-P8-NEXT: xxswapd 3, 3
-; CHECK-P8-NEXT: xxlor 4, 1, 1
-; CHECK-P8-NEXT: xvmaddadp 4, 2, 0
+; CHECK-P8-NEXT: xxlor 4, 2, 2
+; CHECK-P8-NEXT: xvmuldp 1, 34, 0
+; CHECK-P8-NEXT: xvmaddadp 4, 1, 0
; CHECK-P8-NEXT: xvmuldp 0, 0, 3
; CHECK-P8-NEXT: xvmuldp 0, 0, 4
-; CHECK-P8-NEXT: xvmuldp 2, 34, 0
-; CHECK-P8-NEXT: xvmaddadp 1, 2, 0
-; CHECK-P8-NEXT: xvmuldp 0, 2, 3
-; CHECK-P8-NEXT: xvmuldp 34, 0, 1
+; CHECK-P8-NEXT: xvmuldp 1, 34, 0
+; CHECK-P8-NEXT: xvmaddadp 2, 1, 0
+; CHECK-P8-NEXT: xvmuldp 0, 1, 3
+; CHECK-P8-NEXT: xvmuldp 34, 0, 2
; CHECK-P8-NEXT: blr
; CHECK-P8-NEXT: .LBB27_2:
; CHECK-P8-NEXT: xvsqrtdp 34, 34
diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 17dafc1caef52..68db90ad2e198 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -11,10 +11,9 @@ define <4 x float> @repeated_fp_divisor_noest(float %a, <4 x float> %b) {
; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI0_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0 at toc@l
-; CHECK-NEXT: xxswapd 35, 1
-; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: xxspltw 0, 0, 0
-; CHECK-NEXT: xvdivsp 0, 35, 0
+; CHECK-NEXT: xvdivsp 0, 1, 0
+; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: xxswapd 35, 1
; CHECK-NEXT: xvmulsp 1, 34, 35
; CHECK-NEXT: xvmulsp 34, 1, 0
@@ -32,18 +31,17 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
; CHECK-NEXT: xscvdpspn 0, 1
; CHECK-NEXT: addis 3, 2, .LCPI1_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_0 at toc@l
-; CHECK-NEXT: lxvd2x 1, 0, 3
+; CHECK-NEXT: lxvd2x 2, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI1_1 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI1_1 at toc@l
-; CHECK-NEXT: xxswapd 1, 1
; CHECK-NEXT: xxspltw 0, 0, 0
-; CHECK-NEXT: xvresp 2, 0
-; CHECK-NEXT: xvmaddasp 1, 0, 2
+; CHECK-NEXT: xvresp 1, 0
+; CHECK-NEXT: xvmaddasp 2, 0, 1
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: xvnmsubasp 2, 2, 1
+; CHECK-NEXT: xvnmsubasp 1, 1, 2
; CHECK-NEXT: xvmulsp 0, 34, 35
-; CHECK-NEXT: xvmulsp 34, 0, 2
+; CHECK-NEXT: xvmulsp 34, 0, 1
; CHECK-NEXT: blr
%ins = insertelement <4 x float> undef, float %a, i32 0
%splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index fef7de598fca3..766f48809304c 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -379,13 +379,11 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI24_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI24_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI24_1 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI24_1 at toc@l
-; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminub 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vaddubm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
@@ -399,8 +397,7 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI25_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI25_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -414,8 +411,7 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI26_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI26_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
@@ -429,13 +425,11 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI27_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI27_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI27_1 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI27_1 at toc@l
-; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminuh 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduhm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
@@ -449,8 +443,7 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI28_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI28_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -464,8 +457,7 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI29_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI29_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
@@ -479,13 +471,11 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI30_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI30_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI30_1 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI30_1 at toc@l
-; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminuw 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
@@ -499,8 +489,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI31_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI31_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -514,8 +503,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI32_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI32_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -529,13 +517,11 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI33_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI33_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI33_1 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI33_1 at toc@l
-; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vminud 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vaddudm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
@@ -548,10 +534,9 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI34_0 at toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI34_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
; CHECK-NEXT: xxleqv 0, 0, 0
+; CHECK-NEXT: addi 3, 3, .LCPI34_0 at toc@l
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vaddudm 3, 2, 3
; CHECK-NEXT: vcmpgtud 2, 2, 3
; CHECK-NEXT: xxsel 34, 35, 0, 34
@@ -566,14 +551,12 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI35_1 at toc@ha
+; CHECK-NEXT: xxleqv 0, 0, 0
; CHECK-NEXT: addi 3, 3, .LCPI35_1 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI35_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI35_0 at toc@l
-; CHECK-NEXT: lxvd2x 1, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
-; CHECK-NEXT: xxleqv 0, 0, 0
-; CHECK-NEXT: xxswapd 36, 1
+; CHECK-NEXT: lxvd2x 36, 0, 3
; CHECK-NEXT: vcmpgtud 3, 2, 3
; CHECK-NEXT: vaddudm 2, 2, 4
; CHECK-NEXT: xxsel 34, 34, 0, 35
diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index 030892b14e2cc..4f1009c00edde 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -33,8 +33,7 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI2_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI2_0 at toc@l
; CHECK-NEXT: vcmpgtsw 2, 2, 3
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vsubuwm 2, 3, 2
; CHECK-NEXT: blr
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -85,8 +84,7 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI6_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI6_0 at toc@l
; CHECK-NEXT: vcmpgtsw 2, 2, 3
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -194,10 +192,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI15_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI15_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -225,10 +222,9 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI17_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI17_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsrw 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -280,10 +276,9 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI21_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI21_0 at toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsraw 2, 2, 3
-; CHECK-NEXT: xxswapd 35, 0
+; CHECK-NEXT: lxvd2x 35, 0, 3
; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
diff --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index bc959ea396832..e7a0f149ac976 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -188,62 +188,61 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; PPC64LE-NEXT: lis 6, 1820
; PPC64LE-NEXT: sldi 3, 3, 31
; PPC64LE-NEXT: ori 6, 6, 29127
-; PPC64LE-NEXT: sldi 5, 5, 31
-; PPC64LE-NEXT: rldic 6, 6, 34, 3
; PPC64LE-NEXT: sldi 4, 4, 31
-; PPC64LE-NEXT: oris 6, 6, 29127
+; PPC64LE-NEXT: rldic 6, 6, 34, 3
; PPC64LE-NEXT: sradi 3, 3, 31
+; PPC64LE-NEXT: oris 6, 6, 29127
+; PPC64LE-NEXT: sradi 4, 4, 31
; PPC64LE-NEXT: ori 7, 6, 7282
-; PPC64LE-NEXT: sradi 5, 5, 31
+; PPC64LE-NEXT: sldi 5, 5, 31
; PPC64LE-NEXT: ori 6, 6, 7281
-; PPC64LE-NEXT: sradi 4, 4, 31
; PPC64LE-NEXT: mulhd 8, 3, 7
; PPC64LE-NEXT: mulhd 7, 4, 7
+; PPC64LE-NEXT: sradi 5, 5, 31
; PPC64LE-NEXT: mulhd 6, 5, 6
; PPC64LE-NEXT: rldicl 9, 8, 1, 63
; PPC64LE-NEXT: rldicl 10, 7, 1, 63
-; PPC64LE-NEXT: sub 6, 6, 5
; PPC64LE-NEXT: add 8, 8, 9
-; PPC64LE-NEXT: rldicl 9, 6, 1, 63
; PPC64LE-NEXT: add 7, 7, 10
-; PPC64LE-NEXT: sradi 6, 6, 3
; PPC64LE-NEXT: sldi 10, 8, 3
-; PPC64LE-NEXT: add 6, 6, 9
+; PPC64LE-NEXT: sub 6, 6, 5
; PPC64LE-NEXT: add 8, 8, 10
-; PPC64LE-NEXT: addis 9, 2, .LCPI3_0@toc@ha
; PPC64LE-NEXT: sldi 10, 7, 3
-; PPC64LE-NEXT: sub 3, 3, 8
-; PPC64LE-NEXT: addi 9, 9, .LCPI3_0@toc@l
+; PPC64LE-NEXT: rldicl 9, 6, 1, 63
; PPC64LE-NEXT: add 7, 7, 10
-; PPC64LE-NEXT: sldi 8, 6, 3
-; PPC64LE-NEXT: lxvd2x 0, 0, 9
-; PPC64LE-NEXT: mtfprd 1, 3
+; PPC64LE-NEXT: sub 3, 3, 8
+; PPC64LE-NEXT: addis 8, 2, .LCPI3_1@toc@ha
+; PPC64LE-NEXT: sradi 6, 6, 3
; PPC64LE-NEXT: sub 4, 4, 7
-; PPC64LE-NEXT: add 6, 6, 8
-; PPC64LE-NEXT: addis 7, 2, .LCPI3_1@toc@ha
-; PPC64LE-NEXT: mtfprd 2, 4
-; PPC64LE-NEXT: add 4, 5, 6
-; PPC64LE-NEXT: addi 3, 7, .LCPI3_1@toc@l
-; PPC64LE-NEXT: addis 5, 2, .LCPI3_2@toc@ha
-; PPC64LE-NEXT: mtfprd 4, 4
+; PPC64LE-NEXT: mtfprd 0, 3
+; PPC64LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha
+; PPC64LE-NEXT: addi 7, 8, .LCPI3_1@toc@l
+; PPC64LE-NEXT: add 6, 6, 9
+; PPC64LE-NEXT: mtfprd 1, 4
+; PPC64LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
+; PPC64LE-NEXT: lxvd2x 2, 0, 7
+; PPC64LE-NEXT: sldi 8, 6, 3
; PPC64LE-NEXT: lxvd2x 3, 0, 3
-; PPC64LE-NEXT: addi 3, 5, .LCPI3_2@toc@l
-; PPC64LE-NEXT: xxswapd 34, 0
-; PPC64LE-NEXT: xxmrghd 35, 2, 1
-; PPC64LE-NEXT: lxvd2x 0, 0, 3
-; PPC64LE-NEXT: xxswapd 36, 4
-; PPC64LE-NEXT: xxswapd 37, 3
-; PPC64LE-NEXT: xxland 35, 35, 34
-; PPC64LE-NEXT: xxland 34, 36, 34
+; PPC64LE-NEXT: add 4, 6, 8
+; PPC64LE-NEXT: addis 6, 2, .LCPI3_2@toc@ha
+; PPC64LE-NEXT: xxmrghd 34, 1, 0
+; PPC64LE-NEXT: add 3, 5, 4
+; PPC64LE-NEXT: addi 4, 6, .LCPI3_2@toc@l
+; PPC64LE-NEXT: xxswapd 35, 2
+; PPC64LE-NEXT: mtfprd 0, 3
+; PPC64LE-NEXT: lxvd2x 1, 0, 4
+; PPC64LE-NEXT: xxland 34, 34, 3
; PPC64LE-NEXT: xxswapd 36, 0
-; PPC64LE-NEXT: vcmpequd 3, 3, 5
-; PPC64LE-NEXT: vcmpequd 2, 2, 4
-; PPC64LE-NEXT: xxlnor 0, 35, 35
-; PPC64LE-NEXT: xxlnor 34, 34, 34
+; PPC64LE-NEXT: vcmpequd 2, 2, 3
+; PPC64LE-NEXT: xxswapd 35, 1
+; PPC64LE-NEXT: xxland 36, 36, 3
+; PPC64LE-NEXT: vcmpequd 3, 4, 3
+; PPC64LE-NEXT: xxlnor 0, 34, 34
; PPC64LE-NEXT: xxswapd 1, 0
; PPC64LE-NEXT: mffprwz 4, 0
-; PPC64LE-NEXT: xxswapd 2, 34
+; PPC64LE-NEXT: xxlnor 34, 35, 35
; PPC64LE-NEXT: mffprwz 3, 1
+; PPC64LE-NEXT: xxswapd 2, 34
; PPC64LE-NEXT: mffprwz 5, 2
; PPC64LE-NEXT: blr
%srem = srem <3 x i33> %X, <i33 9, i33 9, i33 -9>
diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index e4a4761ffec9e..51f0d75d27bb5 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -249,8 +249,6 @@ define void @stest8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, .LCPI3_1@toc@l
; CHECK-P8-NEXT: addi r5, r5, .LCPI3_4@toc@l
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: lxvd2x vs4, 0, r5
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xxswapd v5, vs3
@@ -261,7 +259,8 @@ define void @stest8(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: vperm v4, v2, v2, v4
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
; CHECK-P8-NEXT: vperm v2, v2, v2, v0
-; CHECK-P8-NEXT: xxswapd v0, vs0
+; CHECK-P8-NEXT: lxvd2x v0, 0, r4
+; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vsld v3, v3, v0
; CHECK-P8-NEXT: vsld v4, v4, v0
; CHECK-P8-NEXT: vsld v5, v5, v0
@@ -370,13 +369,12 @@ define void @stest4(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: xxswapd v4, vs2
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
; CHECK-P8-NEXT: vperm v2, v2, v2, v4
-; CHECK-P8-NEXT: xxswapd v4, vs0
+; CHECK-P8-NEXT: lxvd2x v4, 0, r4
+; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vsld v3, v3, v4
; CHECK-P8-NEXT: vsld v2, v2, v4
; CHECK-P8-NEXT: vsrad v3, v3, v4
@@ -443,10 +441,9 @@ define void @stest2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
; CHECK-P8-NEXT: addi r4, r4, .LCPI5_1@toc@l
; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: vperm v2, v2, v2, v3
-; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: lxvd2x v3, 0, r4
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
; CHECK-P8-NEXT: xvcvsxddp vs0, v2
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index a2c1523c97ee8..0cc5248af634a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -386,9 +386,8 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: lxvd2x v3, 0, r3
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
; CHECK-P8-NEXT: xvcvsxddp v2, v2
@@ -434,12 +433,11 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
-; CHECK-P8-NEXT: xxswapd v4, vs0
+; CHECK-P8-NEXT: lxvd2x v4, 0, r4
+; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vsld v2, v2, v4
; CHECK-P8-NEXT: vsld v3, v3, v4
; CHECK-P8-NEXT: vsrad v2, v2, v4
@@ -516,15 +514,14 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: xxswapd v3, vs0
; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxswapd v5, vs2
; CHECK-P8-NEXT: xxswapd v0, vs3
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
; CHECK-P8-NEXT: vperm v4, v2, v2, v4
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
; CHECK-P8-NEXT: vperm v2, v2, v2, v0
-; CHECK-P8-NEXT: xxswapd v0, vs0
+; CHECK-P8-NEXT: lxvd2x v0, 0, r4
+; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vsld v3, v3, v0
; CHECK-P8-NEXT: vsld v4, v4, v0
; CHECK-P8-NEXT: vsld v5, v5, v0
@@ -643,14 +640,13 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l
; CHECK-P8-NEXT: xxswapd v5, vs3
; CHECK-P8-NEXT: xxswapd v0, vs4
+; CHECK-P8-NEXT: lxvd2x v9, 0, r4
+; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxswapd v6, vs0
; CHECK-P8-NEXT: vperm v1, v2, v2, v3
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: vperm v7, v2, v2, v4
; CHECK-P8-NEXT: vperm v8, v2, v2, v5
; CHECK-P8-NEXT: vperm v2, v2, v2, v0
-; CHECK-P8-NEXT: xxswapd v9, vs0
; CHECK-P8-NEXT: vperm v5, v6, v6, v5
; CHECK-P8-NEXT: vperm v0, v6, v6, v0
; CHECK-P8-NEXT: vperm v3, v6, v6, v3
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 434d0f660f2e9..08730dace391c 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -421,9 +421,8 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
-; CHECK-P8-NEXT: xxswapd v3, vs0
+; CHECK-P8-NEXT: lxvd2x v3, 0, r3
; CHECK-P8-NEXT: vsld v2, v2, v3
; CHECK-P8-NEXT: vsrad v2, v2, v3
; CHECK-P8-NEXT: xvcvsxddp v2, v2
@@ -469,12 +468,11 @@ define void @test4elt_signed(ptr noalias nocapture sret(<4 x double>) %agg.resul
; CHECK-P8-NEXT: addi r5, r6, .LCPI5_2@toc@l
; CHECK-P8-NEXT: lxvd2x vs1, 0, r5
; CHECK-P8-NEXT: xxswapd v2, vs0
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: xxswapd v4, vs1
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v3, v3, v3, v4
-; CHECK-P8-NEXT: xxswapd v4, vs0
+; CHECK-P8-NEXT: lxvd2x v4, 0, r4
+; CHECK-P8-NEXT: li r4, 16
; CHECK-P8-NEXT: vsld v2, v2, v4
; CHECK-P8-NEXT: vsld v3, v3, v4
; CHECK-P8-NEXT: vsrad v2, v2, v4
@@ -537,8 +535,10 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
; CHECK-P8-NEXT: addis r6, r2, .LCPI6_2@toc@ha
; CHECK-P8-NEXT: mtvsrd v3, r4
+; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha
; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
; CHECK-P8-NEXT: addi r6, r6, .LCPI6_2@toc@l
+; CHECK-P8-NEXT: addi r4, r4, .LCPI6_1@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
; CHECK-P8-NEXT: addis r5, r2, .LCPI6_3@toc@ha
; CHECK-P8-NEXT: lxvd2x vs1, 0, r6
@@ -549,18 +549,15 @@ define void @test8elt_signed(ptr noalias nocapture sret(<8 x double>) %agg.resul
; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v4, vs1
-; CHECK-P8-NEXT: addis r5, r2, .LCPI6_1@toc@ha
-; CHECK-P8-NEXT: addi r4, r5, .LCPI6_1@toc@l
; CHECK-P8-NEXT: li r5, 32
; CHECK-P8-NEXT: xxswapd v5, vs2
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
-; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: xxswapd v0, vs3
; CHECK-P8-NEXT: vperm v2, v3, v3, v2
; CHECK-P8-NEXT: vperm v4, v3, v3, v4
; CHECK-P8-NEXT: vperm v5, v3, v3, v5
; CHECK-P8-NEXT: vperm v3, v3, v3, v0
-; CHECK-P8-NEXT: xxswapd v0, vs0
+; CHECK-P8-NEXT: lxvd2x v0, 0, r4
+; CHECK-P8-NEXT: li r4, 48
; CHECK-P8-NEXT: vsld v2, v2, v0
; CHECK-P8-NEXT: vsld v4, v4, v0
; CHECK-P8-NEXT: vsld v5, v5, v0
@@ -687,20 +684,19 @@ define void @test16elt_signed(ptr noalias nocapture sret(<16 x double>) %agg.res
; CHECK-P8-NEXT: xxswapd v0, vs3
; CHECK-P8-NEXT: lxvd2x vs2, 0, r4
; CHECK-P8-NEXT: lxvd2x vs3, 0, r5
-; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha
; CHECK-P8-NEXT: vperm v3, v2, v2, v3
+; CHECK-P8-NEXT: addis r4, r2, .LCPI7_1@toc@ha
; CHECK-P8-NEXT: li r5, 96
; CHECK-P8-NEXT: xxswapd v1, vs0
; CHECK-P8-NEXT: xxswapd v6, vs1
; CHECK-P8-NEXT: addi r4, r4, .LCPI7_1@toc@l
; CHECK-P8-NEXT: vperm v4, v2, v2, v4
-; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-P8-NEXT: lxvd2x v9, 0, r4
; CHECK-P8-NEXT: li r4, 112
; CHECK-P8-NEXT: xxswapd v7, vs2
; CHECK-P8-NEXT: xxswapd v8, vs3
; CHECK-P8-NEXT: vperm v5, v2, v2, v5
; CHECK-P8-NEXT: vperm v0, v2, v2, v0
-; CHECK-P8-NEXT: xxswapd v9, vs0
; CHECK-P8-NEXT: vperm v1, v2, v2, v1
; CHECK-P8-NEXT: vperm v6, v2, v2, v6
; CHECK-P8-NEXT: vperm v7, v2, v2, v7
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index bc0390b1ed81d..25fcd227be687 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -7255,9 +7255,8 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i16(<2 x i16> %x) #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI155_1@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI155_1@toc@l
; PC64LE-NEXT: xxswapd 35, 0
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: vperm 2, 2, 2, 3
-; PC64LE-NEXT: xxswapd 35, 0
+; PC64LE-NEXT: lxvd2x 35, 0, 3
; PC64LE-NEXT: vsld 2, 2, 3
; PC64LE-NEXT: vsrad 2, 2, 3
; PC64LE-NEXT: xvcvsxddp 34, 34
diff --git a/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll b/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
index 6ac90eda99e6a..540a00fa84c5d 100644
--- a/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-extend-sign.ll
@@ -79,8 +79,7 @@ define <2 x i64> @test_vextsb2d(<2 x i64> %m) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l
-; CHECK-P8-NEXT: lxvd2x 0, 0, 3
-; CHECK-P8-NEXT: xxswapd 35, 0
+; CHECK-P8-NEXT: lxvd2x 35, 0, 3
; CHECK-P8-NEXT: vsld 2, 2, 3
; CHECK-P8-NEXT: vsrad 2, 2, 3
; CHECK-P8-NEXT: blr
@@ -106,8 +105,7 @@ define <2 x i64> @test_vextsh2d(<2 x i64> %m) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l
-; CHECK-P8-NEXT: lxvd2x 0, 0, 3
-; CHECK-P8-NEXT: xxswapd 35, 0
+; CHECK-P8-NEXT: lxvd2x 35, 0, 3
; CHECK-P8-NEXT: vsld 2, 2, 3
; CHECK-P8-NEXT: vsrad 2, 2, 3
; CHECK-P8-NEXT: blr
@@ -133,8 +131,7 @@ define <2 x i64> @test_vextsw2d(<2 x i64> %m) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis 3, 2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI4_0@toc@l
-; CHECK-P8-NEXT: lxvd2x 0, 0, 3
-; CHECK-P8-NEXT: xxswapd 35, 0
+; CHECK-P8-NEXT: lxvd2x 35, 0, 3
; CHECK-P8-NEXT: vsld 2, 2, 3
; CHECK-P8-NEXT: vsrad 2, 2, 3
; CHECK-P8-NEXT: blr
@@ -166,8 +163,7 @@ define <2 x i64> @test_none(<2 x i64> %m) {
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis 3, 2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT: addi 3, 3, .LCPI5_0@toc@l
-; CHECK-P8-NEXT: lxvd2x 0, 0, 3
-; CHECK-P8-NEXT: xxswapd 35, 0
+; CHECK-P8-NEXT: lxvd2x 35, 0, 3
; CHECK-P8-NEXT: vsld 2, 2, 3
; CHECK-P8-NEXT: vsrad 2, 2, 3
; CHECK-P8-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 3f056970e7038..e42e2ae243326 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -2290,9 +2290,8 @@ define <2 x double> @test69(<2 x i16> %a) {
; CHECK-LE-NEXT: addis r3, r2, .LCPI63_1@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI63_1@toc@l
; CHECK-LE-NEXT: xxswapd v3, vs0
-; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
-; CHECK-LE-NEXT: xxswapd v3, vs0
+; CHECK-LE-NEXT: lxvd2x v3, 0, r3
; CHECK-LE-NEXT: vsld v2, v2, v3
; CHECK-LE-NEXT: vsrad v2, v2, v3
; CHECK-LE-NEXT: xvcvsxddp v2, v2
@@ -2371,9 +2370,8 @@ define <2 x double> @test70(<2 x i8> %a) {
; CHECK-LE-NEXT: addis r3, r2, .LCPI64_1@toc@ha
; CHECK-LE-NEXT: addi r3, r3, .LCPI64_1@toc@l
; CHECK-LE-NEXT: xxswapd v3, vs0
-; CHECK-LE-NEXT: lxvd2x vs0, 0, r3
; CHECK-LE-NEXT: vperm v2, v2, v2, v3
-; CHECK-LE-NEXT: xxswapd v3, vs0
+; CHECK-LE-NEXT: lxvd2x v3, 0, r3
; CHECK-LE-NEXT: vsld v2, v2, v3
; CHECK-LE-NEXT: vsrad v2, v2, v3
; CHECK-LE-NEXT: xvcvsxddp v2, v2
More information about the llvm-commits
mailing list