[llvm] 7614ba0 - [PowerPC] Fix vperm codegen
Maryam Moghadas via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 13:47:38 PST 2022
Author: Maryam Moghadas
Date: 2022-11-29T15:47:32-06:00
New Revision: 7614ba0a5db8a3503dfec8b0450bcdef1d0d5929
URL: https://github.com/llvm/llvm-project/commit/7614ba0a5db8a3503dfec8b0450bcdef1d0d5929
DIFF: https://github.com/llvm/llvm-project/commit/7614ba0a5db8a3503dfec8b0450bcdef1d0d5929.diff
LOG: [PowerPC] Fix vperm codegen
Commit rG934d5fa2b8672695c335deed0e19d0e777c98403 changed the vperm codegen
for cases where vperm is not replaced by xxperm; this patch reverts that change.
Reviewed By: stefanp
Differential Revision: https://reviews.llvm.org/D138736
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/build-vector-tests.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/vec-itofp.ll
llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 31e6ddac6552..5f51bdd8c1ea 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10186,9 +10186,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
}
}
- bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
- bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
-
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
@@ -10199,6 +10196,9 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
+ bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+ bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+
/*
Vectors will be appended like so: [ V1 | v2 ]
XXSWAPD on V1:
@@ -10219,24 +10219,27 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
- if (V1HasXXSWAPD) {
- if (SrcElt < 8)
- SrcElt += 8;
- else if (SrcElt < 16)
- SrcElt -= 8;
- }
- if (V2HasXXSWAPD) {
- if (SrcElt > 23)
- SrcElt -= 8;
- else if (SrcElt > 15)
- SrcElt += 8;
- }
- if (NeedSwap) {
- if (SrcElt < 16)
- SrcElt += 16;
- else
- SrcElt -= 16;
+ if (Opcode == PPCISD::XXPERM) {
+ if (V1HasXXSWAPD) {
+ if (SrcElt < 8)
+ SrcElt += 8;
+ else if (SrcElt < 16)
+ SrcElt -= 8;
+ }
+ if (V2HasXXSWAPD) {
+ if (SrcElt > 23)
+ SrcElt -= 8;
+ else if (SrcElt > 15)
+ SrcElt += 8;
+ }
+ if (NeedSwap) {
+ if (SrcElt < 16)
+ SrcElt += 16;
+ else
+ SrcElt -= 16;
+ }
}
+
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
ResultMask.push_back(
@@ -10246,16 +10249,15 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
}
- if (V1HasXXSWAPD) {
- dl = SDLoc(V1->getOperand(0));
- V1 = V1->getOperand(0)->getOperand(1);
- }
- if (V2HasXXSWAPD) {
- dl = SDLoc(V2->getOperand(0));
- V2 = V2->getOperand(0)->getOperand(1);
- }
-
- if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) {
+ if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
+ if (V1HasXXSWAPD) {
+ dl = SDLoc(V1->getOperand(0));
+ V1 = V1->getOperand(0)->getOperand(1);
+ }
+ if (V2HasXXSWAPD) {
+ dl = SDLoc(V2->getOperand(0));
+ V2 = V2->getOperand(0)->getOperand(1);
+ }
if (isPPC64 && ValType != MVT::v2f64)
V1 = DAG.getBitcast(MVT::v2f64, V1);
if (isPPC64 && V2.getValueType() != MVT::v2f64)
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 411b991dc29c..a40aeda0e3f6 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1058,14 +1058,15 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
;
; P8LE-LABEL: fromDiffMemVarDi:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
-; P8LE-NEXT: addi r5, r5, .LCPI9_0@toc@l
+; P8LE-NEXT: addis r5, r2, .LCPI9_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
-; P8LE-NEXT: lxvd2x vs0, 0, r5
+; P8LE-NEXT: addi r4, r5, .LCPI9_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
-; P8LE-NEXT: lxvd2x v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
@@ -1478,10 +1479,11 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoi:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI18_0@toc@ha
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI18_0@toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r4
-; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspsxws v2, v2
; P8LE-NEXT: blr
@@ -2578,14 +2580,15 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
;
; P8LE-LABEL: fromDiffMemVarDui:
; P8LE: # %bb.0: # %entry
-; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: sldi r4, r4, 2
-; P8LE-NEXT: addi r5, r5, .LCPI41_0@toc@l
+; P8LE-NEXT: addis r5, r2, .LCPI41_0@toc@ha
; P8LE-NEXT: add r3, r3, r4
-; P8LE-NEXT: lxvd2x vs0, 0, r5
+; P8LE-NEXT: addi r4, r5, .LCPI41_0@toc@l
; P8LE-NEXT: addi r3, r3, -12
-; P8LE-NEXT: lxvd2x v2, 0, r3
-; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: lxvd2x vs0, 0, r3
+; P8LE-NEXT: xxswapd v3, vs1
+; P8LE-NEXT: xxswapd v2, vs0
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: blr
entry:
@@ -2998,10 +3001,11 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
; P8LE-LABEL: fromDiffMemConsDConvftoui:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addis r4, r2, .LCPI50_0@toc@ha
-; P8LE-NEXT: lxvd2x v2, 0, r3
+; P8LE-NEXT: lxvd2x vs0, 0, r3
; P8LE-NEXT: addi r4, r4, .LCPI50_0@toc@l
-; P8LE-NEXT: lxvd2x vs0, 0, r4
-; P8LE-NEXT: xxswapd v3, vs0
+; P8LE-NEXT: lxvd2x vs1, 0, r4
+; P8LE-NEXT: xxswapd v2, vs0
+; P8LE-NEXT: xxswapd v3, vs1
; P8LE-NEXT: vperm v2, v2, v2, v3
; P8LE-NEXT: xvcvspuxws v2, v2
; P8LE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 5d48a873295f..b6fe31cf7080 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -491,10 +491,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
; CHECK-P9-BE: # %bb.0: # %entry
; CHECK-P9-BE-NEXT: lxsd v2, 0(r3)
; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha
-; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-P9-BE-NEXT: xxlxor vs1, vs1, vs1
; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l
-; CHECK-P9-BE-NEXT: lxv vs1, 0(r3)
-; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1
+; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT: xxperm v2, vs1, vs0
; CHECK-P9-BE-NEXT: blr
;
; CHECK-NOVSX-LABEL: testmrglb3:
diff --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
index 9cef5c37a2b8..562fb605040a 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
@@ -66,11 +66,11 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
; BE-LABEL: shufflevector_combine:
; BE: # %bb.0: # %newFuncRoot
; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; BE-NEXT: xxlxor vs0, vs0, vs0
+; BE-NEXT: xxlxor vs1, vs1, vs1
; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; BE-NEXT: lxv vs1, 0(r3)
+; BE-NEXT: lxv vs0, 0(r3)
; BE-NEXT: li r3, 0
-; BE-NEXT: xxperm v2, vs0, vs1
+; BE-NEXT: xxperm v2, vs1, vs0
; BE-NEXT: vinsw v2, r3, 8
; BE-NEXT: vpkuwum v2, v2, v2
; BE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 204b3f1bde8f..5d49bcae0cea 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -44,15 +44,15 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: add 5, 3, 4
; P9BE-NEXT: lxsdx 2, 3, 4
; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
-; P9BE-NEXT: xxlxor 0, 0, 0
+; P9BE-NEXT: xxlxor 1, 1, 1
; P9BE-NEXT: vspltisw 4, 8
; P9BE-NEXT: lxsd 3, 4(5)
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
-; P9BE-NEXT: lxv 1, 0(3)
+; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
-; P9BE-NEXT: xxperm 2, 0, 1
+; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
@@ -285,10 +285,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI2_1@toc@l
; P9BE-NEXT: xxperm 2, 0, 1
-; P9BE-NEXT: lxv 1, 0(3)
+; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: li 3, 0
-; P9BE-NEXT: xxmrghw 0, 4, 2
-; P9BE-NEXT: xxperm 3, 0, 1
+; P9BE-NEXT: xxmrghw 2, 4, 2
+; P9BE-NEXT: xxperm 3, 2, 0
; P9BE-NEXT: xxspltw 2, 3, 1
; P9BE-NEXT: vadduwm 2, 3, 2
; P9BE-NEXT: vextuwlx 3, 3, 2
@@ -312,10 +312,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: lxsihzx 0, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: xxperm 2, 0, 1
-; P9BE-AIX-NEXT: lxv 1, 0(3)
+; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: li 3, 0
-; P9BE-AIX-NEXT: xxmrghw 0, 4, 2
-; P9BE-AIX-NEXT: xxperm 3, 0, 1
+; P9BE-AIX-NEXT: xxmrghw 2, 4, 2
+; P9BE-AIX-NEXT: xxperm 3, 2, 0
; P9BE-AIX-NEXT: xxspltw 2, 3, 1
; P9BE-AIX-NEXT: vadduwm 2, 3, 2
; P9BE-AIX-NEXT: vextuwlx 3, 3, 2
@@ -395,13 +395,13 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9LE-NEXT: vmrghb 2, 3, 2
; P9LE-NEXT: addi 3, 3, .LCPI3_0@toc@l
; P9LE-NEXT: vmrglh 2, 2, 4
-; P9LE-NEXT: lxv 1, 0(3)
+; P9LE-NEXT: lxv 0, 0(3)
; P9LE-NEXT: li 3, 0
; P9LE-NEXT: vmrghb 3, 3, 5
; P9LE-NEXT: xxmrglw 2, 2, 4
; P9LE-NEXT: vmrglh 3, 3, 4
-; P9LE-NEXT: xxmrglw 0, 4, 3
-; P9LE-NEXT: xxperm 2, 0, 1
+; P9LE-NEXT: xxmrglw 3, 4, 3
+; P9LE-NEXT: xxperm 2, 3, 0
; P9LE-NEXT: xxspltw 3, 2, 2
; P9LE-NEXT: vadduwm 2, 2, 3
; P9LE-NEXT: vextuwrx 3, 3, 2
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 50b4a079d151..b90b9aed44e7 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -184,11 +184,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v16i8:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
-; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
@@ -431,11 +432,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
-; CHECK-LE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: mtvsrd v4, r3
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 10131f8f6931..ad4b112bebe0 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -389,10 +389,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
+; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -411,10 +411,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
+; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
@@ -470,14 +470,15 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8: # %bb.0: # %entry
; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
-; CHECK-LE-P8-NEXT: lxvd2x v3, 0, r4
; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs1
; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
; CHECK-LE-P8-NEXT: xxswapd v3, vs0
; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
@@ -544,10 +545,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
-; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-AIX-64-P9-NEXT: xxperm v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
@@ -603,10 +604,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-LE-P9-NEXT: xxlxor vs1, vs1, vs1
+; CHECK-LE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: blr
;
@@ -625,10 +626,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-BE-P9-NEXT: xxlxor vs1, vs1, vs1
+; CHECK-BE-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
-; CHECK-BE-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-P9-NEXT: xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index a54c704aa040..e4a4761ffec9 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -203,13 +203,13 @@ define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
;
; CHECK-P9-LABEL: test2:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: lxv vs0, 0(r4)
; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha
-; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l
-; CHECK-P9-NEXT: lxv vs2, 0(r4)
-; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT: xvcvuxddp vs0, vs1
+; CHECK-P9-NEXT: lxv vs1, 0(r4)
+; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT: xvcvuxddp vs0, vs0
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index 64022b5503e0..7a689f43dfa9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -80,13 +80,13 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mtfprd f1, r3
+; CHECK-BE-NEXT: mtfprd f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT: xvcvuxwsp v2, vs1
+; CHECK-BE-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT: xvcvuxwsp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i64 %a.coerce to <4 x i16>
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 408953ad519e..a2c1523c97ee 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mtfprwz f1, r3
+; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT: xvcvuxddp v2, vs1
+; CHECK-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT: xvcvuxddp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mtfprwz f1, r3
+; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT: xvcvuxddp v2, vs1
+; CHECK-BE-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT: xvcvuxddp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <2 x i16>
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 31436c5baa50..37e986d17e6f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -76,24 +76,24 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mtfprwz f1, r3
+; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT: xvcvuxwsp v2, vs1
+; CHECK-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT: xvcvuxwsp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mtfprwz f1, r3
+; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-BE-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT: xvcvuxwsp v2, vs1
+; CHECK-BE-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT: xvcvuxwsp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i32 %a.coerce to <4 x i8>
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 024c85cd7bcf..5dabe65fd68d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9-NEXT: mtfprwz f1, r3
+; CHECK-P9-NEXT: mtfprwz f0, r3
; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2
; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-P9-NEXT: lxv vs2, 0(r3)
-; CHECK-P9-NEXT: xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT: xvcvuxddp v2, vs1
+; CHECK-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-P9-NEXT: xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT: xvcvuxddp v2, vs0
; CHECK-P9-NEXT: blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mtfprwz f1, r3
+; CHECK-BE-NEXT: mtfprwz f0, r3
; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT: xxlxor vs2, vs2, vs2
; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-NEXT: lxv vs2, 0(r3)
-; CHECK-BE-NEXT: xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT: xvcvuxddp v2, vs1
+; CHECK-BE-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-NEXT: xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT: xvcvuxddp v2, vs0
; CHECK-BE-NEXT: blr
entry:
%0 = bitcast i16 %a.coerce to <2 x i8>
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 9721231174f1..bc0390b1ed81 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -7833,10 +7833,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i16(<2 x i16> %x) #0 {
; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI173_0@toc@ha
-; PC64LE9-NEXT: xxlxor 0, 0, 0
+; PC64LE9-NEXT: xxlxor 1, 1, 1
; PC64LE9-NEXT: addi 3, 3, .LCPI173_0@toc@l
-; PC64LE9-NEXT: lxv 1, 0(3)
-; PC64LE9-NEXT: xxperm 34, 0, 1
+; PC64LE9-NEXT: lxv 0, 0(3)
+; PC64LE9-NEXT: xxperm 34, 1, 0
; PC64LE9-NEXT: xvcvuxddp 34, 34
; PC64LE9-NEXT: blr
entry:
More information about the llvm-commits
mailing list