[llvm] 7614ba0 - [PowerPC] Fix vperm codegen

Maryam Moghadas via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 29 13:47:38 PST 2022


Author: Maryam Moghadas
Date: 2022-11-29T15:47:32-06:00
New Revision: 7614ba0a5db8a3503dfec8b0450bcdef1d0d5929

URL: https://github.com/llvm/llvm-project/commit/7614ba0a5db8a3503dfec8b0450bcdef1d0d5929
DIFF: https://github.com/llvm/llvm-project/commit/7614ba0a5db8a3503dfec8b0450bcdef1d0d5929.diff

LOG: [PowerPC] Fix vperm codegen

Commit rG934d5fa2b8672695c335deed0e19d0e777c98403 changed the vperm codegen
for cases where vperm is not replaced by xxperm; this patch reverts that change.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D138736

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/build-vector-tests.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 31e6ddac6552..5f51bdd8c1ea 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10186,9 +10186,6 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
-  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
-
   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
   // that it is in input element units, not in bytes.  Convert now.
 
@@ -10199,6 +10196,9 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
   EVT EltVT = V1.getValueType().getVectorElementType();
   unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
 
+  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
+
   /*
   Vectors will be appended like so: [ V1 | v2 ]
   XXSWAPD on V1:
@@ -10219,24 +10219,27 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
 
-    if (V1HasXXSWAPD) {
-      if (SrcElt < 8)
-        SrcElt += 8;
-      else if (SrcElt < 16)
-        SrcElt -= 8;
-    }
-    if (V2HasXXSWAPD) {
-      if (SrcElt > 23)
-        SrcElt -= 8;
-      else if (SrcElt > 15)
-        SrcElt += 8;
-    }
-    if (NeedSwap) {
-      if (SrcElt < 16)
-        SrcElt += 16;
-      else
-        SrcElt -= 16;
+    if (Opcode == PPCISD::XXPERM) {
+      if (V1HasXXSWAPD) {
+        if (SrcElt < 8)
+          SrcElt += 8;
+        else if (SrcElt < 16)
+          SrcElt -= 8;
+      }
+      if (V2HasXXSWAPD) {
+        if (SrcElt > 23)
+          SrcElt -= 8;
+        else if (SrcElt > 15)
+          SrcElt += 8;
+      }
+      if (NeedSwap) {
+        if (SrcElt < 16)
+          SrcElt += 16;
+        else
+          SrcElt -= 16;
+      }
     }
+
     for (unsigned j = 0; j != BytesPerElement; ++j)
       if (isLittleEndian)
         ResultMask.push_back(
@@ -10246,16 +10249,15 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
             DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
   }
 
-  if (V1HasXXSWAPD) {
-    dl = SDLoc(V1->getOperand(0));
-    V1 = V1->getOperand(0)->getOperand(1);
-  }
-  if (V2HasXXSWAPD) {
-    dl = SDLoc(V2->getOperand(0));
-    V2 = V2->getOperand(0)->getOperand(1);
-  }
-
-  if (V1HasXXSWAPD || V2HasXXSWAPD || Opcode == PPCISD::XXPERM) {
+  if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
+    if (V1HasXXSWAPD) {
+      dl = SDLoc(V1->getOperand(0));
+      V1 = V1->getOperand(0)->getOperand(1);
+    }
+    if (V2HasXXSWAPD) {
+      dl = SDLoc(V2->getOperand(0));
+      V2 = V2->getOperand(0)->getOperand(1);
+    }
     if (isPPC64 && ValType != MVT::v2f64)
       V1 = DAG.getBitcast(MVT::v2f64, V1);
     if (isPPC64 && V2.getValueType() != MVT::v2f64)

diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 411b991dc29c..a40aeda0e3f6 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1058,14 +1058,15 @@ define <4 x i32> @fromDiffMemVarDi(ptr nocapture readonly %arr, i32 signext %ele
 ;
 ; P8LE-LABEL: fromDiffMemVarDi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r5, r2, .LCPI9_0 at toc@ha
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addi r5, r5, .LCPI9_0 at toc@l
+; P8LE-NEXT:    addis r5, r2, .LCPI9_0 at toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r5
+; P8LE-NEXT:    addi r4, r5, .LCPI9_0 at toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -1478,10 +1479,11 @@ define <4 x i32> @fromDiffMemConsDConvftoi(ptr nocapture readonly %ptr) {
 ; P8LE-LABEL: fromDiffMemConsDConvftoi:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI18_0 at toc@ha
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI18_0 at toc@l
-; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspsxws v2, v2
 ; P8LE-NEXT:    blr
@@ -2578,14 +2580,15 @@ define <4 x i32> @fromDiffMemVarDui(ptr nocapture readonly %arr, i32 signext %el
 ;
 ; P8LE-LABEL: fromDiffMemVarDui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    addis r5, r2, .LCPI41_0 at toc@ha
 ; P8LE-NEXT:    sldi r4, r4, 2
-; P8LE-NEXT:    addi r5, r5, .LCPI41_0 at toc@l
+; P8LE-NEXT:    addis r5, r2, .LCPI41_0 at toc@ha
 ; P8LE-NEXT:    add r3, r3, r4
-; P8LE-NEXT:    lxvd2x vs0, 0, r5
+; P8LE-NEXT:    addi r4, r5, .LCPI41_0 at toc@l
 ; P8LE-NEXT:    addi r3, r3, -12
-; P8LE-NEXT:    lxvd2x v2, 0, r3
-; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
+; P8LE-NEXT:    xxswapd v3, vs1
+; P8LE-NEXT:    xxswapd v2, vs0
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    blr
 entry:
@@ -2998,10 +3001,11 @@ define <4 x i32> @fromDiffMemConsDConvftoui(ptr nocapture readonly %ptr) {
 ; P8LE-LABEL: fromDiffMemConsDConvftoui:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    addis r4, r2, .LCPI50_0 at toc@ha
-; P8LE-NEXT:    lxvd2x v2, 0, r3
+; P8LE-NEXT:    lxvd2x vs0, 0, r3
 ; P8LE-NEXT:    addi r4, r4, .LCPI50_0 at toc@l
-; P8LE-NEXT:    lxvd2x vs0, 0, r4
-; P8LE-NEXT:    xxswapd v3, vs0
+; P8LE-NEXT:    lxvd2x vs1, 0, r4
+; P8LE-NEXT:    xxswapd v2, vs0
+; P8LE-NEXT:    xxswapd v3, vs1
 ; P8LE-NEXT:    vperm v2, v2, v2, v3
 ; P8LE-NEXT:    xvcvspuxws v2, v2
 ; P8LE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 5d48a873295f..b6fe31cf7080 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -491,10 +491,10 @@ define dso_local <8 x i16> @testmrglb3(ptr nocapture readonly %a) local_unnamed_
 ; CHECK-P9-BE:       # %bb.0: # %entry
 ; CHECK-P9-BE-NEXT:    lxsd v2, 0(r3)
 ; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI12_0 at toc@ha
-; CHECK-P9-BE-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-P9-BE-NEXT:    xxlxor vs1, vs1, vs1
 ; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI12_0 at toc@l
-; CHECK-P9-BE-NEXT:    lxv vs1, 0(r3)
-; CHECK-P9-BE-NEXT:    xxperm v2, vs0, vs1
+; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-BE-NEXT:    xxperm v2, vs1, vs0
 ; CHECK-P9-BE-NEXT:    blr
 ;
 ; CHECK-NOVSX-LABEL: testmrglb3:

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
index 9cef5c37a2b8..562fb605040a 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
@@ -66,11 +66,11 @@ define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
 ; BE-LABEL: shufflevector_combine:
 ; BE:       # %bb.0: # %newFuncRoot
 ; BE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; BE-NEXT:    xxlxor vs0, vs0, vs0
+; BE-NEXT:    xxlxor vs1, vs1, vs1
 ; BE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; BE-NEXT:    lxv vs1, 0(r3)
+; BE-NEXT:    lxv vs0, 0(r3)
 ; BE-NEXT:    li r3, 0
-; BE-NEXT:    xxperm v2, vs0, vs1
+; BE-NEXT:    xxperm v2, vs1, vs0
 ; BE-NEXT:    vinsw v2, r3, 8
 ; BE-NEXT:    vpkuwum v2, v2, v2
 ; BE-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index 204b3f1bde8f..5d49bcae0cea 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -44,15 +44,15 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-NEXT:    add 5, 3, 4
 ; P9BE-NEXT:    lxsdx 2, 3, 4
 ; P9BE-NEXT:    addis 3, 2, .LCPI0_0 at toc@ha
-; P9BE-NEXT:    xxlxor 0, 0, 0
+; P9BE-NEXT:    xxlxor 1, 1, 1
 ; P9BE-NEXT:    vspltisw 4, 8
 ; P9BE-NEXT:    lxsd 3, 4(5)
 ; P9BE-NEXT:    addi 3, 3, .LCPI0_0 at toc@l
 ; P9BE-NEXT:    vadduwm 4, 4, 4
-; P9BE-NEXT:    lxv 1, 0(3)
+; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    addis 3, 2, .LCPI0_1 at toc@ha
 ; P9BE-NEXT:    addi 3, 3, .LCPI0_1 at toc@l
-; P9BE-NEXT:    xxperm 2, 0, 1
+; P9BE-NEXT:    xxperm 2, 1, 0
 ; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    xxperm 3, 3, 0
 ; P9BE-NEXT:    vnegw 3, 3
@@ -285,10 +285,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-NEXT:    addis 3, 2, .LCPI2_1 at toc@ha
 ; P9BE-NEXT:    addi 3, 3, .LCPI2_1 at toc@l
 ; P9BE-NEXT:    xxperm 2, 0, 1
-; P9BE-NEXT:    lxv 1, 0(3)
+; P9BE-NEXT:    lxv 0, 0(3)
 ; P9BE-NEXT:    li 3, 0
-; P9BE-NEXT:    xxmrghw 0, 4, 2
-; P9BE-NEXT:    xxperm 3, 0, 1
+; P9BE-NEXT:    xxmrghw 2, 4, 2
+; P9BE-NEXT:    xxperm 3, 2, 0
 ; P9BE-NEXT:    xxspltw 2, 3, 1
 ; P9BE-NEXT:    vadduwm 2, 3, 2
 ; P9BE-NEXT:    vextuwlx 3, 3, 2
@@ -312,10 +312,10 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
 ; P9BE-AIX-NEXT:    lxsihzx 0, 3, 4
 ; P9BE-AIX-NEXT:    ld 3, L..C4(2) # %const.1
 ; P9BE-AIX-NEXT:    xxperm 2, 0, 1
-; P9BE-AIX-NEXT:    lxv 1, 0(3)
+; P9BE-AIX-NEXT:    lxv 0, 0(3)
 ; P9BE-AIX-NEXT:    li 3, 0
-; P9BE-AIX-NEXT:    xxmrghw 0, 4, 2
-; P9BE-AIX-NEXT:    xxperm 3, 0, 1
+; P9BE-AIX-NEXT:    xxmrghw 2, 4, 2
+; P9BE-AIX-NEXT:    xxperm 3, 2, 0
 ; P9BE-AIX-NEXT:    xxspltw 2, 3, 1
 ; P9BE-AIX-NEXT:    vadduwm 2, 3, 2
 ; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
@@ -395,13 +395,13 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
 ; P9LE-NEXT:    vmrghb 2, 3, 2
 ; P9LE-NEXT:    addi 3, 3, .LCPI3_0 at toc@l
 ; P9LE-NEXT:    vmrglh 2, 2, 4
-; P9LE-NEXT:    lxv 1, 0(3)
+; P9LE-NEXT:    lxv 0, 0(3)
 ; P9LE-NEXT:    li 3, 0
 ; P9LE-NEXT:    vmrghb 3, 3, 5
 ; P9LE-NEXT:    xxmrglw 2, 2, 4
 ; P9LE-NEXT:    vmrglh 3, 3, 4
-; P9LE-NEXT:    xxmrglw 0, 4, 3
-; P9LE-NEXT:    xxperm 2, 0, 1
+; P9LE-NEXT:    xxmrglw 3, 4, 3
+; P9LE-NEXT:    xxperm 2, 3, 0
 ; P9LE-NEXT:    xxspltw 3, 2, 2
 ; P9LE-NEXT:    vadduwm 2, 2, 3
 ; P9LE-NEXT:    vextuwrx 3, 3, 2

diff  --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
index 50b4a079d151..b90b9aed44e7 100644
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -184,11 +184,12 @@ define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI2_0 at toc@ha
-; CHECK-LE-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI2_0 at toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
@@ -431,11 +432,12 @@ define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
 ; CHECK-LE-P8-LABEL: test_none_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI5_0 at toc@ha
-; CHECK-LE-P8-NEXT:    lxvd2x v2, 0, r4
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI5_0 at toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
index 10131f8f6931..ad4b112bebe0 100644
--- a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -389,10 +389,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
-; CHECK-LE-P9-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-LE-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, vs2, vs1
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -411,10 +411,10 @@ define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0 at toc@ha
-; CHECK-BE-P9-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-BE-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0 at toc@l
-; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, vs2, vs1
 ; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;
@@ -470,14 +470,15 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-LE-P8-LABEL: test_none_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI4_0 at toc@ha
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
-; CHECK-LE-P8-NEXT:    lxvd2x v3, 0, r4
 ; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI4_1 at toc@ha
 ; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI4_0 at toc@l
 ; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI4_1 at toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
+; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs1
 ; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
 ; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
@@ -544,10 +545,10 @@ define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonl
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
-; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
-; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    blr
@@ -603,10 +604,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-LE-P9-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-LE-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-LE-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-LE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT:    xxperm vs0, vs2, vs1
 ; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -625,10 +626,10 @@ define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
 ; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-P9-NEXT:    xxlxor vs1, vs1, vs1
+; CHECK-BE-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0 at toc@l
-; CHECK-BE-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-P9-NEXT:    xxperm vs0, vs1, vs2
+; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT:    xxperm vs0, vs2, vs1
 ; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-P9-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index a54c704aa040..e4a4761ffec9 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -203,13 +203,13 @@ define void @test2(ptr nocapture %Sink, ptr nocapture readonly %SrcPtr) {
 ;
 ; CHECK-P9-LABEL: test2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    lxv vs0, 0(r4)
 ; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0 at toc@ha
-; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0 at toc@l
-; CHECK-P9-NEXT:    lxv vs2, 0(r4)
-; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT:    xvcvuxddp vs0, vs1
+; CHECK-P9-NEXT:    lxv vs1, 0(r4)
+; CHECK-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT:    xvcvuxddp vs0, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 0(r3)
 ; CHECK-P9-NEXT:    blr
 ;

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
index 64022b5503e0..7a689f43dfa9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -80,13 +80,13 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprd f1, r3
+; CHECK-BE-NEXT:    mtfprd f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT:    xvcvuxwsp v2, vs1
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT:    xvcvuxwsp v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 408953ad519e..a2c1523c97ee 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtfprwz f1, r3
+; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT:    xvcvuxddp v2, vs1
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT:    xvcvuxddp v2, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT:    xvcvuxddp v2, vs1
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT:    xvcvuxddp v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <2 x i16>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 31436c5baa50..37e986d17e6f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -76,24 +76,24 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
 ;
 ; CHECK-P9-LABEL: test4elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtfprwz f1, r3
+; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
-; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT:    xvcvuxwsp v2, vs1
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT:    xvcvuxwsp v2, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test4elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT:    xvcvuxwsp v2, vs1
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT:    xvcvuxwsp v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index 024c85cd7bcf..5dabe65fd68d 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -24,24 +24,24 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
 ;
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mtfprwz f1, r3
+; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-P9-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-P9-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-P9-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-P9-NEXT:    lxv vs2, 0(r3)
-; CHECK-P9-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-P9-NEXT:    xvcvuxddp v2, vs1
+; CHECK-P9-NEXT:    lxv vs1, 0(r3)
+; CHECK-P9-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-P9-NEXT:    xvcvuxddp v2, vs0
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtfprwz f1, r3
+; CHECK-BE-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor vs0, vs0, vs0
+; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
 ; CHECK-BE-NEXT:    addi r3, r3, .LCPI0_0 at toc@l
-; CHECK-BE-NEXT:    lxv vs2, 0(r3)
-; CHECK-BE-NEXT:    xxperm vs1, vs0, vs2
-; CHECK-BE-NEXT:    xvcvuxddp v2, vs1
+; CHECK-BE-NEXT:    lxv vs1, 0(r3)
+; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
+; CHECK-BE-NEXT:    xvcvuxddp v2, vs0
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i16 %a.coerce to <2 x i8>

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 9721231174f1..bc0390b1ed81 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -7833,10 +7833,10 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i16(<2 x i16> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI173_0 at toc@ha
-; PC64LE9-NEXT:    xxlxor 0, 0, 0
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI173_0 at toc@l
-; PC64LE9-NEXT:    lxv 1, 0(3)
-; PC64LE9-NEXT:    xxperm 34, 0, 1
+; PC64LE9-NEXT:    lxv 0, 0(3)
+; PC64LE9-NEXT:    xxperm 34, 1, 0
 ; PC64LE9-NEXT:    xvcvuxddp 34, 34
 ; PC64LE9-NEXT:    blr
 entry:


        


More information about the llvm-commits mailing list