[llvm] 4388f4f - [DAG] Don't convert undef to 0 when creating buildvector

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 6 10:35:41 PST 2022


Author: David Green
Date: 2022-03-06T18:35:34Z
New Revision: 4388f4f77694579b088e302204ecfa5588486f06

URL: https://github.com/llvm/llvm-project/commit/4388f4f77694579b088e302204ecfa5588486f06
DIFF: https://github.com/llvm/llvm-project/commit/4388f4f77694579b088e302204ecfa5588486f06.diff

LOG: [DAG] Don't convert undef to 0 when creating buildvector

When inserting undef into buildvectors created from shuffles of
buildvectors, we convert elements to the largest needed type. This had
the effect of converting undef into 0, which isn't needed as the
buildvector implicitly truncates and trunc(zext(undef)) == undef.

Differential Revision: https://reviews.llvm.org/D121002

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
    llvm/test/CodeGen/PowerPC/vec-itofp.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
    llvm/test/CodeGen/Thumb2/mve-vst3.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 299ba51d606d6..b76f4711322f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21639,9 +21639,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   if (SVT != VT.getScalarType())
     for (SDValue &Op : Ops)
-      Op = TLI.isZExtFree(Op.getValueType(), SVT)
-               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
-               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+      Op = Op.isUndef() ? DAG.getUNDEF(SVT)
+                        : (TLI.isZExtFree(Op.getValueType(), SVT)
+                               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+                               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
index fdd7cad78536b..51a1f02407e9f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
 
 
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: movi.16b v[[REG0:[0-9]+]], #0
 define <8 x i1> @test1() {
 entry:
@@ -14,16 +14,16 @@ entry:
   ret <8 x i1> %Shuff
 }
 
-; CHECK: lCPI1_0:
-; CHECK:          .byte   0                       ; 0x0
-; CHECK:          .byte   0                       ; 0x0
+; CHECK-LABEL: lCPI1_0:
 ; CHECK:          .byte   0                       ; 0x0
+; CHECK:          .space  1
 ; CHECK:          .byte   0                       ; 0x0
+; CHECK:          .space  1
 ; CHECK:          .byte   1                       ; 0x1
 ; CHECK:          .byte   0                       ; 0x0
 ; CHECK:          .byte   0                       ; 0x0
 ; CHECK:          .byte   0                       ; 0x0
-; CHECK: test2
+; CHECK-LABEL: test2
 ; CHECK: adrp    x[[REG2:[0-9]+]], lCPI1_0 at PAGE
 ; CHECK: ldr     d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0 at PAGEOFF]
 define <8 x i1>@test2() {
@@ -35,7 +35,7 @@ bb:
   ret <8 x i1> %Shuff
 }
 
-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: movi.4s v{{[0-9]+}}, #1
 define <16 x i1> @test3(i1* %ptr, i32 %v) {
 bb:
@@ -45,7 +45,7 @@ bb:
                  i32 14, i32 0>
   ret <16 x i1> %Shuff
 }
-; CHECK: lCPI3_0:
+; CHECK-LABEL: lCPI3_0:
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
@@ -62,7 +62,7 @@ bb:
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
-; CHECK: _test4:
+; CHECK-LABEL: _test4:
 ; CHECK:         adrp    x[[REG3:[0-9]+]], lCPI3_0 at PAGE
 ; CHECK:         ldr     q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0 at PAGEOFF]
 define <16 x i1> @test4(i1* %ptr, i32 %v) {

diff  --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
index b8544cb8aaf13..8274c493d6a28 100644
--- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll
@@ -307,34 +307,33 @@ define void @stest8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0 at toc@l
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
@@ -395,20 +394,19 @@ define void @stest4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0 at toc@l
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
index 594ed990711ad..f3454e9282e54 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll
@@ -459,20 +459,19 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <4 x i16>
@@ -564,34 +563,33 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>)
 ; CHECK-BE-LABEL: test8elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_2 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_2 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_3 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_3 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    vextsh2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <8 x i16> %a to <8 x double>
@@ -730,52 +728,51 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_0 at toc@ha
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
-; CHECK-BE-NEXT:    lxv v1, 16(r4)
-; CHECK-BE-NEXT:    xxlxor v5, v5, v5
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2 at toc@ha
+; CHECK-BE-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_0 at toc@l
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2 at toc@l
-; CHECK-BE-NEXT:    lxv v2, 0(r5)
+; CHECK-BE-NEXT:    lxv v3, 0(r5)
 ; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_1 at toc@l
-; CHECK-BE-NEXT:    lxv v3, 0(r5)
-; CHECK-BE-NEXT:    vperm v0, v5, v4, v2
-; CHECK-BE-NEXT:    vperm v2, v5, v1, v2
-; CHECK-BE-NEXT:    vextsh2d v2, v2
-; CHECK-BE-NEXT:    vextsh2d v0, v0
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v2
-; CHECK-BE-NEXT:    vperm v2, v5, v1, v3
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v0
-; CHECK-BE-NEXT:    vperm v0, v5, v4, v3
+; CHECK-BE-NEXT:    lxv v5, 0(r5)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_2 at toc@ha
+; CHECK-BE-NEXT:    vperm v4, v2, v2, v3
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_2 at toc@l
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    lxv v0, 0(r5)
+; CHECK-BE-NEXT:    addis r5, r2, .LCPI7_3 at toc@ha
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v4
+; CHECK-BE-NEXT:    vperm v4, v2, v2, v5
+; CHECK-BE-NEXT:    addi r5, r5, .LCPI7_3 at toc@l
+; CHECK-BE-NEXT:    lxv v1, 0(r5)
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
+; CHECK-BE-NEXT:    vperm v4, v2, v2, v0
+; CHECK-BE-NEXT:    vperm v2, v2, v2, v1
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
+; CHECK-BE-NEXT:    lxv v4, 16(r4)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
-; CHECK-BE-NEXT:    vextsh2d v0, v0
 ; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    lxv v2, 0(r4)
-; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v0
-; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
-; CHECK-BE-NEXT:    stxv vs2, 80(r3)
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
-; CHECK-BE-NEXT:    vperm v3, v4, v4, v2
-; CHECK-BE-NEXT:    vperm v2, v1, v1, v2
-; CHECK-BE-NEXT:    stxv vs3, 112(r3)
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    vextsh2d v3, v3
+; CHECK-BE-NEXT:    vperm v2, v4, v4, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
+; CHECK-BE-NEXT:    xvcvsxddp vs4, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v4, v5
+; CHECK-BE-NEXT:    vextsh2d v2, v2
+; CHECK-BE-NEXT:    xvcvsxddp vs5, v2
+; CHECK-BE-NEXT:    vperm v2, v4, v4, v0
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs4, v3
-; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    xvcvsxddp vs6, v2
-; CHECK-BE-NEXT:    vperm v4, v4, v4, v3
-; CHECK-BE-NEXT:    vperm v2, v1, v1, v3
-; CHECK-BE-NEXT:    stxv vs6, 64(r3)
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
-; CHECK-BE-NEXT:    vextsh2d v4, v4
+; CHECK-BE-NEXT:    vperm v2, v4, v4, v1
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
-; CHECK-BE-NEXT:    xvcvsxddp vs5, v4
 ; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
-; CHECK-BE-NEXT:    stxv vs7, 96(r3)
-; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a = load <16 x i16>, <16 x i16>* %0, align 32

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
index 9493e571a8cc3..d26f1f2d29d8a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -404,20 +404,19 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsb2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -503,34 +502,33 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    vextsb2w v3, v3
 ; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    vextsb2w v2, v2
 ; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x float>

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
index fdf42d2580c0b..23adab16eda91 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll
@@ -492,20 +492,19 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>)
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrwz v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0 at toc@l
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsb2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
-; CHECK-BE-NEXT:    stxv vs1, 0(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i32 %a.coerce to <4 x i8>
@@ -600,34 +599,33 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>)
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrd v2, r4
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v4, v4, v4
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0 at toc@l
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_2 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_2 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_3 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_3 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs2, 0(r3)
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    vextsb2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
-; CHECK-BE-NEXT:    stxv vs3, 32(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = bitcast i64 %a.coerce to <8 x i8>
@@ -787,62 +785,61 @@ define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double
 ; CHECK-BE-LABEL: test16elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0 at toc@ha
-; CHECK-BE-NEXT:    xxlxor v3, v3, v3
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0 at toc@l
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1 at toc@l
-; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
-; CHECK-BE-NEXT:    vextsb2d v4, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs0, v4
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2 at toc@l
-; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
-; CHECK-BE-NEXT:    stxv vs0, 16(r3)
-; CHECK-BE-NEXT:    vextsb2d v4, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs1, v4
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3 at toc@l
-; CHECK-BE-NEXT:    vperm v4, v3, v2, v4
-; CHECK-BE-NEXT:    stxv vs1, 48(r3)
-; CHECK-BE-NEXT:    vextsb2d v4, v4
-; CHECK-BE-NEXT:    xvcvsxddp vs2, v4
-; CHECK-BE-NEXT:    lxv v4, 0(r4)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    vextsb2d v3, v3
+; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
+; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_4 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_4 at toc@l
-; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
-; CHECK-BE-NEXT:    stxv vs2, 80(r3)
+; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
+; CHECK-BE-NEXT:    stxv vs2, 32(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs3, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_5 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_5 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs3, 112(r3)
+; CHECK-BE-NEXT:    stxv vs3, 48(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs4, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_6 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_6 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs4, 0(r3)
+; CHECK-BE-NEXT:    stxv vs4, 64(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs5, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_7 at toc@ha
 ; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_7 at toc@l
 ; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs5, 32(r3)
+; CHECK-BE-NEXT:    stxv vs5, 80(r3)
 ; CHECK-BE-NEXT:    vextsb2d v3, v3
 ; CHECK-BE-NEXT:    xvcvsxddp vs6, v3
 ; CHECK-BE-NEXT:    lxv v3, 0(r4)
 ; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
-; CHECK-BE-NEXT:    stxv vs6, 64(r3)
+; CHECK-BE-NEXT:    stxv vs6, 96(r3)
 ; CHECK-BE-NEXT:    vextsb2d v2, v2
 ; CHECK-BE-NEXT:    xvcvsxddp vs7, v2
-; CHECK-BE-NEXT:    stxv vs7, 96(r3)
+; CHECK-BE-NEXT:    stxv vs7, 112(r3)
 ; CHECK-BE-NEXT:    blr
 entry:
   %0 = sitofp <16 x i8> %a to <16 x double>

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index d3e042a601205..5f29a419fd0ef 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -533,30 +533,27 @@ entry:
 define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) {
 ; CHECK-LABEL: vst3_v2i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    .pad #16
 ; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    ldrb r2, [r0]
-; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, sp
 ; CHECK-NEXT:    ldrb r3, [r0, #1]
 ; CHECK-NEXT:    ldrb.w r12, [r0, #2]
 ; CHECK-NEXT:    vmov q0[2], q0[0], r2, r3
 ; CHECK-NEXT:    ldrb.w lr, [r0, #3]
-; CHECK-NEXT:    vmov r4, s0
+; CHECK-NEXT:    vmov r2, s0
 ; CHECK-NEXT:    ldrb r5, [r0, #5]
-; CHECK-NEXT:    vmov.16 q0[0], r4
+; CHECK-NEXT:    vmov.16 q0[0], r2
 ; CHECK-NEXT:    ldrb r0, [r0, #4]
 ; CHECK-NEXT:    vmov.16 q0[1], r12
-; CHECK-NEXT:    mov r2, sp
 ; CHECK-NEXT:    vmov.16 q0[2], r0
 ; CHECK-NEXT:    add r0, sp, #8
 ; CHECK-NEXT:    vmov.16 q0[3], r3
 ; CHECK-NEXT:    vmov.16 q0[4], lr
 ; CHECK-NEXT:    vmov.16 q0[5], r5
-; CHECK-NEXT:    vmov.16 q0[6], r6
-; CHECK-NEXT:    vmov.16 q0[7], r6
-; CHECK-NEXT:    vstrb.16 q0, [r2]
+; CHECK-NEXT:    vstrb.16 q0, [r4]
 ; CHECK-NEXT:    vstrb.16 q0, [r0]
 ; CHECK-NEXT:    vldrh.u32 q0, [r0]
 ; CHECK-NEXT:    ldr r2, [sp]
@@ -564,7 +561,7 @@ define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) {
 ; CHECK-NEXT:    vmov r0, s2
 ; CHECK-NEXT:    strh r0, [r1, #4]
 ; CHECK-NEXT:    add sp, #16
-; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %s1 = getelementptr <2 x i8>, <2 x i8>* %src, i32 0
   %l1 = load <2 x i8>, <2 x i8>* %s1, align 4


        


More information about the llvm-commits mailing list