[llvm] a71ad6a - [DAG] visitINSERT_VECTOR_ELT - fold insert_vector_elt(scalar_to_vector(x),v,i) -> build_vector()

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 11 07:29:28 PDT 2022


Author: Simon Pilgrim
Date: 2022-06-11T15:29:22+01:00
New Revision: a71ad6a3c80d2a8526976c03d11bcb97f736ba52

URL: https://github.com/llvm/llvm-project/commit/a71ad6a3c80d2a8526976c03d11bcb97f736ba52
DIFF: https://github.com/llvm/llvm-project/commit/a71ad6a3c80d2a8526976c03d11bcb97f736ba52.diff

LOG: [DAG] visitINSERT_VECTOR_ELT - fold insert_vector_elt(scalar_to_vector(x),v,i) -> build_vector()

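Allow a single-use scalar_to_vector node to serve as the starting point when building a build_vector from a chain of insert_vector_elt nodes: the scalar operand becomes element 0 and the remaining NumElts - 1 elements start out as undef.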

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/load-and-splat.ll
    llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
    llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
    llvm/test/CodeGen/Thumb2/mve-vld3.ll
    llvm/test/CodeGen/Thumb2/mve-vst3.ll
    llvm/test/CodeGen/Thumb2/mve-vst4.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 35c39038d0128..6ba9a07952f30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19406,6 +19406,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
       return UpdateBuildVector(Ops);
     }
 
+    if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+      Ops.push_back(InVec.getOperand(0));
+      Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+      return UpdateBuildVector(Ops);
+    }
+
     if (InVec.isUndef()) {
       Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
       return UpdateBuildVector(Ops);

diff  --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
index b0716a57b318d..9b0b1e2b5bf09 100644
--- a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
+++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
@@ -30,15 +30,13 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 ;
 ; AIX-P8-32-LABEL: test_f2:
 ; AIX-P8-32:       # %bb.0:
-; AIX-P8-32-NEXT:    lwz r6, L..C0(r2) # %const.0
-; AIX-P8-32-NEXT:    li r7, 4
+; AIX-P8-32-NEXT:    li r6, 4
 ; AIX-P8-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-P8-32-NEXT:    lxsiwzx v0, 0, r4
-; AIX-P8-32-NEXT:    lxsiwzx v2, r3, r7
-; AIX-P8-32-NEXT:    lxsiwzx v5, r4, r7
-; AIX-P8-32-NEXT:    lxvw4x v4, 0, r6
-; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT:    vperm v3, v0, v5, v4
+; AIX-P8-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-P8-32-NEXT:    lxsiwzx v2, r3, r6
+; AIX-P8-32-NEXT:    lxsiwzx v4, r4, r6
+; AIX-P8-32-NEXT:    vmrgow v2, v3, v2
+; AIX-P8-32-NEXT:    vmrgow v3, v5, v4
 ; AIX-P8-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-P8-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-P8-32-NEXT:    xscvspdpn f0, vs0
@@ -57,17 +55,14 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
 ;
 ; AIX-P9-32-LABEL: test_f2:
 ; AIX-P9-32:       # %bb.0:
-; AIX-P9-32-NEXT:    lfiwzx f0, 0, r3
-; AIX-P9-32-NEXT:    lwz r3, 4(r3)
-; AIX-P9-32-NEXT:    xxsldwi vs0, f0, f0, 1
-; AIX-P9-32-NEXT:    mtfprwz f1, r3
-; AIX-P9-32-NEXT:    lwz r3, 4(r4)
-; AIX-P9-32-NEXT:    xxinsertw vs0, vs1, 4
-; AIX-P9-32-NEXT:    lfiwzx f1, 0, r4
-; AIX-P9-32-NEXT:    mtfprwz f2, r3
-; AIX-P9-32-NEXT:    xxsldwi vs1, f1, f1, 1
-; AIX-P9-32-NEXT:    xxinsertw vs1, vs2, 4
-; AIX-P9-32-NEXT:    xvaddsp vs0, vs0, vs1
+; AIX-P9-32-NEXT:    li r6, 4
+; AIX-P9-32-NEXT:    lxsiwzx v3, 0, r3
+; AIX-P9-32-NEXT:    lxsiwzx v4, 0, r4
+; AIX-P9-32-NEXT:    lxsiwzx v2, r3, r6
+; AIX-P9-32-NEXT:    vmrgow v2, v3, v2
+; AIX-P9-32-NEXT:    lxsiwzx v3, r4, r6
+; AIX-P9-32-NEXT:    vmrgow v3, v4, v3
+; AIX-P9-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-P9-32-NEXT:    xscvspdpn f1, vs0
 ; AIX-P9-32-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; AIX-P9-32-NEXT:    xscvspdpn f0, vs0

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 43400d458485d..633befec208de 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -529,18 +529,16 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
 ;
 ; P8-AIX-32-LABEL: testmrglb3:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r5, 4(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C0(r2) # %const.0
-; P8-AIX-32-NEXT:    stw r5, -32(r1)
-; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX-32-NEXT:    addi r4, r1, -16
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-32-NEXT:    vperm v2, v4, v3, v2
+; P8-AIX-32-NEXT:    lwz r4, 4(r3)
 ; P8-AIX-32-NEXT:    xxlxor v3, v3, v3
+; P8-AIX-32-NEXT:    stw r4, -16(r1)
+; P8-AIX-32-NEXT:    addi r4, r1, -32
+; P8-AIX-32-NEXT:    lwz r3, 0(r3)
+; P8-AIX-32-NEXT:    stw r3, -32(r1)
+; P8-AIX-32-NEXT:    addi r3, r1, -16
+; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT:    xxmrghw v2, vs1, vs0
 ; P8-AIX-32-NEXT:    vmrghb v2, v3, v2
 ; P8-AIX-32-NEXT:    blr
 entry:
@@ -706,7 +704,7 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
 ;
 ; P8-AIX-32-LABEL: no_crash_bitcast:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, L..C1(r2) # %const.0
+; P8-AIX-32-NEXT:    lwz r4, L..C0(r2) # %const.0
 ; P8-AIX-32-NEXT:    stw r3, -16(r1)
 ; P8-AIX-32-NEXT:    addi r3, r1, -16
 ; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
@@ -780,8 +778,8 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
 ;
 ; P8-AIX-32-LABEL: replace_undefs_in_splat:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r3, L..C2(r2) # %const.0
-; P8-AIX-32-NEXT:    lwz r4, L..C3(r2) # %const.1
+; P8-AIX-32-NEXT:    lwz r3, L..C1(r2) # %const.0
+; P8-AIX-32-NEXT:    lwz r4, L..C2(r2) # %const.1
 ; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
 ; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
 ; P8-AIX-32-NEXT:    vperm v2, v2, v4, v3
@@ -1025,18 +1023,16 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_
 ;
 ; P8-AIX-32-LABEL: testSplat8:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r5, 4(r3)
-; P8-AIX-32-NEXT:    lwz r4, L..C4(r2) # %const.0
-; P8-AIX-32-NEXT:    stw r5, -32(r1)
+; P8-AIX-32-NEXT:    lwz r4, 4(r3)
+; P8-AIX-32-NEXT:    stw r4, -16(r1)
+; P8-AIX-32-NEXT:    addi r4, r1, -32
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
-; P8-AIX-32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX-32-NEXT:    addi r4, r1, -16
-; P8-AIX-32-NEXT:    stw r3, -16(r1)
-; P8-AIX-32-NEXT:    addi r3, r1, -32
-; P8-AIX-32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX-32-NEXT:    vperm v2, v4, v3, v2
-; P8-AIX-32-NEXT:    xxmrghd v2, v2, v2
+; P8-AIX-32-NEXT:    stw r3, -32(r1)
+; P8-AIX-32-NEXT:    addi r3, r1, -16
+; P8-AIX-32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT:    xxmrghw vs0, vs1, vs0
+; P8-AIX-32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX-32-NEXT:    blr
 entry:
   %0 = load <8 x i8>, <8 x i8>* %ptr, align 8
@@ -1082,7 +1078,7 @@ define <2 x i64> @testSplati64_0(<1 x i64>* nocapture readonly %ptr) #0 {
 ;
 ; P8-AIX-32-LABEL: testSplati64_0:
 ; P8-AIX-32:       # %bb.0: # %entry
-; P8-AIX-32-NEXT:    lwz r4, L..C5(r2) # %const.0
+; P8-AIX-32-NEXT:    lwz r4, L..C3(r2) # %const.0
 ; P8-AIX-32-NEXT:    lwz r5, 4(r3)
 ; P8-AIX-32-NEXT:    lwz r3, 0(r3)
 ; P8-AIX-32-NEXT:    stw r5, -16(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 699f5a8c60b7b..5eb1810ac55d2 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -811,45 +811,42 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
 ;
 ; P9-AIX32-LABEL: unadjusted_lxvdsx:
 ; P9-AIX32:       # %bb.0: # %entry
-; P9-AIX32-NEXT:    lwz r4, 0(r3)
+; P9-AIX32-NEXT:    lwz r4, 4(r3)
 ; P9-AIX32-NEXT:    stw r4, -16(r1)
-; P9-AIX32-NEXT:    lwz r3, 4(r3)
-; P9-AIX32-NEXT:    lxv vs1, -16(r1)
-; P9-AIX32-NEXT:    mtfprwz f0, r3
-; P9-AIX32-NEXT:    xxinsertw vs1, vs0, 4
-; P9-AIX32-NEXT:    xxmrghd v2, vs1, vs1
+; P9-AIX32-NEXT:    lwz r3, 0(r3)
+; P9-AIX32-NEXT:    lxv vs0, -16(r1)
+; P9-AIX32-NEXT:    stw r3, -32(r1)
+; P9-AIX32-NEXT:    lxv vs1, -32(r1)
+; P9-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
+; P9-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P9-AIX32-NEXT:    blr
 ;
 ; P8-AIX32-LABEL: unadjusted_lxvdsx:
 ; P8-AIX32:       # %bb.0: # %entry
-; P8-AIX32-NEXT:    lwz r5, 4(r3)
-; P8-AIX32-NEXT:    lwz r4, L..C3(r2) # %const.0
-; P8-AIX32-NEXT:    stw r5, -32(r1)
+; P8-AIX32-NEXT:    lwz r4, 4(r3)
+; P8-AIX32-NEXT:    stw r4, -16(r1)
+; P8-AIX32-NEXT:    addi r4, r1, -32
 ; P8-AIX32-NEXT:    lwz r3, 0(r3)
-; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
-; P8-AIX32-NEXT:    addi r4, r1, -16
-; P8-AIX32-NEXT:    stw r3, -16(r1)
-; P8-AIX32-NEXT:    addi r3, r1, -32
-; P8-AIX32-NEXT:    lxvw4x v3, 0, r3
-; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
-; P8-AIX32-NEXT:    xxmrghd v2, v2, v2
+; P8-AIX32-NEXT:    stw r3, -32(r1)
+; P8-AIX32-NEXT:    addi r3, r1, -16
+; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT:    lxvw4x vs1, 0, r4
+; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
+; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P8-AIX32-NEXT:    blr
 ;
 ; P7-AIX32-LABEL: unadjusted_lxvdsx:
 ; P7-AIX32:       # %bb.0: # %entry
 ; P7-AIX32-NEXT:    lwz r5, 4(r3)
-; P7-AIX32-NEXT:    lwz r4, L..C3(r2) # %const.0
-; P7-AIX32-NEXT:    stw r5, -32(r1)
-; P7-AIX32-NEXT:    lwz r3, 0(r3)
-; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
 ; P7-AIX32-NEXT:    addi r4, r1, -16
-; P7-AIX32-NEXT:    stw r3, -16(r1)
+; P7-AIX32-NEXT:    stw r5, -16(r1)
+; P7-AIX32-NEXT:    lwz r3, 0(r3)
+; P7-AIX32-NEXT:    stw r3, -32(r1)
 ; P7-AIX32-NEXT:    addi r3, r1, -32
-; P7-AIX32-NEXT:    lxvw4x v3, 0, r3
-; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
-; P7-AIX32-NEXT:    vperm v2, v4, v3, v2
-; P7-AIX32-NEXT:    xxmrghd v2, v2, v2
+; P7-AIX32-NEXT:    lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT:    lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
+; P7-AIX32-NEXT:    xxmrghd v2, vs0, vs0
 ; P7-AIX32-NEXT:    blr
   entry:
     %0 = bitcast i64* %s to <8 x i8>*

diff  --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index d0db193d08426..ad7891c691ea3 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -86,22 +86,22 @@ define void @test64(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
 ; P9BE-AIX32-LABEL: test64:
 ; P9BE-AIX32:       # %bb.0: # %entry
 ; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
-; P9BE-AIX32-NEXT:    lwz 5, L..C0(2) # %const.0
 ; P9BE-AIX32-NEXT:    xxlxor 4, 4, 4
-; P9BE-AIX32-NEXT:    lxv 3, 0(5)
-; P9BE-AIX32-NEXT:    stw 4, -32(1)
+; P9BE-AIX32-NEXT:    stw 4, -48(1)
 ; P9BE-AIX32-NEXT:    lwz 4, 4(3)
-; P9BE-AIX32-NEXT:    lxv 2, -32(1)
-; P9BE-AIX32-NEXT:    stw 4, -16(1)
-; P9BE-AIX32-NEXT:    mtfprwz 0, 4
+; P9BE-AIX32-NEXT:    lxv 0, -48(1)
+; P9BE-AIX32-NEXT:    stw 4, -32(1)
+; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
 ; P9BE-AIX32-NEXT:    lwz 3, 8(3)
-; P9BE-AIX32-NEXT:    xxinsertw 2, 0, 4
-; P9BE-AIX32-NEXT:    mtfprwz 0, 3
+; P9BE-AIX32-NEXT:    lxv 1, -32(1)
+; P9BE-AIX32-NEXT:    lxv 3, 0(4)
+; P9BE-AIX32-NEXT:    stw 3, -16(1)
 ; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
+; P9BE-AIX32-NEXT:    lxv 0, -16(1)
 ; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
-; P9BE-AIX32-NEXT:    lxv 3, -16(1)
 ; P9BE-AIX32-NEXT:    lxv 4, 0(3)
-; P9BE-AIX32-NEXT:    xxinsertw 3, 0, 4
+; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
 ; P9BE-AIX32-NEXT:    vperm 3, 3, 3, 4
 ; P9BE-AIX32-NEXT:    vspltisw 4, 8
 ; P9BE-AIX32-NEXT:    vnegw 3, 3

diff  --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index 5034778592a5c..2c5bc80b79659 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -68,15 +68,13 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test2:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lwz r5, L..C0(r2) # %const.0
-; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v0, 0, r4
-; AIX-32-NEXT:    lxsiwzx v2, r3, r6
-; AIX-32-NEXT:    lxsiwzx v5, r4, r6
-; AIX-32-NEXT:    lxvw4x v4, 0, r5
-; AIX-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-32-NEXT:    vperm v3, v0, v5, v4
+; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r5
+; AIX-32-NEXT:    lxsiwzx v4, r4, r5
+; AIX-32-NEXT:    vmrgow v2, v3, v2
+; AIX-32-NEXT:    vmrgow v3, v5, v4
 ; AIX-32-NEXT:    xvsubsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -114,15 +112,13 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test3:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lwz r5, L..C1(r2) # %const.0
-; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v0, 0, r4
-; AIX-32-NEXT:    lxsiwzx v2, r3, r6
-; AIX-32-NEXT:    lxsiwzx v5, r4, r6
-; AIX-32-NEXT:    lxvw4x v4, 0, r5
-; AIX-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-32-NEXT:    vperm v3, v0, v5, v4
+; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r5
+; AIX-32-NEXT:    lxsiwzx v4, r4, r5
+; AIX-32-NEXT:    vmrgow v2, v3, v2
+; AIX-32-NEXT:    vmrgow v3, v5, v4
 ; AIX-32-NEXT:    xvaddsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -160,15 +156,13 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl
 ;
 ; AIX-32-LABEL: test4:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lwz r5, L..C2(r2) # %const.0
-; AIX-32-NEXT:    li r6, 4
+; AIX-32-NEXT:    li r5, 4
 ; AIX-32-NEXT:    lxsiwzx v3, 0, r3
-; AIX-32-NEXT:    lxsiwzx v0, 0, r4
-; AIX-32-NEXT:    lxsiwzx v2, r3, r6
-; AIX-32-NEXT:    lxsiwzx v5, r4, r6
-; AIX-32-NEXT:    lxvw4x v4, 0, r5
-; AIX-32-NEXT:    vperm v2, v3, v2, v4
-; AIX-32-NEXT:    vperm v3, v0, v5, v4
+; AIX-32-NEXT:    lxsiwzx v5, 0, r4
+; AIX-32-NEXT:    lxsiwzx v2, r3, r5
+; AIX-32-NEXT:    lxsiwzx v4, r4, r5
+; AIX-32-NEXT:    vmrgow v2, v3, v2
+; AIX-32-NEXT:    vmrgow v3, v5, v4
 ; AIX-32-NEXT:    xvmulsp vs0, v2, v3
 ; AIX-32-NEXT:    xxsldwi vs1, vs0, vs0, 1
 ; AIX-32-NEXT:    xscvspdpn f0, vs0
@@ -215,7 +209,7 @@ define dso_local <2 x double> @test5(<2 x double> %a) {
 ;
 ; AIX-32-LABEL: test5:
 ; AIX-32:       # %bb.0: # %entry
-; AIX-32-NEXT:    lwz r3, L..C3(r2) # @G
+; AIX-32-NEXT:    lwz r3, L..C0(r2) # @G
 ; AIX-32-NEXT:    lfs f0, 4(r3)
 ; AIX-32-NEXT:    lfs f1, 0(r3)
 ; AIX-32-NEXT:    xxmrghd vs0, vs1, vs0
@@ -284,7 +278,7 @@ define dso_local i32 @test6() #0 {
 ;
 ; AIX-32-LABEL: test6:
 ; AIX-32:       # %bb.0: # %bb
-; AIX-32-NEXT:    lwz r3, L..C4(r2) # @Glob1
+; AIX-32-NEXT:    lwz r3, L..C1(r2) # @Glob1
 ; AIX-32-NEXT:    lis r4, 8
 ; AIX-32-NEXT:    ori r4, r4, 38248
 ; AIX-32-NEXT:    lfsux f0, r3, r4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
index 6d14b7020a1af..888053d994f4f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
@@ -1025,9 +1025,8 @@ define void @vld3_v2f16(<6 x half> *%src, <2 x half> *%dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    ldrd r2, r3, [r0]
 ; CHECK-NEXT:    ldr r0, [r0, #8]
-; CHECK-NEXT:    vmov.32 q0[0], r2
 ; CHECK-NEXT:    vmov.32 q0[1], r3
-; CHECK-NEXT:    vmov.32 q0[2], r0
+; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
 ; CHECK-NEXT:    vmovx.f16 s8, s0
 ; CHECK-NEXT:    vmovx.f16 s4, s2
 ; CHECK-NEXT:    vins.f16 s8, s2

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 40efd047e98a0..4b28c2b07cacc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -1235,39 +1235,36 @@ entry:
 define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
 ; CHECK-LABEL: vst3_v4f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    ldrd r2, r12, [r0]
-; CHECK-NEXT:    ldrd r3, lr, [r0, #8]
-; CHECK-NEXT:    vmov.32 q0[0], r2
-; CHECK-NEXT:    ldrd r2, r0, [r0, #16]
-; CHECK-NEXT:    vmov.32 q1[0], r3
-; CHECK-NEXT:    vmov.32 q0[1], r12
-; CHECK-NEXT:    vmov.32 q1[1], lr
-; CHECK-NEXT:    vmov.f32 s8, s1
-; CHECK-NEXT:    vmov.f32 s3, s5
-; CHECK-NEXT:    vins.f16 s8, s5
-; CHECK-NEXT:    vmov.f32 s2, s4
-; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    ldrd lr, r12, [r0]
+; CHECK-NEXT:    ldrd r3, r2, [r0, #8]
+; CHECK-NEXT:    ldrd r4, r0, [r0, #16]
+; CHECK-NEXT:    vmov q0[2], q0[0], lr, r3
+; CHECK-NEXT:    vmov.32 q1[0], r4
+; CHECK-NEXT:    vmov q0[3], q0[1], r12, r2
 ; CHECK-NEXT:    vmov.32 q1[1], r0
-; CHECK-NEXT:    vmovx.f16 s13, s3
+; CHECK-NEXT:    vmovx.f16 s9, s3
 ; CHECK-NEXT:    vmovx.f16 s6, s0
 ; CHECK-NEXT:    vins.f16 s0, s2
-; CHECK-NEXT:    vmovx.f16 s10, s4
+; CHECK-NEXT:    vmovx.f16 s8, s4
 ; CHECK-NEXT:    vmovx.f16 s2, s2
 ; CHECK-NEXT:    vins.f16 s4, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s1
-; CHECK-NEXT:    vins.f16 s2, s10
-; CHECK-NEXT:    vmovx.f16 s10, s5
+; CHECK-NEXT:    vins.f16 s2, s8
+; CHECK-NEXT:    vmovx.f16 s8, s5
 ; CHECK-NEXT:    vins.f16 s5, s6
-; CHECK-NEXT:    vins.f16 s13, s10
-; CHECK-NEXT:    vmov.f32 s12, s5
-; CHECK-NEXT:    vmov.f32 s1, s4
-; CHECK-NEXT:    vmov.f32 s3, s8
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    vmov r0, r2, d6
+; CHECK-NEXT:    vins.f16 s9, s8
+; CHECK-NEXT:    vmov.f32 s8, s5
+; CHECK-NEXT:    vins.f16 s1, s3
+; CHECK-NEXT:    vmov r0, r2, d4
+; CHECK-NEXT:    vmov q2, q0
+; CHECK-NEXT:    vmov.f32 s9, s4
+; CHECK-NEXT:    vmov.f32 s10, s2
+; CHECK-NEXT:    vmov.f32 s11, s1
+; CHECK-NEXT:    vstrw.32 q2, [r1]
 ; CHECK-NEXT:    strd r0, r2, [r1, #16]
-; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    pop {r4, pc}
 entry:
   %s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
   %l1 = load <4 x half>, <4 x half>* %s1, align 4

diff  --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index 8c374e77bfcd8..da969e01258d8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -1087,45 +1087,41 @@ entry:
 define void @vst4_v4f16(<4 x half> *%src, <16 x half> *%dst) {
 ; CHECK-LABEL: vst4_v4f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    add.w lr, r0, #16
-; CHECK-NEXT:    ldr r2, [r0, #28]
-; CHECK-NEXT:    ldm.w lr, {r3, r12, lr}
-; CHECK-NEXT:    vmov.32 q1[0], lr
-; CHECK-NEXT:    vmov.32 q1[1], r2
-; CHECK-NEXT:    vmov.32 q0[0], r3
-; CHECK-NEXT:    vmov.32 q0[1], r12
-; CHECK-NEXT:    ldrd r2, r12, [r0]
-; CHECK-NEXT:    ldrd r3, r0, [r0, #8]
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    add.w r6, r0, #16
+; CHECK-NEXT:    ldrd lr, r12, [r0]
+; CHECK-NEXT:    ldrd r3, r2, [r0, #8]
+; CHECK-NEXT:    ldm r6, {r4, r5, r6}
+; CHECK-NEXT:    vmov q1[2], q1[0], lr, r3
+; CHECK-NEXT:    ldr r0, [r0, #28]
+; CHECK-NEXT:    vmov q1[3], q1[1], r12, r2
+; CHECK-NEXT:    vmov q0[2], q0[0], r4, r6
+; CHECK-NEXT:    vmovx.f16 s10, s5
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r0
+; CHECK-NEXT:    vins.f16 s5, s7
 ; CHECK-NEXT:    vmovx.f16 s12, s0
-; CHECK-NEXT:    vmovx.f16 s2, s4
-; CHECK-NEXT:    vmov.f32 s3, s5
-; CHECK-NEXT:    vmov.32 q2[0], r3
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmov.32 q1[0], r2
-; CHECK-NEXT:    vmov.32 q2[1], r0
-; CHECK-NEXT:    vmov.32 q1[1], r12
-; CHECK-NEXT:    vins.f16 s12, s2
-; CHECK-NEXT:    vmovx.f16 s6, s4
-; CHECK-NEXT:    vmovx.f16 s2, s8
-; CHECK-NEXT:    vins.f16 s6, s2
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s2
 ; CHECK-NEXT:    vmovx.f16 s11, s1
+; CHECK-NEXT:    vins.f16 s12, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s3
-; CHECK-NEXT:    vmovx.f16 s10, s5
 ; CHECK-NEXT:    vins.f16 s11, s2
-; CHECK-NEXT:    vmovx.f16 s2, s9
+; CHECK-NEXT:    vmovx.f16 s2, s4
+; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vmovx.f16 s6, s6
 ; CHECK-NEXT:    vins.f16 s1, s3
-; CHECK-NEXT:    vins.f16 s5, s9
-; CHECK-NEXT:    vins.f16 s4, s8
+; CHECK-NEXT:    vins.f16 s2, s6
+; CHECK-NEXT:    vmovx.f16 s6, s7
 ; CHECK-NEXT:    vmov.f32 s8, s5
-; CHECK-NEXT:    vins.f16 s10, s2
+; CHECK-NEXT:    vins.f16 s10, s6
 ; CHECK-NEXT:    vmov.f32 s9, s1
 ; CHECK-NEXT:    vmov.f32 s5, s0
 ; CHECK-NEXT:    vstrh.16 q2, [r1, #16]
+; CHECK-NEXT:    vmov.f32 s6, s2
 ; CHECK-NEXT:    vmov.f32 s7, s12
 ; CHECK-NEXT:    vstrh.16 q1, [r1]
-; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
 entry:
   %s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
   %l1 = load <4 x half>, <4 x half>* %s1, align 4


        


More information about the llvm-commits mailing list