[llvm] a71ad6a - [DAG] visitINSERT_VECTOR_ELT - fold insert_vector_elt(scalar_to_vector(x),v,i) -> build_vector()
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 11 07:29:28 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-11T15:29:22+01:00
New Revision: a71ad6a3c80d2a8526976c03d11bcb97f736ba52
URL: https://github.com/llvm/llvm-project/commit/a71ad6a3c80d2a8526976c03d11bcb97f736ba52
DIFF: https://github.com/llvm/llvm-project/commit/a71ad6a3c80d2a8526976c03d11bcb97f736ba52.diff
LOG: [DAG] visitINSERT_VECTOR_ELT - fold insert_vector_elt(scalar_to_vector(x),v,i) -> build_vector()
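Allow a one-use scalar_to_vector node to be used as the starting point for build_vector creation: element 0 is taken from the scalar operand and the remaining elements start out as undef.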
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
llvm/test/CodeGen/PowerPC/load-and-splat.ll
llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
llvm/test/CodeGen/Thumb2/mve-vld3.ll
llvm/test/CodeGen/Thumb2/mve-vst3.ll
llvm/test/CodeGen/Thumb2/mve-vst4.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 35c39038d0128..6ba9a07952f30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19406,6 +19406,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return UpdateBuildVector(Ops);
}
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+ Ops.push_back(InVec.getOperand(0));
+ Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+ return UpdateBuildVector(Ops);
+ }
+
if (InVec.isUndef()) {
Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
return UpdateBuildVector(Ops);
diff --git a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
index b0716a57b318d..9b0b1e2b5bf09 100644
--- a/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
+++ b/llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll
@@ -30,15 +30,13 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
;
; AIX-P8-32-LABEL: test_f2:
; AIX-P8-32: # %bb.0:
-; AIX-P8-32-NEXT: lwz r6, L..C0(r2) # %const.0
-; AIX-P8-32-NEXT: li r7, 4
+; AIX-P8-32-NEXT: li r6, 4
; AIX-P8-32-NEXT: lxsiwzx v3, 0, r3
-; AIX-P8-32-NEXT: lxsiwzx v0, 0, r4
-; AIX-P8-32-NEXT: lxsiwzx v2, r3, r7
-; AIX-P8-32-NEXT: lxsiwzx v5, r4, r7
-; AIX-P8-32-NEXT: lxvw4x v4, 0, r6
-; AIX-P8-32-NEXT: vperm v2, v3, v2, v4
-; AIX-P8-32-NEXT: vperm v3, v0, v5, v4
+; AIX-P8-32-NEXT: lxsiwzx v5, 0, r4
+; AIX-P8-32-NEXT: lxsiwzx v2, r3, r6
+; AIX-P8-32-NEXT: lxsiwzx v4, r4, r6
+; AIX-P8-32-NEXT: vmrgow v2, v3, v2
+; AIX-P8-32-NEXT: vmrgow v3, v5, v4
; AIX-P8-32-NEXT: xvaddsp vs0, v2, v3
; AIX-P8-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-P8-32-NEXT: xscvspdpn f0, vs0
@@ -57,17 +55,14 @@ define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
;
; AIX-P9-32-LABEL: test_f2:
; AIX-P9-32: # %bb.0:
-; AIX-P9-32-NEXT: lfiwzx f0, 0, r3
-; AIX-P9-32-NEXT: lwz r3, 4(r3)
-; AIX-P9-32-NEXT: xxsldwi vs0, f0, f0, 1
-; AIX-P9-32-NEXT: mtfprwz f1, r3
-; AIX-P9-32-NEXT: lwz r3, 4(r4)
-; AIX-P9-32-NEXT: xxinsertw vs0, vs1, 4
-; AIX-P9-32-NEXT: lfiwzx f1, 0, r4
-; AIX-P9-32-NEXT: mtfprwz f2, r3
-; AIX-P9-32-NEXT: xxsldwi vs1, f1, f1, 1
-; AIX-P9-32-NEXT: xxinsertw vs1, vs2, 4
-; AIX-P9-32-NEXT: xvaddsp vs0, vs0, vs1
+; AIX-P9-32-NEXT: li r6, 4
+; AIX-P9-32-NEXT: lxsiwzx v3, 0, r3
+; AIX-P9-32-NEXT: lxsiwzx v4, 0, r4
+; AIX-P9-32-NEXT: lxsiwzx v2, r3, r6
+; AIX-P9-32-NEXT: vmrgow v2, v3, v2
+; AIX-P9-32-NEXT: lxsiwzx v3, r4, r6
+; AIX-P9-32-NEXT: vmrgow v3, v4, v3
+; AIX-P9-32-NEXT: xvaddsp vs0, v2, v3
; AIX-P9-32-NEXT: xscvspdpn f1, vs0
; AIX-P9-32-NEXT: xxsldwi vs0, vs0, vs0, 1
; AIX-P9-32-NEXT: xscvspdpn f0, vs0
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 43400d458485d..633befec208de 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -529,18 +529,16 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un
;
; P8-AIX-32-LABEL: testmrglb3:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r5, 4(r3)
-; P8-AIX-32-NEXT: lwz r4, L..C0(r2) # %const.0
-; P8-AIX-32-NEXT: stw r5, -32(r1)
-; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: lxvw4x v2, 0, r4
-; P8-AIX-32-NEXT: addi r4, r1, -16
-; P8-AIX-32-NEXT: stw r3, -16(r1)
-; P8-AIX-32-NEXT: addi r3, r1, -32
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: lxvw4x v4, 0, r4
-; P8-AIX-32-NEXT: vperm v2, v4, v3, v2
+; P8-AIX-32-NEXT: lwz r4, 4(r3)
; P8-AIX-32-NEXT: xxlxor v3, v3, v3
+; P8-AIX-32-NEXT: stw r4, -16(r1)
+; P8-AIX-32-NEXT: addi r4, r1, -32
+; P8-AIX-32-NEXT: lwz r3, 0(r3)
+; P8-AIX-32-NEXT: stw r3, -32(r1)
+; P8-AIX-32-NEXT: addi r3, r1, -16
+; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT: lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0
; P8-AIX-32-NEXT: vmrghb v2, v3, v2
; P8-AIX-32-NEXT: blr
entry:
@@ -706,7 +704,7 @@ define dso_local <16 x i8> @no_crash_bitcast(i32 %a) {
;
; P8-AIX-32-LABEL: no_crash_bitcast:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r4, L..C1(r2) # %const.0
+; P8-AIX-32-NEXT: lwz r4, L..C0(r2) # %const.0
; P8-AIX-32-NEXT: stw r3, -16(r1)
; P8-AIX-32-NEXT: addi r3, r1, -16
; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
@@ -780,8 +778,8 @@ define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_
;
; P8-AIX-32-LABEL: replace_undefs_in_splat:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r3, L..C2(r2) # %const.0
-; P8-AIX-32-NEXT: lwz r4, L..C3(r2) # %const.1
+; P8-AIX-32-NEXT: lwz r3, L..C1(r2) # %const.0
+; P8-AIX-32-NEXT: lwz r4, L..C2(r2) # %const.1
; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
; P8-AIX-32-NEXT: lxvw4x v4, 0, r4
; P8-AIX-32-NEXT: vperm v2, v2, v4, v3
@@ -1025,18 +1023,16 @@ define dso_local <2 x i64> @testSplat8(<8 x i8>* nocapture readonly %ptr) local_
;
; P8-AIX-32-LABEL: testSplat8:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r5, 4(r3)
-; P8-AIX-32-NEXT: lwz r4, L..C4(r2) # %const.0
-; P8-AIX-32-NEXT: stw r5, -32(r1)
+; P8-AIX-32-NEXT: lwz r4, 4(r3)
+; P8-AIX-32-NEXT: stw r4, -16(r1)
+; P8-AIX-32-NEXT: addi r4, r1, -32
; P8-AIX-32-NEXT: lwz r3, 0(r3)
-; P8-AIX-32-NEXT: lxvw4x v2, 0, r4
-; P8-AIX-32-NEXT: addi r4, r1, -16
-; P8-AIX-32-NEXT: stw r3, -16(r1)
-; P8-AIX-32-NEXT: addi r3, r1, -32
-; P8-AIX-32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX-32-NEXT: lxvw4x v4, 0, r4
-; P8-AIX-32-NEXT: vperm v2, v4, v3, v2
-; P8-AIX-32-NEXT: xxmrghd v2, v2, v2
+; P8-AIX-32-NEXT: stw r3, -32(r1)
+; P8-AIX-32-NEXT: addi r3, r1, -16
+; P8-AIX-32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX-32-NEXT: lxvw4x vs1, 0, r4
+; P8-AIX-32-NEXT: xxmrghw vs0, vs1, vs0
+; P8-AIX-32-NEXT: xxmrghd v2, vs0, vs0
; P8-AIX-32-NEXT: blr
entry:
%0 = load <8 x i8>, <8 x i8>* %ptr, align 8
@@ -1082,7 +1078,7 @@ define <2 x i64> @testSplati64_0(<1 x i64>* nocapture readonly %ptr) #0 {
;
; P8-AIX-32-LABEL: testSplati64_0:
; P8-AIX-32: # %bb.0: # %entry
-; P8-AIX-32-NEXT: lwz r4, L..C5(r2) # %const.0
+; P8-AIX-32-NEXT: lwz r4, L..C3(r2) # %const.0
; P8-AIX-32-NEXT: lwz r5, 4(r3)
; P8-AIX-32-NEXT: lwz r3, 0(r3)
; P8-AIX-32-NEXT: stw r5, -16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index 699f5a8c60b7b..5eb1810ac55d2 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -811,45 +811,42 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
;
; P9-AIX32-LABEL: unadjusted_lxvdsx:
; P9-AIX32: # %bb.0: # %entry
-; P9-AIX32-NEXT: lwz r4, 0(r3)
+; P9-AIX32-NEXT: lwz r4, 4(r3)
; P9-AIX32-NEXT: stw r4, -16(r1)
-; P9-AIX32-NEXT: lwz r3, 4(r3)
-; P9-AIX32-NEXT: lxv vs1, -16(r1)
-; P9-AIX32-NEXT: mtfprwz f0, r3
-; P9-AIX32-NEXT: xxinsertw vs1, vs0, 4
-; P9-AIX32-NEXT: xxmrghd v2, vs1, vs1
+; P9-AIX32-NEXT: lwz r3, 0(r3)
+; P9-AIX32-NEXT: lxv vs0, -16(r1)
+; P9-AIX32-NEXT: stw r3, -32(r1)
+; P9-AIX32-NEXT: lxv vs1, -32(r1)
+; P9-AIX32-NEXT: xxmrghw vs0, vs1, vs0
+; P9-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P9-AIX32-NEXT: blr
;
; P8-AIX32-LABEL: unadjusted_lxvdsx:
; P8-AIX32: # %bb.0: # %entry
-; P8-AIX32-NEXT: lwz r5, 4(r3)
-; P8-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0
-; P8-AIX32-NEXT: stw r5, -32(r1)
+; P8-AIX32-NEXT: lwz r4, 4(r3)
+; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: addi r4, r1, -32
; P8-AIX32-NEXT: lwz r3, 0(r3)
-; P8-AIX32-NEXT: lxvw4x v2, 0, r4
-; P8-AIX32-NEXT: addi r4, r1, -16
-; P8-AIX32-NEXT: stw r3, -16(r1)
-; P8-AIX32-NEXT: addi r3, r1, -32
-; P8-AIX32-NEXT: lxvw4x v3, 0, r3
-; P8-AIX32-NEXT: lxvw4x v4, 0, r4
-; P8-AIX32-NEXT: vperm v2, v4, v3, v2
-; P8-AIX32-NEXT: xxmrghd v2, v2, v2
+; P8-AIX32-NEXT: stw r3, -32(r1)
+; P8-AIX32-NEXT: addi r3, r1, -16
+; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: lxvw4x vs1, 0, r4
+; P8-AIX32-NEXT: xxmrghw vs0, vs1, vs0
+; P8-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P8-AIX32-NEXT: blr
;
; P7-AIX32-LABEL: unadjusted_lxvdsx:
; P7-AIX32: # %bb.0: # %entry
; P7-AIX32-NEXT: lwz r5, 4(r3)
-; P7-AIX32-NEXT: lwz r4, L..C3(r2) # %const.0
-; P7-AIX32-NEXT: stw r5, -32(r1)
-; P7-AIX32-NEXT: lwz r3, 0(r3)
-; P7-AIX32-NEXT: lxvw4x v2, 0, r4
; P7-AIX32-NEXT: addi r4, r1, -16
-; P7-AIX32-NEXT: stw r3, -16(r1)
+; P7-AIX32-NEXT: stw r5, -16(r1)
+; P7-AIX32-NEXT: lwz r3, 0(r3)
+; P7-AIX32-NEXT: stw r3, -32(r1)
; P7-AIX32-NEXT: addi r3, r1, -32
-; P7-AIX32-NEXT: lxvw4x v3, 0, r3
-; P7-AIX32-NEXT: lxvw4x v4, 0, r4
-; P7-AIX32-NEXT: vperm v2, v4, v3, v2
-; P7-AIX32-NEXT: xxmrghd v2, v2, v2
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT: lxvw4x vs1, 0, r3
+; P7-AIX32-NEXT: xxmrghw vs0, vs1, vs0
+; P7-AIX32-NEXT: xxmrghd v2, vs0, vs0
; P7-AIX32-NEXT: blr
entry:
%0 = bitcast i64* %s to <8 x i8>*
diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
index d0db193d08426..ad7891c691ea3 100644
--- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -86,22 +86,22 @@ define void @test64(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: lwzux 4, 3, 4
-; P9BE-AIX32-NEXT: lwz 5, L..C0(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 4, 4, 4
-; P9BE-AIX32-NEXT: lxv 3, 0(5)
-; P9BE-AIX32-NEXT: stw 4, -32(1)
+; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: lwz 4, 4(3)
-; P9BE-AIX32-NEXT: lxv 2, -32(1)
-; P9BE-AIX32-NEXT: stw 4, -16(1)
-; P9BE-AIX32-NEXT: mtfprwz 0, 4
+; P9BE-AIX32-NEXT: lxv 0, -48(1)
+; P9BE-AIX32-NEXT: stw 4, -32(1)
+; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
; P9BE-AIX32-NEXT: lwz 3, 8(3)
-; P9BE-AIX32-NEXT: xxinsertw 2, 0, 4
-; P9BE-AIX32-NEXT: mtfprwz 0, 3
+; P9BE-AIX32-NEXT: lxv 1, -32(1)
+; P9BE-AIX32-NEXT: lxv 3, 0(4)
+; P9BE-AIX32-NEXT: stw 3, -16(1)
; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
+; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
+; P9BE-AIX32-NEXT: lxv 0, -16(1)
; P9BE-AIX32-NEXT: vperm 2, 4, 2, 3
-; P9BE-AIX32-NEXT: lxv 3, -16(1)
; P9BE-AIX32-NEXT: lxv 4, 0(3)
-; P9BE-AIX32-NEXT: xxinsertw 3, 0, 4
+; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
; P9BE-AIX32-NEXT: vperm 3, 3, 3, 4
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: vnegw 3, 3
diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
index 5034778592a5c..2c5bc80b79659 100644
--- a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
+++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll
@@ -68,15 +68,13 @@ define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test2:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lwz r5, L..C0(r2) # %const.0
-; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: li r5, 4
; AIX-32-NEXT: lxsiwzx v3, 0, r3
-; AIX-32-NEXT: lxsiwzx v0, 0, r4
-; AIX-32-NEXT: lxsiwzx v2, r3, r6
-; AIX-32-NEXT: lxsiwzx v5, r4, r6
-; AIX-32-NEXT: lxvw4x v4, 0, r5
-; AIX-32-NEXT: vperm v2, v3, v2, v4
-; AIX-32-NEXT: vperm v3, v0, v5, v4
+; AIX-32-NEXT: lxsiwzx v5, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r5
+; AIX-32-NEXT: lxsiwzx v4, r4, r5
+; AIX-32-NEXT: vmrgow v2, v3, v2
+; AIX-32-NEXT: vmrgow v3, v5, v4
; AIX-32-NEXT: xvsubsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -114,15 +112,13 @@ define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test3:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lwz r5, L..C1(r2) # %const.0
-; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: li r5, 4
; AIX-32-NEXT: lxsiwzx v3, 0, r3
-; AIX-32-NEXT: lxsiwzx v0, 0, r4
-; AIX-32-NEXT: lxsiwzx v2, r3, r6
-; AIX-32-NEXT: lxsiwzx v5, r4, r6
-; AIX-32-NEXT: lxvw4x v4, 0, r5
-; AIX-32-NEXT: vperm v2, v3, v2, v4
-; AIX-32-NEXT: vperm v3, v0, v5, v4
+; AIX-32-NEXT: lxsiwzx v5, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r5
+; AIX-32-NEXT: lxsiwzx v4, r4, r5
+; AIX-32-NEXT: vmrgow v2, v3, v2
+; AIX-32-NEXT: vmrgow v3, v5, v4
; AIX-32-NEXT: xvaddsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -160,15 +156,13 @@ define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x fl
;
; AIX-32-LABEL: test4:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lwz r5, L..C2(r2) # %const.0
-; AIX-32-NEXT: li r6, 4
+; AIX-32-NEXT: li r5, 4
; AIX-32-NEXT: lxsiwzx v3, 0, r3
-; AIX-32-NEXT: lxsiwzx v0, 0, r4
-; AIX-32-NEXT: lxsiwzx v2, r3, r6
-; AIX-32-NEXT: lxsiwzx v5, r4, r6
-; AIX-32-NEXT: lxvw4x v4, 0, r5
-; AIX-32-NEXT: vperm v2, v3, v2, v4
-; AIX-32-NEXT: vperm v3, v0, v5, v4
+; AIX-32-NEXT: lxsiwzx v5, 0, r4
+; AIX-32-NEXT: lxsiwzx v2, r3, r5
+; AIX-32-NEXT: lxsiwzx v4, r4, r5
+; AIX-32-NEXT: vmrgow v2, v3, v2
+; AIX-32-NEXT: vmrgow v3, v5, v4
; AIX-32-NEXT: xvmulsp vs0, v2, v3
; AIX-32-NEXT: xxsldwi vs1, vs0, vs0, 1
; AIX-32-NEXT: xscvspdpn f0, vs0
@@ -215,7 +209,7 @@ define dso_local <2 x double> @test5(<2 x double> %a) {
;
; AIX-32-LABEL: test5:
; AIX-32: # %bb.0: # %entry
-; AIX-32-NEXT: lwz r3, L..C3(r2) # @G
+; AIX-32-NEXT: lwz r3, L..C0(r2) # @G
; AIX-32-NEXT: lfs f0, 4(r3)
; AIX-32-NEXT: lfs f1, 0(r3)
; AIX-32-NEXT: xxmrghd vs0, vs1, vs0
@@ -284,7 +278,7 @@ define dso_local i32 @test6() #0 {
;
; AIX-32-LABEL: test6:
; AIX-32: # %bb.0: # %bb
-; AIX-32-NEXT: lwz r3, L..C4(r2) # @Glob1
+; AIX-32-NEXT: lwz r3, L..C1(r2) # @Glob1
; AIX-32-NEXT: lis r4, 8
; AIX-32-NEXT: ori r4, r4, 38248
; AIX-32-NEXT: lfsux f0, r3, r4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
index 6d14b7020a1af..888053d994f4f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
@@ -1025,9 +1025,8 @@ define void @vld3_v2f16(<6 x half> *%src, <2 x half> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r2, r3, [r0]
; CHECK-NEXT: ldr r0, [r0, #8]
-; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
-; CHECK-NEXT: vmov.32 q0[2], r0
+; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: vmovx.f16 s8, s0
; CHECK-NEXT: vmovx.f16 s4, s2
; CHECK-NEXT: vins.f16 s8, s2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 40efd047e98a0..4b28c2b07cacc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -1235,39 +1235,36 @@ entry:
define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-LABEL: vst3_v4f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: ldrd r2, r12, [r0]
-; CHECK-NEXT: ldrd r3, lr, [r0, #8]
-; CHECK-NEXT: vmov.32 q0[0], r2
-; CHECK-NEXT: ldrd r2, r0, [r0, #16]
-; CHECK-NEXT: vmov.32 q1[0], r3
-; CHECK-NEXT: vmov.32 q0[1], r12
-; CHECK-NEXT: vmov.32 q1[1], lr
-; CHECK-NEXT: vmov.f32 s8, s1
-; CHECK-NEXT: vmov.f32 s3, s5
-; CHECK-NEXT: vins.f16 s8, s5
-; CHECK-NEXT: vmov.f32 s2, s4
-; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: ldrd lr, r12, [r0]
+; CHECK-NEXT: ldrd r3, r2, [r0, #8]
+; CHECK-NEXT: ldrd r4, r0, [r0, #16]
+; CHECK-NEXT: vmov q0[2], q0[0], lr, r3
+; CHECK-NEXT: vmov.32 q1[0], r4
+; CHECK-NEXT: vmov q0[3], q0[1], r12, r2
; CHECK-NEXT: vmov.32 q1[1], r0
-; CHECK-NEXT: vmovx.f16 s13, s3
+; CHECK-NEXT: vmovx.f16 s9, s3
; CHECK-NEXT: vmovx.f16 s6, s0
; CHECK-NEXT: vins.f16 s0, s2
-; CHECK-NEXT: vmovx.f16 s10, s4
+; CHECK-NEXT: vmovx.f16 s8, s4
; CHECK-NEXT: vmovx.f16 s2, s2
; CHECK-NEXT: vins.f16 s4, s6
; CHECK-NEXT: vmovx.f16 s6, s1
-; CHECK-NEXT: vins.f16 s2, s10
-; CHECK-NEXT: vmovx.f16 s10, s5
+; CHECK-NEXT: vins.f16 s2, s8
+; CHECK-NEXT: vmovx.f16 s8, s5
; CHECK-NEXT: vins.f16 s5, s6
-; CHECK-NEXT: vins.f16 s13, s10
-; CHECK-NEXT: vmov.f32 s12, s5
-; CHECK-NEXT: vmov.f32 s1, s4
-; CHECK-NEXT: vmov.f32 s3, s8
-; CHECK-NEXT: vstrw.32 q0, [r1]
-; CHECK-NEXT: vmov r0, r2, d6
+; CHECK-NEXT: vins.f16 s9, s8
+; CHECK-NEXT: vmov.f32 s8, s5
+; CHECK-NEXT: vins.f16 s1, s3
+; CHECK-NEXT: vmov r0, r2, d4
+; CHECK-NEXT: vmov q2, q0
+; CHECK-NEXT: vmov.f32 s9, s4
+; CHECK-NEXT: vmov.f32 s10, s2
+; CHECK-NEXT: vmov.f32 s11, s1
+; CHECK-NEXT: vstrw.32 q2, [r1]
; CHECK-NEXT: strd r0, r2, [r1, #16]
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
%l1 = load <4 x half>, <4 x half>* %s1, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index 8c374e77bfcd8..da969e01258d8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -1087,45 +1087,41 @@ entry:
define void @vst4_v4f16(<4 x half> *%src, <16 x half> *%dst) {
; CHECK-LABEL: vst4_v4f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: add.w lr, r0, #16
-; CHECK-NEXT: ldr r2, [r0, #28]
-; CHECK-NEXT: ldm.w lr, {r3, r12, lr}
-; CHECK-NEXT: vmov.32 q1[0], lr
-; CHECK-NEXT: vmov.32 q1[1], r2
-; CHECK-NEXT: vmov.32 q0[0], r3
-; CHECK-NEXT: vmov.32 q0[1], r12
-; CHECK-NEXT: ldrd r2, r12, [r0]
-; CHECK-NEXT: ldrd r3, r0, [r0, #8]
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: add.w r6, r0, #16
+; CHECK-NEXT: ldrd lr, r12, [r0]
+; CHECK-NEXT: ldrd r3, r2, [r0, #8]
+; CHECK-NEXT: ldm r6, {r4, r5, r6}
+; CHECK-NEXT: vmov q1[2], q1[0], lr, r3
+; CHECK-NEXT: ldr r0, [r0, #28]
+; CHECK-NEXT: vmov q1[3], q1[1], r12, r2
+; CHECK-NEXT: vmov q0[2], q0[0], r4, r6
+; CHECK-NEXT: vmovx.f16 s10, s5
+; CHECK-NEXT: vmov q0[3], q0[1], r5, r0
+; CHECK-NEXT: vins.f16 s5, s7
; CHECK-NEXT: vmovx.f16 s12, s0
-; CHECK-NEXT: vmovx.f16 s2, s4
-; CHECK-NEXT: vmov.f32 s3, s5
-; CHECK-NEXT: vmov.32 q2[0], r3
-; CHECK-NEXT: vins.f16 s0, s4
-; CHECK-NEXT: vmov.32 q1[0], r2
-; CHECK-NEXT: vmov.32 q2[1], r0
-; CHECK-NEXT: vmov.32 q1[1], r12
-; CHECK-NEXT: vins.f16 s12, s2
-; CHECK-NEXT: vmovx.f16 s6, s4
-; CHECK-NEXT: vmovx.f16 s2, s8
-; CHECK-NEXT: vins.f16 s6, s2
+; CHECK-NEXT: vins.f16 s0, s2
+; CHECK-NEXT: vmovx.f16 s2, s2
; CHECK-NEXT: vmovx.f16 s11, s1
+; CHECK-NEXT: vins.f16 s12, s2
; CHECK-NEXT: vmovx.f16 s2, s3
-; CHECK-NEXT: vmovx.f16 s10, s5
; CHECK-NEXT: vins.f16 s11, s2
-; CHECK-NEXT: vmovx.f16 s2, s9
+; CHECK-NEXT: vmovx.f16 s2, s4
+; CHECK-NEXT: vins.f16 s4, s6
+; CHECK-NEXT: vmovx.f16 s6, s6
; CHECK-NEXT: vins.f16 s1, s3
-; CHECK-NEXT: vins.f16 s5, s9
-; CHECK-NEXT: vins.f16 s4, s8
+; CHECK-NEXT: vins.f16 s2, s6
+; CHECK-NEXT: vmovx.f16 s6, s7
; CHECK-NEXT: vmov.f32 s8, s5
-; CHECK-NEXT: vins.f16 s10, s2
+; CHECK-NEXT: vins.f16 s10, s6
; CHECK-NEXT: vmov.f32 s9, s1
; CHECK-NEXT: vmov.f32 s5, s0
; CHECK-NEXT: vstrh.16 q2, [r1, #16]
+; CHECK-NEXT: vmov.f32 s6, s2
; CHECK-NEXT: vmov.f32 s7, s12
; CHECK-NEXT: vstrh.16 q1, [r1]
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
%l1 = load <4 x half>, <4 x half>* %s1, align 4
More information about the llvm-commits mailing list