[llvm] [ARM][Codegen] Fix vector data miscompilation in arm32be (PR #105519)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 07:48:38 PDT 2024
https://github.com/Zhenhang1213 updated https://github.com/llvm/llvm-project/pull/105519
>From ba395a7bfb56855ceae727eae68e353d9d35aa81 Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Mon, 26 Aug 2024 10:11:04 +0800
Subject: [PATCH 1/3] [ARM][Codegen] Fix vector data miscompilation in arm32be
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
.../ARM/big-endian-neon-fp16-bitconv.ll | 3 +-
llvm/test/CodeGen/ARM/big-endian-vmov.ll | 1 -
llvm/test/CodeGen/Thumb2/mve-be.ll | 2 -
llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll | 2 -
.../test/CodeGen/Thumb2/mve-pred-loadstore.ll | 8 +---
llvm/test/CodeGen/Thumb2/mve-pred-spill.ll | 18 ++++-----
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 37 +++++++------------
8 files changed, 26 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4ab0433069ae66..568aa210e116ed 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7966,7 +7966,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 4026495a0f2b41..a4f5d1c61eae73 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
-; CHECK-NEXT: vrev64.16 d17, d17
-; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.i16 d16, d18, d16
+; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
index 1cb7a030d58c26..3f372905a6e43b 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -177,7 +177,6 @@ define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) {
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
; CHECK-BE-NEXT: vrev64.32 q9, q0
-; CHECK-BE-NEXT: vrev64.32 q8, q8
; CHECK-BE-NEXT: vand q8, q9, q8
; CHECK-BE-NEXT: vrev64.32 q0, q8
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-be.ll b/llvm/test/CodeGen/Thumb2/mve-be.ll
index 2f2ecc76472374..e1db733b13b415 100644
--- a/llvm/test/CodeGen/Thumb2/mve-be.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-be.ll
@@ -232,7 +232,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_le(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -254,7 +253,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_be(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1000000
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index 470007878ec842..0d0e45956080de 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #4
@@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: rbit r0, r0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
index a92adf6f1a067b..ba3d5c22fc671b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
@@ -105,7 +105,6 @@ define arm_aapcs_vfpcc <8 x i16> @load_v8i1(ptr %src, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -130,7 +129,6 @@ define arm_aapcs_vfpcc <16 x i8> @load_v16i1(ptr %src, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: rbit r0, r0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
@@ -416,10 +414,9 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(ptr %i, <8 x i16> %a) {
;
; CHECK-BE-LABEL: load_predcast8:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -439,10 +436,9 @@ define arm_aapcs_vfpcc <16 x i8> @load_predcast16(ptr %i, <16 x i8> %a) {
;
; CHECK-BE-LABEL: load_predcast16:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
index 3bc129d0fd92e5..c17066126083a9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
@@ -156,11 +156,10 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q4, q1
-; CHECK-BE-NEXT: vmov.i32 q1, #0x0
-; CHECK-BE-NEXT: vrev64.16 q2, q0
-; CHECK-BE-NEXT: vrev32.16 q1, q1
-; CHECK-BE-NEXT: vcmp.i16 eq, q2, zr
-; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q4, q0
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bl ext_i16
@@ -209,11 +208,10 @@ define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.8 q4, q1
-; CHECK-BE-NEXT: vmov.i32 q1, #0x0
-; CHECK-BE-NEXT: vrev64.8 q2, q0
-; CHECK-BE-NEXT: vrev32.8 q1, q1
-; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr
-; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.8 q1, q0
+; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q4, q0
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bl ext_i8
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 729e4c5e89c75e..868b23b6805649 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -127,7 +127,6 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int8_32(<16 x i8> %a) {
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i32 q1, #0x1
; CHECKBE-NEXT: vrev64.8 q2, q0
-; CHECKBE-NEXT: vrev32.8 q1, q1
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
@@ -160,9 +159,8 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int8_64(<16 x i8> %a) {
; CHECKBE-LABEL: xor_int8_64:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff0000ffff00ffff
-; CHECKBE-NEXT: vrev64.8 q2, q1
-; CHECKBE-NEXT: vrev64.8 q1, q0
-; CHECKBE-NEXT: veor q1, q1, q2
+; CHECKBE-NEXT: vrev64.8 q2, q0
+; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -372,9 +370,8 @@ define arm_aapcs_vfpcc <8 x i16> @xor_int16_64(<8 x i16> %a) {
; CHECKBE-LABEL: xor_int16_64:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff0000000000ff
-; CHECKBE-NEXT: vrev64.16 q2, q1
-; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: veor q1, q1, q2
+; CHECKBE-NEXT: vrev64.16 q2, q0
+; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -756,9 +753,8 @@ define arm_aapcs_vfpcc <4 x i32> @xor_int32_64(<4 x i32> %a) {
; CHECKBE-LABEL: xor_int32_64:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff0000ff00ff
-; CHECKBE-NEXT: vrev64.32 q2, q1
-; CHECKBE-NEXT: vrev64.32 q1, q0
-; CHECKBE-NEXT: veor q1, q1, q2
+; CHECKBE-NEXT: vrev64.32 q2, q0
+; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -985,9 +981,8 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f000f0f(<16 x i8> %a) {
; CHECKBE-LABEL: xor_int64_0f000f0f:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
-; CHECKBE-NEXT: vrev64.8 q2, q1
-; CHECKBE-NEXT: vrev64.8 q1, q0
-; CHECKBE-NEXT: veor q1, q1, q2
+; CHECKBE-NEXT: vrev64.8 q2, q0
+; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -1019,9 +1014,8 @@ define arm_aapcs_vfpcc <8 x i16> @xor_int64_ff00ffff(<8 x i16> %a) {
; CHECKBE-LABEL: xor_int64_ff00ffff:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
-; CHECKBE-NEXT: vrev64.16 q2, q1
-; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: veor q1, q1, q2
+; CHECKBE-NEXT: vrev64.16 q2, q0
+; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -1055,7 +1049,6 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f0f0f0f0f0f0f0f(<16 x i8> %a) {
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i16 q1, #0xff
; CHECKBE-NEXT: vrev64.8 q2, q0
-; CHECKBE-NEXT: vrev16.8 q1, q1
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
@@ -1196,9 +1189,8 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
-; CHECKBE-NEXT: vrev64.8 q2, q1
-; CHECKBE-NEXT: vrev64.8 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.8 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -1216,9 +1208,8 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
-; CHECKBE-NEXT: vrev64.16 q2, q1
-; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.16 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
>From 1a0d93806b31a872da1154beea6e01fb024a573c Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Mon, 26 Aug 2024 10:17:48 +0800
Subject: [PATCH 2/3] [Clang][Codegen] fix vector data by modifying VMVN
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
llvm/test/CodeGen/ARM/big-endian-vmov.ll | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 568aa210e116ed..d627544dcca85b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7976,7 +7976,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
index 3f372905a6e43b..8a4532a2ae2d1b 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -226,7 +226,6 @@ define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_m1(<8 x i16> %a) {
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmvn.i32 q8, #0x10000
; CHECK-BE-NEXT: vrev64.16 q9, q0
-; CHECK-BE-NEXT: vrev32.16 q8, q8
; CHECK-BE-NEXT: veor q8, q9, q8
; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: bx lr
>From 6958ffc0dee18982d4ac4bf87ef22c976126d153 Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Tue, 27 Aug 2024 09:48:01 +0800
Subject: [PATCH 3/3] fix rearranged i64 vectors in PerformBITCASTCombine and
remove the FIXME in tests
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 15 +-
llvm/test/CodeGen/ARM/big-endian-vmov.ll | 61 ++--
llvm/test/CodeGen/ARM/vmov.ll | 328 +++++++++++---------
llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll | 22 +-
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 47 ++-
5 files changed, 258 insertions(+), 215 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d627544dcca85b..08c83ba6d4b264 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7121,19 +7121,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getDataLayout().isBigEndian()) {
- // Reverse the order of elements within the vector.
- unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
- unsigned Mask = (1 << BytesPerElem) - 1;
- unsigned NumElems = 8 / BytesPerElem;
- unsigned NewImm = 0;
- for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
- unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
- NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
- }
- Imm = NewImm;
- }
-
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -18604,7 +18591,7 @@ static SDValue PerformBITCASTCombine(SDNode *N,
if ((Src.getOpcode() == ARMISD::VMOVIMM ||
Src.getOpcode() == ARMISD::VMVNIMM ||
Src.getOpcode() == ARMISD::VMOVFPIMM) &&
- SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
+ SrcVT.getScalarSizeInBits() < DstVT.getScalarSizeInBits() &&
DAG.getDataLayout().isBigEndian())
return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
index 8a4532a2ae2d1b..7e0947ccfd58e8 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -10,7 +10,8 @@ define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
;
; CHECK-BE-LABEL: vmov_i8:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
+; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: bx lr
ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
}
@@ -23,7 +24,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
;
; CHECK-BE-LABEL: vmov_i16_a:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xffff
+; CHECK-BE-NEXT: vmov.i64 d16, #0xffff000000000000
+; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
}
@@ -36,7 +38,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
;
; CHECK-BE-LABEL: vmov_i16_b:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
+; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
}
@@ -49,7 +52,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
;
; CHECK-BE-LABEL: vmov_i16_c:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
+; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: bx lr
ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
}
@@ -62,7 +66,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
;
; CHECK-BE-LABEL: vmov_i32_a:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xffffffff
+; CHECK-BE-NEXT: vmov.i64 d16, #0xffffffff00000000
+; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 -1>
}
@@ -75,7 +80,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
;
; CHECK-BE-LABEL: vmov_i32_b:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
+; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 255>
}
@@ -88,7 +94,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
;
; CHECK-BE-LABEL: vmov_i32_c:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff00
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000000000
+; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 65280>
}
@@ -101,7 +108,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
;
; CHECK-BE-LABEL: vmov_i32_d:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff0000
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000
+; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 16711680>
}
@@ -114,7 +122,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
;
; CHECK-BE-LABEL: vmov_i32_e:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 d0, #0xff000000
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000
+; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: bx lr
ret <2 x i32> <i32 0, i32 4278190080>
}
@@ -128,10 +137,16 @@ define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
}
define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
-; CHECK-LABEL: vmov_i64_b:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i64 d0, #0xffff00ff0000ff
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: vmov_i64_b:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.i64 d0, #0xffff00ff0000ff
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: vmov_i64_b:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000ff00ffff00
+; CHECK-BE-NEXT: vrev64.32 d0, d16
+; CHECK-BE-NEXT: bx lr
ret <1 x i64> <i64 72056498804490495>
}
@@ -157,11 +172,18 @@ define arm_aapcs_vfpcc <4 x i32> @vmov_v4i32_b() {
}
define arm_aapcs_vfpcc <2 x i64> @and_v2i64_b(<2 x i64> %a) {
-; CHECK-LABEL: and_v2i64_b:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i64 q8, #0xffff00ff0000ff
-; CHECK-NEXT: vand q0, q0, q8
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: and_v2i64_b:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
+; CHECK-LE-NEXT: vand q0, q0, q8
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: and_v2i64_b:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
+; CHECK-BE-NEXT: vrev64.32 q8, q8
+; CHECK-BE-NEXT: vand q0, q0, q8
+; CHECK-BE-NEXT: bx lr
%b = and <2 x i64> %a, <i64 72056498804490495, i64 72056498804490495>
ret <2 x i64> %b
}
@@ -175,7 +197,7 @@ define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) {
;
; CHECK-BE-LABEL: and_v4i32_b:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff
+; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00
; CHECK-BE-NEXT: vrev64.32 q9, q0
; CHECK-BE-NEXT: vand q8, q9, q8
; CHECK-BE-NEXT: vrev64.32 q0, q8
@@ -197,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() {
ret <8 x i16> <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
; CHECK-LE-LABEL: and_v8i16_m1:
; CHECK-LE: @ %bb.0:
diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll
index 8835497669b324..616800333b02f7 100644
--- a/llvm/test/CodeGen/ARM/vmov.ll
+++ b/llvm/test/CodeGen/ARM/vmov.ll
@@ -7,7 +7,7 @@ define arm_aapcs_vfpcc <8 x i8> @v_movi8() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i8 d0, #0x8
; CHECK-NEXT: mov pc, lr
- ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define arm_aapcs_vfpcc <4 x i16> @v_movi16a() nounwind {
@@ -15,7 +15,7 @@ define arm_aapcs_vfpcc <4 x i16> @v_movi16a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i16 d0, #0x10
; CHECK-NEXT: mov pc, lr
- ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
+ ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}
define arm_aapcs_vfpcc <4 x i16> @v_movi16b() nounwind {
@@ -23,7 +23,7 @@ define arm_aapcs_vfpcc <4 x i16> @v_movi16b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i16 d0, #0x1000
; CHECK-NEXT: mov pc, lr
- ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
+ ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}
define arm_aapcs_vfpcc <4 x i16> @v_mvni16a() nounwind {
@@ -31,7 +31,7 @@ define arm_aapcs_vfpcc <4 x i16> @v_mvni16a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i16 d0, #0x10
; CHECK-NEXT: mov pc, lr
- ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
+ ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}
define arm_aapcs_vfpcc <4 x i16> @v_mvni16b() nounwind {
@@ -39,7 +39,7 @@ define arm_aapcs_vfpcc <4 x i16> @v_mvni16b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i16 d0, #0x1000
; CHECK-NEXT: mov pc, lr
- ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
+ ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32a() nounwind {
@@ -47,7 +47,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x20
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 32, i32 32 >
+ ret <2 x i32> < i32 32, i32 32 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32b() nounwind {
@@ -55,7 +55,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x2000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 8192, i32 8192 >
+ ret <2 x i32> < i32 8192, i32 8192 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32c() nounwind {
@@ -63,7 +63,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32c() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x200000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 2097152, i32 2097152 >
+ ret <2 x i32> < i32 2097152, i32 2097152 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32d() nounwind {
@@ -71,7 +71,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32d() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x20000000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 536870912, i32 536870912 >
+ ret <2 x i32> < i32 536870912, i32 536870912 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32e() nounwind {
@@ -79,7 +79,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32e() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x20ff
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 8447, i32 8447 >
+ ret <2 x i32> < i32 8447, i32 8447 >
}
define arm_aapcs_vfpcc <2 x i32> @v_movi32f() nounwind {
@@ -87,7 +87,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_movi32f() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 d0, #0x20ffff
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 2162687, i32 2162687 >
+ ret <2 x i32> < i32 2162687, i32 2162687 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32a() nounwind {
@@ -95,7 +95,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x20
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 4294967263, i32 4294967263 >
+ ret <2 x i32> < i32 4294967263, i32 4294967263 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32b() nounwind {
@@ -103,7 +103,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x2000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 4294959103, i32 4294959103 >
+ ret <2 x i32> < i32 4294959103, i32 4294959103 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32c() nounwind {
@@ -111,7 +111,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32c() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x200000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 4292870143, i32 4292870143 >
+ ret <2 x i32> < i32 4292870143, i32 4292870143 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32d() nounwind {
@@ -119,7 +119,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32d() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x20000000
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 3758096383, i32 3758096383 >
+ ret <2 x i32> < i32 3758096383, i32 3758096383 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32e() nounwind {
@@ -127,7 +127,7 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32e() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x20ff
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 4294958848, i32 4294958848 >
+ ret <2 x i32> < i32 4294958848, i32 4294958848 >
}
define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind {
@@ -135,15 +135,21 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmvn.i32 d0, #0x20ffff
; CHECK-NEXT: mov pc, lr
- ret <2 x i32> < i32 4292804608, i32 4292804608 >
+ ret <2 x i32> < i32 4292804608, i32 4292804608 >
}
define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind {
-; CHECK-LABEL: v_movi64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
-; CHECK-NEXT: mov pc, lr
- ret <1 x i64> < i64 18374687574888349695 >
+; CHECK-LE-LABEL: v_movi64:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.i64 d0, #0xff0000ff0000ffff
+; CHECK-LE-NEXT: mov pc, lr
+;
+; CHECK-BE-LABEL: v_movi64:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.i64 d16, #0xffffff0000ff
+; CHECK-BE-NEXT: vrev64.32 d0, d16
+; CHECK-BE-NEXT: mov pc, lr
+ ret <1 x i64> < i64 18374687574888349695 >
}
define arm_aapcs_vfpcc <16 x i8> @v_movQi8() nounwind {
@@ -151,7 +157,7 @@ define arm_aapcs_vfpcc <16 x i8> @v_movQi8() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i8 q0, #0x8
; CHECK-NEXT: mov pc, lr
- ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
+ ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}
define arm_aapcs_vfpcc <8 x i16> @v_movQi16a() nounwind {
@@ -159,7 +165,7 @@ define arm_aapcs_vfpcc <8 x i16> @v_movQi16a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i16 q0, #0x10
; CHECK-NEXT: mov pc, lr
- ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
+ ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}
define arm_aapcs_vfpcc <8 x i16> @v_movQi16b() nounwind {
@@ -167,7 +173,7 @@ define arm_aapcs_vfpcc <8 x i16> @v_movQi16b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i16 q0, #0x1000
; CHECK-NEXT: mov pc, lr
- ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
+ ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32a() nounwind {
@@ -175,7 +181,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32a() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x20
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
+ ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32b() nounwind {
@@ -183,7 +189,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32b() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x2000
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
+ ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32c() nounwind {
@@ -191,7 +197,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32c() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x200000
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
+ ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32d() nounwind {
@@ -199,7 +205,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32d() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x20000000
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
+ ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32e() nounwind {
@@ -207,7 +213,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32e() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x20ff
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
+ ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}
define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
@@ -215,7 +221,7 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i32 q0, #0x20ffff
; CHECK-NEXT: mov pc, lr
- ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
+ ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}
define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
@@ -223,7 +229,7 @@ define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
; CHECK-NEXT: mov pc, lr
- ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
+ ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
; Check for correct assembler printing for immediate values.
@@ -237,7 +243,7 @@ define arm_aapcs_vfpcc void @vdupn128(ptr noalias nocapture sret(%struct.int8x8_
entry:
%0 = getelementptr inbounds %struct.int8x8_t, ptr %agg.result, i32 0, i32 0 ; <ptr> [#uses=1]
store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, ptr %0, align 8
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @vdupnneg75(ptr noalias nocapture sret(%struct.int8x8_t) %agg.result) nounwind {
@@ -249,7 +255,7 @@ define arm_aapcs_vfpcc void @vdupnneg75(ptr noalias nocapture sret(%struct.int8x
entry:
%0 = getelementptr inbounds %struct.int8x8_t, ptr %agg.result, i32 0, i32 0 ; <ptr> [#uses=1]
store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, ptr %0, align 8
- ret void
+ ret void
}
define arm_aapcs_vfpcc <8 x i16> @vmovls8(ptr %A) nounwind {
@@ -265,9 +271,9 @@ define arm_aapcs_vfpcc <8 x i16> @vmovls8(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovl.s8 q8, d16
; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
- ret <8 x i16> %tmp2
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+ ret <8 x i16> %tmp2
}
define arm_aapcs_vfpcc <4 x i32> @vmovls16(ptr %A) nounwind {
@@ -283,9 +289,9 @@ define arm_aapcs_vfpcc <4 x i32> @vmovls16(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovl.s16 q8, d16
; CHECK-BE-NEXT: vrev64.32 q0, q8
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
- ret <4 x i32> %tmp2
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
}
define arm_aapcs_vfpcc <2 x i64> @vmovls32(ptr %A) nounwind {
@@ -294,9 +300,9 @@ define arm_aapcs_vfpcc <2 x i64> @vmovls32(ptr %A) nounwind {
; CHECK-NEXT: vld1.32 {d16}, [r0:64]
; CHECK-NEXT: vmovl.s32 q0, d16
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
- ret <2 x i64> %tmp2
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
}
define arm_aapcs_vfpcc <8 x i16> @vmovlu8(ptr %A) nounwind {
@@ -312,9 +318,9 @@ define arm_aapcs_vfpcc <8 x i16> @vmovlu8(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovl.u8 q8, d16
; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i8>, ptr %A
- %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
- ret <8 x i16> %tmp2
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
+ ret <8 x i16> %tmp2
}
define arm_aapcs_vfpcc <4 x i32> @vmovlu16(ptr %A) nounwind {
@@ -330,9 +336,9 @@ define arm_aapcs_vfpcc <4 x i32> @vmovlu16(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovl.u16 q8, d16
; CHECK-BE-NEXT: vrev64.32 q0, q8
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i16>, ptr %A
- %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
- ret <4 x i32> %tmp2
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
+ ret <4 x i32> %tmp2
}
define arm_aapcs_vfpcc <2 x i64> @vmovlu32(ptr %A) nounwind {
@@ -341,9 +347,9 @@ define arm_aapcs_vfpcc <2 x i64> @vmovlu32(ptr %A) nounwind {
; CHECK-NEXT: vld1.32 {d16}, [r0:64]
; CHECK-NEXT: vmovl.u32 q0, d16
; CHECK-NEXT: mov pc, lr
- %tmp1 = load <2 x i32>, ptr %A
- %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
- ret <2 x i64> %tmp2
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
+ ret <2 x i64> %tmp2
}
define arm_aapcs_vfpcc <8 x i8> @vmovni16(ptr %A) nounwind {
@@ -360,9 +366,9 @@ define arm_aapcs_vfpcc <8 x i8> @vmovni16(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovn.i16 d16, q8
; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
- ret <8 x i8> %tmp2
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
+ ret <8 x i8> %tmp2
}
define arm_aapcs_vfpcc <4 x i16> @vmovni32(ptr %A) nounwind {
@@ -379,9 +385,9 @@ define arm_aapcs_vfpcc <4 x i16> @vmovni32(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovn.i32 d16, q8
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
- ret <4 x i16> %tmp2
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
}
define arm_aapcs_vfpcc <2 x i32> @vmovni64(ptr %A) nounwind {
@@ -397,9 +403,9 @@ define arm_aapcs_vfpcc <2 x i32> @vmovni64(ptr %A) nounwind {
; CHECK-BE-NEXT: vmovn.i64 d16, q8
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <2 x i64>, ptr %A
- %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
- ret <2 x i32> %tmp2
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
}
define arm_aapcs_vfpcc <8 x i8> @vqmovns16(ptr %A) nounwind {
@@ -416,9 +422,9 @@ define arm_aapcs_vfpcc <8 x i8> @vqmovns16(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.s16 d16, q8
; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
}
define arm_aapcs_vfpcc <4 x i16> @vqmovns32(ptr %A) nounwind {
@@ -435,9 +441,9 @@ define arm_aapcs_vfpcc <4 x i16> @vqmovns32(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.s32 d16, q8
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
}
define arm_aapcs_vfpcc <2 x i32> @vqmovns64(ptr %A) nounwind {
@@ -453,9 +459,9 @@ define arm_aapcs_vfpcc <2 x i32> @vqmovns64(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.s64 d16, q8
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <2 x i64>, ptr %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
}
define arm_aapcs_vfpcc <8 x i8> @vqmovnu16(ptr %A) nounwind {
@@ -472,9 +478,9 @@ define arm_aapcs_vfpcc <8 x i8> @vqmovnu16(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.u16 d16, q8
; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
}
define arm_aapcs_vfpcc <4 x i16> @vqmovnu32(ptr %A) nounwind {
@@ -491,9 +497,9 @@ define arm_aapcs_vfpcc <4 x i16> @vqmovnu32(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.u32 d16, q8
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
}
define arm_aapcs_vfpcc <2 x i32> @vqmovnu64(ptr %A) nounwind {
@@ -509,9 +515,9 @@ define arm_aapcs_vfpcc <2 x i32> @vqmovnu64(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovn.u64 d16, q8
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <2 x i64>, ptr %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
}
define arm_aapcs_vfpcc <8 x i8> @vqmovuns16(ptr %A) nounwind {
@@ -528,9 +534,9 @@ define arm_aapcs_vfpcc <8 x i8> @vqmovuns16(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovun.s16 d16, q8
; CHECK-BE-NEXT: vrev64.8 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <8 x i16>, ptr %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
- ret <8 x i8> %tmp2
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
}
define arm_aapcs_vfpcc <4 x i16> @vqmovuns32(ptr %A) nounwind {
@@ -547,9 +553,9 @@ define arm_aapcs_vfpcc <4 x i16> @vqmovuns32(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovun.s32 d16, q8
; CHECK-BE-NEXT: vrev64.16 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <4 x i32>, ptr %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
- ret <4 x i16> %tmp2
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
}
define arm_aapcs_vfpcc <2 x i32> @vqmovuns64(ptr %A) nounwind {
@@ -565,9 +571,9 @@ define arm_aapcs_vfpcc <2 x i32> @vqmovuns64(ptr %A) nounwind {
; CHECK-BE-NEXT: vqmovun.s64 d16, q8
; CHECK-BE-NEXT: vrev64.32 d0, d16
; CHECK-BE-NEXT: mov pc, lr
- %tmp1 = load <2 x i64>, ptr %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
- ret <2 x i32> %tmp2
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
}
declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
@@ -603,7 +609,7 @@ define arm_aapcs_vfpcc void @noTruncStore(ptr %a, ptr %b) nounwind {
%tmp1 = load <4 x i32>, ptr %a, align 16
%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
store <4 x i16> %tmp2, ptr %b, align 8
- ret void
+ ret void
}
; Use vmov.f32 to materialize f32 immediate splats
@@ -616,7 +622,7 @@ define arm_aapcs_vfpcc void @v_mov_v2f32(ptr nocapture %p) nounwind {
; CHECK-NEXT: mov pc, lr
entry:
store <2 x float> <float -1.600000e+01, float -1.600000e+01>, ptr %p, align 4
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mov_v4f32(ptr nocapture %p) nounwind {
@@ -633,7 +639,7 @@ define arm_aapcs_vfpcc void @v_mov_v4f32(ptr nocapture %p) nounwind {
; CHECK-BE-NEXT: mov pc, lr
entry:
store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, ptr %p, align 4
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mov_v4f32_undef(ptr nocapture %p) nounwind {
@@ -658,7 +664,7 @@ entry:
%a = load <4 x float> , ptr %p
%b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
store <4 x float> %b, ptr %p
- ret void
+ ret void
}
; Vector any_extends must be selected as either vmovl.u or vmovl.s.
@@ -700,7 +706,7 @@ define arm_aapcs_vfpcc void @v_movi8_sti8(ptr %p) {
; CHECK-NEXT: vst1.8 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v8i8(ptr %p, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, i32 1)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi8_sti16(ptr %p) {
@@ -711,7 +717,7 @@ define arm_aapcs_vfpcc void @v_movi8_sti16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x i16>
call void @llvm.arm.neon.vst1.p0.v4i16(ptr %p, <4 x i16> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi8_stf16(ptr %p) {
@@ -722,7 +728,7 @@ define arm_aapcs_vfpcc void @v_movi8_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x half>
call void @llvm.arm.neon.vst1.p0.v4f16(ptr %p, <4 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi8_sti32(ptr %p) {
@@ -733,7 +739,7 @@ define arm_aapcs_vfpcc void @v_movi8_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x i32>
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi8_stf32(ptr %p) {
@@ -744,7 +750,7 @@ define arm_aapcs_vfpcc void @v_movi8_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x float>
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi8_sti64(ptr %p) {
@@ -755,7 +761,7 @@ define arm_aapcs_vfpcc void @v_movi8_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi16_sti16(ptr %p) {
@@ -765,7 +771,7 @@ define arm_aapcs_vfpcc void @v_movi16_sti16(ptr %p) {
; CHECK-NEXT: vst1.16 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v4i16(ptr %p, <4 x i16> <i16 1, i16 1, i16 1, i16 1>, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi16_stf16(ptr %p) {
@@ -776,7 +782,7 @@ define arm_aapcs_vfpcc void @v_movi16_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <4 x half>
call void @llvm.arm.neon.vst1.p0.v4f16(ptr %p, <4 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi16_sti32(ptr %p) {
@@ -787,7 +793,7 @@ define arm_aapcs_vfpcc void @v_movi16_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <2 x i32>
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi16_stf32(ptr %p) {
@@ -798,7 +804,7 @@ define arm_aapcs_vfpcc void @v_movi16_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <2 x float>
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi16_sti64(ptr %p) {
@@ -809,7 +815,7 @@ define arm_aapcs_vfpcc void @v_movi16_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi32_sti32(ptr %p) {
@@ -819,7 +825,7 @@ define arm_aapcs_vfpcc void @v_movi32_sti32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> <i32 1, i32 1>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi32_stf32(ptr %p) {
@@ -830,7 +836,7 @@ define arm_aapcs_vfpcc void @v_movi32_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <2 x i32> <i32 1, i32 1> to <2 x float>
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi32_sti64(ptr %p) {
@@ -841,7 +847,7 @@ define arm_aapcs_vfpcc void @v_movi32_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <2 x i32> <i32 1, i32 1> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movf32_stf32(ptr %p) {
@@ -851,7 +857,7 @@ define arm_aapcs_vfpcc void @v_movf32_stf32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> <float 1.0, float 1.0>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movf32_sti32(ptr %p) {
@@ -864,7 +870,7 @@ define arm_aapcs_vfpcc void @v_movf32_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <2 x float> <float 1.0, float 1.0> to <2 x i32>
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movf32_sti64(ptr %p) {
@@ -885,17 +891,24 @@ define arm_aapcs_vfpcc void @v_movf32_sti64(ptr %p) {
; CHECK-BE-NEXT: mov pc, lr
%val = bitcast <2 x float> <float 1.0, float 1.0> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movi64_sti64(ptr %p) {
-; CHECK-LABEL: v_movi64_sti64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i64 d16, #0xff
-; CHECK-NEXT: vst1.64 {d16}, [r0:64]
-; CHECK-NEXT: mov pc, lr
+; CHECK-LE-LABEL: v_movi64_sti64:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.i64 d16, #0xff
+; CHECK-LE-NEXT: vst1.64 {d16}, [r0:64]
+; CHECK-LE-NEXT: mov pc, lr
+;
+; CHECK-BE-LABEL: v_movi64_sti64:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000
+; CHECK-BE-NEXT: vrev64.32 d16, d16
+; CHECK-BE-NEXT: vst1.64 {d16}, [r0:64]
+; CHECK-BE-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> <i64 255>, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_sti8(ptr %p) {
@@ -905,7 +918,7 @@ define arm_aapcs_vfpcc void @v_movQi8_sti8(ptr %p) {
; CHECK-NEXT: vst1.8 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v16i8(ptr %p, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, i32 1)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_sti16(ptr %p) {
@@ -916,7 +929,7 @@ define arm_aapcs_vfpcc void @v_movQi8_sti16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <8 x i16>
call void @llvm.arm.neon.vst1.p0.v8i16(ptr %p, <8 x i16> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_stf16(ptr %p) {
@@ -927,7 +940,7 @@ define arm_aapcs_vfpcc void @v_movQi8_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <8 x half>
call void @llvm.arm.neon.vst1.p0.v8f16(ptr %p, <8 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_sti32(ptr %p) {
@@ -938,7 +951,7 @@ define arm_aapcs_vfpcc void @v_movQi8_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x i32>
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_stf32(ptr %p) {
@@ -949,7 +962,7 @@ define arm_aapcs_vfpcc void @v_movQi8_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x float>
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi8_sti64(ptr %p) {
@@ -960,7 +973,7 @@ define arm_aapcs_vfpcc void @v_movQi8_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi16_sti16(ptr %p) {
@@ -970,7 +983,7 @@ define arm_aapcs_vfpcc void @v_movQi16_sti16(ptr %p) {
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v8i16(ptr %p, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi16_stf16(ptr %p) {
@@ -981,7 +994,7 @@ define arm_aapcs_vfpcc void @v_movQi16_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <8 x half>
call void @llvm.arm.neon.vst1.p0.v8f16(ptr %p, <8 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi16_sti32(ptr %p) {
@@ -992,7 +1005,7 @@ define arm_aapcs_vfpcc void @v_movQi16_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <4 x i32>
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi16_stf32(ptr %p) {
@@ -1003,7 +1016,7 @@ define arm_aapcs_vfpcc void @v_movQi16_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <4 x float>
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi16_sti64(ptr %p) {
@@ -1014,7 +1027,7 @@ define arm_aapcs_vfpcc void @v_movQi16_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi32_sti32(ptr %p) {
@@ -1024,7 +1037,7 @@ define arm_aapcs_vfpcc void @v_movQi32_sti32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi32_stf32(ptr %p) {
@@ -1035,7 +1048,7 @@ define arm_aapcs_vfpcc void @v_movQi32_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i32> <i32 1, i32 1, i32 1, i32 1> to <4 x float>
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi32_sti64(ptr %p) {
@@ -1046,7 +1059,7 @@ define arm_aapcs_vfpcc void @v_movQi32_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i32> <i32 1, i32 1, i32 1, i32 1> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQf32_stf32(ptr %p) {
@@ -1056,7 +1069,7 @@ define arm_aapcs_vfpcc void @v_movQf32_stf32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQf32_sti32(ptr %p) {
@@ -1069,7 +1082,7 @@ define arm_aapcs_vfpcc void @v_movQf32_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQf32_sti64(ptr %p) {
@@ -1090,17 +1103,24 @@ define arm_aapcs_vfpcc void @v_movQf32_sti64(ptr %p) {
; CHECK-BE-NEXT: mov pc, lr
%val = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_movQi64_sti64(ptr %p) {
-; CHECK-LABEL: v_movQi64_sti64:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i64 q8, #0xff
-; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64]
-; CHECK-NEXT: mov pc, lr
+; CHECK-LE-LABEL: v_movQi64_sti64:
+; CHECK-LE: @ %bb.0:
+; CHECK-LE-NEXT: vmov.i64 q8, #0xff
+; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r0:64]
+; CHECK-LE-NEXT: mov pc, lr
+;
+; CHECK-BE-LABEL: v_movQi64_sti64:
+; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vmov.i64 q8, #0xff00000000
+; CHECK-BE-NEXT: vrev64.32 q8, q8
+; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r0:64]
+; CHECK-BE-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> <i64 255, i64 255>, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni16_sti16(ptr %p) {
@@ -1110,7 +1130,7 @@ define arm_aapcs_vfpcc void @v_mvni16_sti16(ptr %p) {
; CHECK-NEXT: vst1.16 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v4i16(ptr %p, <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281>, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni16_stf16(ptr %p) {
@@ -1121,7 +1141,7 @@ define arm_aapcs_vfpcc void @v_mvni16_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <4 x half>
call void @llvm.arm.neon.vst1.p0.v4f16(ptr %p, <4 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni16_sti32(ptr %p) {
@@ -1132,7 +1152,7 @@ define arm_aapcs_vfpcc void @v_mvni16_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <2 x i32>
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni16_stf32(ptr %p) {
@@ -1143,7 +1163,7 @@ define arm_aapcs_vfpcc void @v_mvni16_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <2 x float>
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni16_sti64(ptr %p) {
@@ -1154,7 +1174,7 @@ define arm_aapcs_vfpcc void @v_mvni16_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni32_sti32(ptr %p) {
@@ -1164,7 +1184,7 @@ define arm_aapcs_vfpcc void @v_mvni32_sti32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v2i32(ptr %p, <2 x i32> <i32 4294967041, i32 4294967041>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni32_stf32(ptr %p) {
@@ -1175,7 +1195,7 @@ define arm_aapcs_vfpcc void @v_mvni32_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <2 x i32> <i32 4294967041, i32 4294967041> to <2 x float>
call void @llvm.arm.neon.vst1.p0.v2f32(ptr %p, <2 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvni32_sti64(ptr %p) {
@@ -1186,7 +1206,7 @@ define arm_aapcs_vfpcc void @v_mvni32_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <2 x i32> <i32 4294967041, i32 4294967041> to <1 x i64>
call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> %val, i32 8)
- ret void
+ ret void
}
@@ -1197,7 +1217,7 @@ define arm_aapcs_vfpcc void @v_mvnQi16_sti16(ptr %p) {
; CHECK-NEXT: vst1.16 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v8i16(ptr %p, <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi16_stf16(ptr %p) {
@@ -1208,7 +1228,7 @@ define arm_aapcs_vfpcc void @v_mvnQi16_stf16(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <8 x half>
call void @llvm.arm.neon.vst1.p0.v8f16(ptr %p, <8 x half> %val, i32 2)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi16_sti32(ptr %p) {
@@ -1219,7 +1239,7 @@ define arm_aapcs_vfpcc void @v_mvnQi16_sti32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <4 x i32>
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi16_stf32(ptr %p) {
@@ -1230,7 +1250,7 @@ define arm_aapcs_vfpcc void @v_mvnQi16_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <4 x float>
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi16_sti64(ptr %p) {
@@ -1241,7 +1261,7 @@ define arm_aapcs_vfpcc void @v_mvnQi16_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi32_sti32(ptr %p) {
@@ -1251,7 +1271,7 @@ define arm_aapcs_vfpcc void @v_mvnQi32_sti32(ptr %p) {
; CHECK-NEXT: vst1.32 {d16, d17}, [r0]
; CHECK-NEXT: mov pc, lr
call void @llvm.arm.neon.vst1.p0.v4i32(ptr %p, <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041>, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi32_stf32(ptr %p) {
@@ -1262,7 +1282,7 @@ define arm_aapcs_vfpcc void @v_mvnQi32_stf32(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041> to <4 x float>
call void @llvm.arm.neon.vst1.p0.v4f32(ptr %p, <4 x float> %val, i32 4)
- ret void
+ ret void
}
define arm_aapcs_vfpcc void @v_mvnQi32_sti64(ptr %p) {
@@ -1273,7 +1293,7 @@ define arm_aapcs_vfpcc void @v_mvnQi32_sti64(ptr %p) {
; CHECK-NEXT: mov pc, lr
%val = bitcast <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041> to <2 x i64>
call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> %val, i32 8)
- ret void
+ ret void
}
declare void @llvm.arm.neon.vst1.p0.v8i8(ptr, <8 x i8>, i32) nounwind
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index c7c579f9d65362..4934d223209037 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -421,13 +421,14 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
; CHECK-BE-NEXT: vmov.32 q1[3], r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: .LBB7_4: @ %else2
-; CHECK-BE-NEXT: vrev64.32 q3, q2
+; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vmov r2, s15
-; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
-; CHECK-BE-NEXT: vand q0, q0, q1
+; CHECK-BE-NEXT: vrev64.32 q3, q1
+; CHECK-BE-NEXT: vrev64.32 q1, q2
+; CHECK-BE-NEXT: vmov r2, s7
+; CHECK-BE-NEXT: vand q0, q0, q3
; CHECK-BE-NEXT: rsbs r3, r2, #0
-; CHECK-BE-NEXT: vmov r3, s13
+; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
; CHECK-BE-NEXT: csetm r12, lt
; CHECK-BE-NEXT: rsbs r2, r3, #0
@@ -537,13 +538,14 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
; CHECK-BE-NEXT: vmov.32 q1[3], r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: .LBB8_4: @ %else2
-; CHECK-BE-NEXT: vrev64.32 q3, q2
+; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000
; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vmov r2, s15
-; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff
-; CHECK-BE-NEXT: vand q0, q0, q1
+; CHECK-BE-NEXT: vrev64.32 q3, q1
+; CHECK-BE-NEXT: vrev64.32 q1, q2
+; CHECK-BE-NEXT: vmov r2, s7
+; CHECK-BE-NEXT: vand q0, q0, q3
; CHECK-BE-NEXT: rsbs r3, r2, #0
-; CHECK-BE-NEXT: vmov r3, s13
+; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31
; CHECK-BE-NEXT: csetm r12, lt
; CHECK-BE-NEXT: rsbs r2, r3, #0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 868b23b6805649..fe63034c7acd37 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -115,7 +115,6 @@ entry:
ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0>
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <16 x i8> @xor_int8_32(<16 x i8> %a) {
; CHECKLE-LABEL: xor_int8_32:
; CHECKLE: @ %bb.0: @ %entry
@@ -158,7 +157,7 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int8_64(<16 x i8> %a) {
;
; CHECKBE-LABEL: xor_int8_64:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff0000ffff00ffff
+; CHECKBE-NEXT: vmov.i64 q1, #0xffff00ffff0000ff
; CHECKBE-NEXT: vrev64.8 q2, q0
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
@@ -752,7 +751,7 @@ define arm_aapcs_vfpcc <4 x i32> @xor_int32_64(<4 x i32> %a) {
;
; CHECKBE-LABEL: xor_int32_64:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff0000ff00ff
+; CHECKBE-NEXT: vmov.i64 q1, #0xff00ffff00ff00
; CHECKBE-NEXT: vrev64.32 q2, q0
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.32 q0, q1
@@ -837,11 +836,18 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff(<2 x i64> %a) {
-; CHECK-LABEL: xor_int64_ff:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.i64 q1, #0xff
-; CHECK-NEXT: veor q0, q0, q1
-; CHECK-NEXT: bx lr
+; CHECKLE-LABEL: xor_int64_ff:
+; CHECKLE: @ %bb.0: @ %entry
+; CHECKLE-NEXT: vmov.i64 q1, #0xff
+; CHECKLE-NEXT: veor q0, q0, q1
+; CHECKLE-NEXT: bx lr
+;
+; CHECKBE-LABEL: xor_int64_ff:
+; CHECKBE: @ %bb.0: @ %entry
+; CHECKBE-NEXT: vmov.i64 q1, #0xff00000000
+; CHECKBE-NEXT: vrev64.32 q2, q1
+; CHECKBE-NEXT: veor q0, q0, q2
+; CHECKBE-NEXT: bx lr
entry:
%b = xor <2 x i64> %a, <i64 255, i64 255>
ret <2 x i64> %b
@@ -882,11 +888,18 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff0000ff0000ffff(<2 x i64> %a) {
-; CHECK-LABEL: xor_int64_ff0000ff0000ffff:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.i64 q1, #0xff0000ff0000ffff
-; CHECK-NEXT: veor q0, q0, q1
-; CHECK-NEXT: bx lr
+; CHECKLE-LABEL: xor_int64_ff0000ff0000ffff:
+; CHECKLE: @ %bb.0: @ %entry
+; CHECKLE-NEXT: vmov.i64 q1, #0xff0000ff0000ffff
+; CHECKLE-NEXT: veor q0, q0, q1
+; CHECKLE-NEXT: bx lr
+;
+; CHECKBE-LABEL: xor_int64_ff0000ff0000ffff:
+; CHECKBE: @ %bb.0: @ %entry
+; CHECKBE-NEXT: vmov.i64 q1, #0xffffff0000ff
+; CHECKBE-NEXT: vrev64.32 q2, q1
+; CHECKBE-NEXT: veor q0, q0, q2
+; CHECKBE-NEXT: bx lr
entry:
%b = xor <2 x i64> %a, <i64 18374687574888349695, i64 18374687574888349695>
ret <2 x i64> %b
@@ -980,7 +993,7 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f000f0f(<16 x i8> %a) {
;
; CHECKBE-LABEL: xor_int64_0f000f0f:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
+; CHECKBE-NEXT: vmov.i64 q1, #0xff000000ff00ff
; CHECKBE-NEXT: vrev64.8 q2, q0
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
@@ -1013,7 +1026,7 @@ define arm_aapcs_vfpcc <8 x i16> @xor_int64_ff00ffff(<8 x i16> %a) {
;
; CHECKBE-LABEL: xor_int64_ff00ffff:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
+; CHECKBE-NEXT: vmov.i64 q1, #0xffffffff0000ffff
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: veor q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
@@ -1188,7 +1201,7 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
;
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
+; CHECKBE-NEXT: vmov.i64 q1, #0xff000000ff00ff
; CHECKBE-NEXT: vrev64.8 q2, q0
; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
@@ -1207,7 +1220,7 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
;
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
+; CHECKBE-NEXT: vmov.i64 q1, #0xffffffff0000ffff
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
More information about the llvm-commits
mailing list