[llvm] r368304 - [ARM] MVE big endian loads/stores
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 8 08:15:20 PDT 2019
Author: dmgreen
Date: Thu Aug 8 08:15:19 2019
New Revision: 368304
URL: http://llvm.org/viewvc/llvm-project?rev=368304&view=rev
Log:
[ARM] MVE big endian loads/stores
This adds some missing patterns for big endian loads/stores, allowing unaligned
loads/stores to be selected with an extra VREV, which produces better code than
aligning through the stack. It also moves the VLDR_P0 patterns so that they are
no longer LE only, and adjusts some of the tests to show all of that working.
Differential Revision: https://reviews.llvm.org/D65583
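As a rough illustration of the effect (a sketch only; the function name is
illustrative, and the authoritative output is what the updated tests below
check), an unaligned vector load on a big endian MVE target can now be
selected as a byte load plus a VREV, instead of being expanded through a
stack temporary:

define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(<4 x i32>* %p) {
entry:
  ; An align-1 load of a 128-bit vector. Before this patch, big endian
  ; lowering could not treat this as a legal misaligned access.
  %l = load <4 x i32>, <4 x i32>* %p, align 1
  ret <4 x i32> %l
}

Compiled with llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve, this
should now produce something like:

  vldrb.u8 q1, [r0]  @ byte loads are valid at any alignment
  vrev64.8 q0, q1    @ recover the big endian lane order
  bx lr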
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
llvm/trunk/test/CodeGen/Thumb2/mve-be.ll
llvm/trunk/test/CodeGen/Thumb2/mve-loadstore.ll
llvm/trunk/test/CodeGen/Thumb2/mve-pred-spill.ll
llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Aug 8 08:15:19 2019
@@ -14075,45 +14075,21 @@ bool ARMTargetLowering::allowsMisaligned
return true;
}
- if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
- Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
- Ty != MVT::v2f64)
- return false;
-
- if (Subtarget->isLittle()) {
- // In little-endian MVE, the store instructions VSTRB.U8,
- // VSTRH.U16 and VSTRW.U32 all store the vector register in
- // exactly the same format, and differ only in the range of
- // their immediate offset field and the required alignment.
- //
- // In particular, VSTRB.U8 can store a vector at byte alignment.
- // So at this stage we can simply say that loads/stores of all
- // 128-bit wide vector types are permitted at any alignment,
- // because we know at least _one_ instruction can manage that.
- //
- // Later on we might find that some of those loads are better
- // generated as VLDRW.U32 if alignment permits, to take
- // advantage of the larger immediate range. But for the moment,
- // all that matters is that if we don't lower the load then
- // _some_ instruction can handle it.
+ // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
+ // VSTRW.U32 all store the vector register in exactly the same format, and
+ // differ only in the range of their immediate offset field and the required
+ // alignment. So there is always a store that can be used, regardless of
+ // actual type.
+ //
+ // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
+ // VREV64.8) pair and get the same effect. This will likely be better than
+ // aligning the vector through the stack.
+ if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
+ Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
+ Ty == MVT::v2f64) {
if (Fast)
*Fast = true;
return true;
- } else {
- // In big-endian MVE, those instructions aren't so similar
- // after all, because they reorder the bytes of the vector
- // differently. So this time we can only store a particular
- // kind of vector if its alignment is at least the element
- // type. And we can't store vectors of i64 or f64 at all
- // without having to do some postprocessing, because there's
- // no VSTRD.U64.
- if (Ty == MVT::v16i8 ||
- ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
- ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
- if (Fast)
- *Fast = true;
- return true;
- }
}
return false;
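For example (an illustrative case using a made-up function name, not one of
the tests below), a misaligned v2i64 store previously could not be treated as
legal at all in big endian mode, since there is no VSTRD.U64; it had to be
expanded via an aligned stack temporary. With this change the hook reports it
as fast, and it can be selected through the new (VREV64.8; VSTRB.U8) patterns
added below:

define arm_aapcs_vfpcc void @store_2xi64_a1(<2 x i64>* %p, <2 x i64> %v) {
entry:
  ; Previously rejected by allowsMisalignedMemoryAccesses for big endian.
  store <2 x i64> %v, <2 x i64>* %p, align 1
  ret void
}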
Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Thu Aug 8 08:15:19 2019
@@ -4820,13 +4820,6 @@ let Predicates = [HasMVEInt, IsLE] in {
defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
-
- def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
- (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
- (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
- def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
- (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
}
let Predicates = [HasMVEInt, IsBE] in {
@@ -4841,6 +4834,41 @@ let Predicates = [HasMVEInt, IsBE] in {
def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+
+ // Other unaligned loads/stores need to go through a VREV
+ def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)),
+ (v2f64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)),
+ (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)),
+ (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)),
+ (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)),
+ (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)),
+ (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>;
+ def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+ def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
+ (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
+}
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
+ (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+ def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
+ (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+ def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
+ (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
}
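To sketch how the new store patterns fire (the function name is illustrative;
note that adjacent VREVs introduced by the big endian calling convention may
fold with the one from the pattern, so the final instruction sequence can
differ):

define arm_aapcs_vfpcc void @store_8xi16_a1(<8 x i16>* %p, <8 x i16> %v) {
entry:
  ; Under big endian this is selected as (MVE_VREV16_8; MVE_VSTRBU8): the
  ; VREV16.8 puts the bytes of each i16 lane into memory order before the
  ; byte-wise store.
  store <8 x i16> %v, <8 x i16>* %p, align 1
  ret void
}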
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-be.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-be.ll?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-be.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-be.ll Thu Aug 8 08:15:19 2019
@@ -29,47 +29,14 @@ define void @load_load_add_store_align1(
;
; CHECK-BE-LABEL: load_load_add_store_align1:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r6, r7, lr}
-; CHECK-BE-NEXT: push {r4, r6, r7, lr}
-; CHECK-BE-NEXT: .setfp r7, sp, #8
-; CHECK-BE-NEXT: add r7, sp, #8
-; CHECK-BE-NEXT: .pad #48
-; CHECK-BE-NEXT: sub sp, #48
-; CHECK-BE-NEXT: mov r4, sp
-; CHECK-BE-NEXT: bfc r4, #0, #4
-; CHECK-BE-NEXT: mov sp, r4
-; CHECK-BE-NEXT: ldr.w r12, [r1]
-; CHECK-BE-NEXT: ldr r3, [r1, #4]
-; CHECK-BE-NEXT: ldr r2, [r1, #8]
-; CHECK-BE-NEXT: ldr r1, [r1, #12]
-; CHECK-BE-NEXT: strd r2, r1, [sp, #24]
-; CHECK-BE-NEXT: mov r1, r0
-; CHECK-BE-NEXT: strd r12, r3, [sp, #16]
-; CHECK-BE-NEXT: ldr r2, [r1, #4]!
-; CHECK-BE-NEXT: str r2, [sp, #4]
-; CHECK-BE-NEXT: ldr r2, [r0]
-; CHECK-BE-NEXT: str r2, [sp]
-; CHECK-BE-NEXT: mov r2, r1
-; CHECK-BE-NEXT: ldr r3, [r2, #4]!
-; CHECK-BE-NEXT: str r3, [sp, #8]
-; CHECK-BE-NEXT: ldr r3, [r2, #4]
-; CHECK-BE-NEXT: str r3, [sp, #12]
-; CHECK-BE-NEXT: add r3, sp, #16
-; CHECK-BE-NEXT: vldrw.u32 q0, [r3]
-; CHECK-BE-NEXT: mov r3, sp
-; CHECK-BE-NEXT: vldrw.u32 q1, [r3]
-; CHECK-BE-NEXT: add r3, sp, #32
+; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
+; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vrev32.8 q1, q1
; CHECK-BE-NEXT: vadd.i32 q0, q1, q0
-; CHECK-BE-NEXT: vstrw.32 q0, [r3]
-; CHECK-BE-NEXT: ldrd r3, r4, [sp, #40]
-; CHECK-BE-NEXT: ldrd r12, lr, [sp, #32]
-; CHECK-BE-NEXT: str r4, [r2, #4]
-; CHECK-BE-NEXT: sub.w r4, r7, #8
-; CHECK-BE-NEXT: str r3, [r2]
-; CHECK-BE-NEXT: str.w lr, [r1]
-; CHECK-BE-NEXT: str.w r12, [r0]
-; CHECK-BE-NEXT: mov sp, r4
-; CHECK-BE-NEXT: pop {r4, r6, r7, pc}
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
%l1 = load <4 x i32>, <4 x i32>* %src1, align 1
%l2 = load <4 x i32>, <4 x i32>* %src2, align 1
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-loadstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-loadstore.ll?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-loadstore.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-loadstore.ll Thu Aug 8 08:15:19 2019
@@ -1,72 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(<4 x i32>* %vp) {
-; CHECK-LABEL: load_4xi32_a4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: load_4xi32_a4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: load_4xi32_a4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: bx lr
entry:
%0 = load <4 x i32>, <4 x i32>* %vp, align 4
- ret <4 x i32> %0
+ %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %1
}
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(<4 x i32>* %vp) {
-; CHECK-LABEL: load_4xi32_a2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrh.u16 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: load_4xi32_a2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrh.u16 q0, [r0]
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: load_4xi32_a2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: bx lr
entry:
%0 = load <4 x i32>, <4 x i32>* %vp, align 2
- ret <4 x i32> %0
+ %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %1
}
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(<4 x i32>* %vp) {
-; CHECK-LABEL: load_4xi32_a1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldrb.u8 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: load_4xi32_a1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: load_4xi32_a1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vldrb.u8 q0, [r0]
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vshr.u32 q1, q0, #1
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: bx lr
entry:
%0 = load <4 x i32>, <4 x i32>* %vp, align 1
- ret <4 x i32> %0
+ %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %1
}
define arm_aapcs_vfpcc void @store_4xi32_a4(<4 x i32>* %vp, <4 x i32> %val) {
-; CHECK-LABEL: store_4xi32_a4:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: store_4xi32_a4:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: vstrw.32 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: store_4xi32_a4:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
- store <4 x i32> %val, <4 x i32>* %vp, align 4
+ %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %0, <4 x i32>* %vp, align 4
ret void
}
define arm_aapcs_vfpcc void @store_4xi32_a2(<4 x i32>* %vp, <4 x i32> %val) {
-; CHECK-LABEL: store_4xi32_a2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vstrh.16 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: store_4xi32_a2:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: vstrh.16 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: store_4xi32_a2:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
- store <4 x i32> %val, <4 x i32>* %vp, align 2
+ %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %0, <4 x i32>* %vp, align 2
ret void
}
define arm_aapcs_vfpcc void @store_4xi32_a1(<4 x i32>* %vp, <4 x i32> %val) {
-; CHECK-LABEL: store_4xi32_a1:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vstrb.8 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: store_4xi32_a1:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: vshr.u32 q0, q0, #1
+; CHECK-LE-NEXT: vstrb.8 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: store_4xi32_a1:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vshr.u32 q0, q1, #1
+; CHECK-BE-NEXT: vrev32.8 q0, q0
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: bx lr
entry:
- store <4 x i32> %val, <4 x i32>* %vp, align 1
+ %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %0, <4 x i32>* %vp, align 1
ret void
}
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(i32* %ip) {
-; CHECK-LABEL: load_4xi32_a4_offset_pos:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: add.w r0, r0, #508
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: load_4xi32_a4_offset_pos:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: add.w r0, r0, #508
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: load_4xi32_a4_offset_pos:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: add.w r0, r0, #508
+; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: bx lr
entry:
%ipoffset = getelementptr inbounds i32, i32* %ip, i32 127
%vp = bitcast i32* %ipoffset to <4 x i32>*
@@ -75,11 +141,18 @@ entry:
}
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(i32* %ip) {
-; CHECK-LABEL: load_4xi32_a4_offset_neg:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: sub.w r0, r0, #508
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: load_4xi32_a4_offset_neg:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: sub.w r0, r0, #508
+; CHECK-LE-NEXT: vldrw.u32 q0, [r0]
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: load_4xi32_a4_offset_neg:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: sub.w r0, r0, #508
+; CHECK-BE-NEXT: vldrb.u8 q1, [r0]
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: bx lr
entry:
%ipoffset = getelementptr inbounds i32, i32* %ip, i32 -127
%vp = bitcast i32* %ipoffset to <4 x i32>*
@@ -88,19 +161,34 @@ entry:
}
define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() {
-; CHECK-LABEL: loadstore_4xi32_stack_off16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.i32 q0, #0x1
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: movs r0, #3
-; CHECK-NEXT: vstrw.32 q0, [sp, #16]
-; CHECK-NEXT: str r0, [sp, #16]
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16]
-; CHECK-NEXT: add sp, #40
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: loadstore_4xi32_stack_off16:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #40
+; CHECK-LE-NEXT: sub sp, #40
+; CHECK-LE-NEXT: vmov.i32 q0, #0x1
+; CHECK-LE-NEXT: mov r0, sp
+; CHECK-LE-NEXT: vstrw.32 q0, [r0]
+; CHECK-LE-NEXT: movs r0, #3
+; CHECK-LE-NEXT: vstrw.32 q0, [sp, #16]
+; CHECK-LE-NEXT: str r0, [sp, #16]
+; CHECK-LE-NEXT: vldrw.u32 q0, [sp, #16]
+; CHECK-LE-NEXT: add sp, #40
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: loadstore_4xi32_stack_off16:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #40
+; CHECK-BE-NEXT: sub sp, #40
+; CHECK-BE-NEXT: vmov.i32 q0, #0x1
+; CHECK-BE-NEXT: mov r0, sp
+; CHECK-BE-NEXT: vstrw.32 q0, [r0]
+; CHECK-BE-NEXT: movs r0, #3
+; CHECK-BE-NEXT: vstrw.32 q0, [sp, #16]
+; CHECK-BE-NEXT: str r0, [sp, #16]
+; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: add sp, #40
+; CHECK-BE-NEXT: bx lr
entry:
%c = alloca [1 x [5 x [2 x i32]]], align 4
%0 = bitcast [1 x [5 x [2 x i32]]]* %c to i8*
@@ -116,19 +204,34 @@ entry:
}
define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() {
-; CHECK-LABEL: loadstore_8xi16_stack_off16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.i16 q0, #0x1
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstrh.16 q0, [r0]
-; CHECK-NEXT: movs r0, #3
-; CHECK-NEXT: vstrh.16 q0, [sp, #16]
-; CHECK-NEXT: strh.w r0, [sp, #16]
-; CHECK-NEXT: vldrh.u16 q0, [sp, #16]
-; CHECK-NEXT: add sp, #40
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: loadstore_8xi16_stack_off16:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #40
+; CHECK-LE-NEXT: sub sp, #40
+; CHECK-LE-NEXT: vmov.i16 q0, #0x1
+; CHECK-LE-NEXT: mov r0, sp
+; CHECK-LE-NEXT: vstrh.16 q0, [r0]
+; CHECK-LE-NEXT: movs r0, #3
+; CHECK-LE-NEXT: vstrh.16 q0, [sp, #16]
+; CHECK-LE-NEXT: strh.w r0, [sp, #16]
+; CHECK-LE-NEXT: vldrh.u16 q0, [sp, #16]
+; CHECK-LE-NEXT: add sp, #40
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: loadstore_8xi16_stack_off16:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #40
+; CHECK-BE-NEXT: sub sp, #40
+; CHECK-BE-NEXT: vmov.i16 q0, #0x1
+; CHECK-BE-NEXT: mov r0, sp
+; CHECK-BE-NEXT: vstrh.16 q0, [r0]
+; CHECK-BE-NEXT: movs r0, #3
+; CHECK-BE-NEXT: vstrh.16 q0, [sp, #16]
+; CHECK-BE-NEXT: strh.w r0, [sp, #16]
+; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: add sp, #40
+; CHECK-BE-NEXT: bx lr
entry:
%c = alloca [1 x [10 x [2 x i16]]], align 2
%0 = bitcast [1 x [10 x [2 x i16]]]* %c to i8*
@@ -144,19 +247,34 @@ entry:
}
define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() {
-; CHECK-LABEL: loadstore_16xi8_stack_off16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: vmov.i8 q0, #0x1
-; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: vstrb.8 q0, [r0]
-; CHECK-NEXT: movs r0, #3
-; CHECK-NEXT: vstrb.8 q0, [sp, #16]
-; CHECK-NEXT: strb.w r0, [sp, #16]
-; CHECK-NEXT: vldrb.u8 q0, [sp, #16]
-; CHECK-NEXT: add sp, #40
-; CHECK-NEXT: bx lr
+; CHECK-LE-LABEL: loadstore_16xi8_stack_off16:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .pad #40
+; CHECK-LE-NEXT: sub sp, #40
+; CHECK-LE-NEXT: vmov.i8 q0, #0x1
+; CHECK-LE-NEXT: mov r0, sp
+; CHECK-LE-NEXT: vstrb.8 q0, [r0]
+; CHECK-LE-NEXT: movs r0, #3
+; CHECK-LE-NEXT: vstrb.8 q0, [sp, #16]
+; CHECK-LE-NEXT: strb.w r0, [sp, #16]
+; CHECK-LE-NEXT: vldrb.u8 q0, [sp, #16]
+; CHECK-LE-NEXT: add sp, #40
+; CHECK-LE-NEXT: bx lr
+;
+; CHECK-BE-LABEL: loadstore_16xi8_stack_off16:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .pad #40
+; CHECK-BE-NEXT: sub sp, #40
+; CHECK-BE-NEXT: vmov.i8 q0, #0x1
+; CHECK-BE-NEXT: mov r0, sp
+; CHECK-BE-NEXT: vstrb.8 q0, [r0]
+; CHECK-BE-NEXT: movs r0, #3
+; CHECK-BE-NEXT: vstrb.8 q0, [sp, #16]
+; CHECK-BE-NEXT: strb.w r0, [sp, #16]
+; CHECK-BE-NEXT: vldrb.u8 q1, [sp, #16]
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: add sp, #40
+; CHECK-BE-NEXT: bx lr
entry:
%c = alloca [1 x [20 x [2 x i8]]], align 1
%0 = bitcast [1 x [20 x [2 x i8]]]* %c to i8*
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-pred-spill.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-pred-spill.ll?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-pred-spill.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-pred-spill.ll Thu Aug 8 08:15:19 2019
@@ -1,81 +1,165 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
-declare arm_aapcs_vfpcc <4 x i32> @ext_i32()
-declare arm_aapcs_vfpcc <8 x i16> @ext_i16()
-declare arm_aapcs_vfpcc <16 x i8> @ext_i8()
+declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
+declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
+declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
-; CHECK-LABEL: shuffle1_v4i32:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmov q4, q1
-; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bl ext_i32
-; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vpsel q0, q4, q0
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-LE-LABEL: shuffle1_v4i32:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .save {r7, lr}
+; CHECK-LE-NEXT: push {r7, lr}
+; CHECK-LE-NEXT: .vsave {d8, d9}
+; CHECK-LE-NEXT: vpush {d8, d9}
+; CHECK-LE-NEXT: .pad #8
+; CHECK-LE-NEXT: sub sp, #8
+; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
+; CHECK-LE-NEXT: vmov.i32 q0, #0x0
+; CHECK-LE-NEXT: vpsel q0, q1, q0
+; CHECK-LE-NEXT: vmov q4, q1
+; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-LE-NEXT: bl ext_i32
+; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-LE-NEXT: vpsel q0, q4, q0
+; CHECK-LE-NEXT: add sp, #8
+; CHECK-LE-NEXT: vpop {d8, d9}
+; CHECK-LE-NEXT: pop {r7, pc}
+;
+; CHECK-BE-LABEL: shuffle1_v4i32:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .save {r7, lr}
+; CHECK-BE-NEXT: push {r7, lr}
+; CHECK-BE-NEXT: .vsave {d8, d9}
+; CHECK-BE-NEXT: vpush {d8, d9}
+; CHECK-BE-NEXT: .pad #8
+; CHECK-BE-NEXT: sub sp, #8
+; CHECK-BE-NEXT: vrev64.32 q4, q1
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q4, q0
+; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: bl ext_i32
+; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-BE-NEXT: vrev64.32 q1, q0
+; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.32 q0, q1
+; CHECK-BE-NEXT: add sp, #8
+; CHECK-BE-NEXT: vpop {d8, d9}
+; CHECK-BE-NEXT: pop {r7, pc}
entry:
%c = icmp eq <4 x i32> %src, zeroinitializer
- %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32()
+ %s1 = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
+ %ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %s1)
%s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
ret <4 x i32> %s
}
define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
-; CHECK-LABEL: shuffle1_v8i16:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: vmov q4, q1
-; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bl ext_i16
-; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vpsel q0, q4, q0
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-LE-LABEL: shuffle1_v8i16:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .save {r7, lr}
+; CHECK-LE-NEXT: push {r7, lr}
+; CHECK-LE-NEXT: .vsave {d8, d9}
+; CHECK-LE-NEXT: vpush {d8, d9}
+; CHECK-LE-NEXT: .pad #8
+; CHECK-LE-NEXT: sub sp, #8
+; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
+; CHECK-LE-NEXT: vmov.i32 q0, #0x0
+; CHECK-LE-NEXT: vpsel q0, q1, q0
+; CHECK-LE-NEXT: vmov q4, q1
+; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-LE-NEXT: bl ext_i16
+; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-LE-NEXT: vpsel q0, q4, q0
+; CHECK-LE-NEXT: add sp, #8
+; CHECK-LE-NEXT: vpop {d8, d9}
+; CHECK-LE-NEXT: pop {r7, pc}
+;
+; CHECK-BE-LABEL: shuffle1_v8i16:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .save {r7, lr}
+; CHECK-BE-NEXT: push {r7, lr}
+; CHECK-BE-NEXT: .vsave {d8, d9}
+; CHECK-BE-NEXT: vpush {d8, d9}
+; CHECK-BE-NEXT: .pad #8
+; CHECK-BE-NEXT: sub sp, #8
+; CHECK-BE-NEXT: vrev64.16 q4, q1
+; CHECK-BE-NEXT: vmov.i32 q1, #0x0
+; CHECK-BE-NEXT: vrev64.16 q2, q0
+; CHECK-BE-NEXT: vrev32.16 q1, q1
+; CHECK-BE-NEXT: vcmp.i16 eq, q2, zr
+; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-BE-NEXT: vrev64.16 q0, q1
+; CHECK-BE-NEXT: bl ext_i16
+; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.16 q0, q1
+; CHECK-BE-NEXT: add sp, #8
+; CHECK-BE-NEXT: vpop {d8, d9}
+; CHECK-BE-NEXT: pop {r7, pc}
entry:
%c = icmp eq <8 x i16> %src, zeroinitializer
- %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16()
+ %s1 = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
+ %ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %s1)
%s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
ret <8 x i16> %s
}
define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
-; CHECK-LABEL: shuffle1_v16i8:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .vsave {d8, d9}
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: vmov q4, q1
-; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bl ext_i8
-; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vpsel q0, q4, q0
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: vpop {d8, d9}
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-LE-LABEL: shuffle1_v16i8:
+; CHECK-LE: @ %bb.0: @ %entry
+; CHECK-LE-NEXT: .save {r7, lr}
+; CHECK-LE-NEXT: push {r7, lr}
+; CHECK-LE-NEXT: .vsave {d8, d9}
+; CHECK-LE-NEXT: vpush {d8, d9}
+; CHECK-LE-NEXT: .pad #8
+; CHECK-LE-NEXT: sub sp, #8
+; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
+; CHECK-LE-NEXT: vmov.i32 q0, #0x0
+; CHECK-LE-NEXT: vpsel q0, q1, q0
+; CHECK-LE-NEXT: vmov q4, q1
+; CHECK-LE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-LE-NEXT: bl ext_i8
+; CHECK-LE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-LE-NEXT: vpsel q0, q4, q0
+; CHECK-LE-NEXT: add sp, #8
+; CHECK-LE-NEXT: vpop {d8, d9}
+; CHECK-LE-NEXT: pop {r7, pc}
+;
+; CHECK-BE-LABEL: shuffle1_v16i8:
+; CHECK-BE: @ %bb.0: @ %entry
+; CHECK-BE-NEXT: .save {r7, lr}
+; CHECK-BE-NEXT: push {r7, lr}
+; CHECK-BE-NEXT: .vsave {d8, d9}
+; CHECK-BE-NEXT: vpush {d8, d9}
+; CHECK-BE-NEXT: .pad #8
+; CHECK-BE-NEXT: sub sp, #8
+; CHECK-BE-NEXT: vrev64.8 q4, q1
+; CHECK-BE-NEXT: vmov.i32 q1, #0x0
+; CHECK-BE-NEXT: vrev64.8 q2, q0
+; CHECK-BE-NEXT: vrev32.8 q1, q1
+; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr
+; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: bl ext_i8
+; CHECK-BE-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
+; CHECK-BE-NEXT: vrev64.8 q1, q0
+; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.8 q0, q1
+; CHECK-BE-NEXT: add sp, #8
+; CHECK-BE-NEXT: vpop {d8, d9}
+; CHECK-BE-NEXT: pop {r7, pc}
entry:
%c = icmp eq <16 x i8> %src, zeroinitializer
- %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8()
+ %s1 = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
+ %ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %s1)
%s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext
ret <16 x i8> %s
}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll?rev=368304&r1=368303&r2=368304&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-widen-narrow.ll Thu Aug 8 08:15:19 2019
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int32: