[llvm] r366106 - [ARM] MVE vector for 64bit types
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 11:42:55 PDT 2019
Author: dmgreen
Date: Mon Jul 15 11:42:54 2019
New Revision: 366106
URL: http://llvm.org/viewvc/llvm-project?rev=366106&view=rev
Log:
[ARM] MVE vector for 64bit types
We need to make sure that we deal sensibly with vectors of types v2i64
and v2f64, even if most of the time we cannot generate native operations for
them. This mostly adds a lot of testing, and fixes a couple of the issues
found: the bitwise operations (and, or, xor) can be made legal for v2i64, and
the shift combine needs a slight fixup so that it bails out for v2i64 on MVE.
Differential Revision: https://reviews.llvm.org/D64316
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
llvm/trunk/test/CodeGen/Thumb2/mve-abs.ll
llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll
llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll
llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll
llvm/trunk/test/CodeGen/Thumb2/mve-fp-negabs.ll
llvm/trunk/test/CodeGen/Thumb2/mve-frint.ll
llvm/trunk/test/CodeGen/Thumb2/mve-minmax.ll
llvm/trunk/test/CodeGen/Thumb2/mve-neg.ll
llvm/trunk/test/CodeGen/Thumb2/mve-sext.ll
llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll
llvm/trunk/test/CodeGen/Thumb2/mve-shuffle.ll
llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll
llvm/trunk/test/CodeGen/Thumb2/mve-soft-float-abi.ll
llvm/trunk/test/CodeGen/Thumb2/mve-vcvt.ll
llvm/trunk/test/CodeGen/Thumb2/mve-vdup.ll
llvm/trunk/test/CodeGen/Thumb2/mve-vmovimm.ll
llvm/trunk/test/CodeGen/Thumb2/mve-vmvnimm.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jul 15 11:42:54 2019
@@ -320,6 +320,10 @@ void ARMTargetLowering::addMVEVectorType
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
+ // We can do bitwise operations on v2i64 vectors
+ setOperationAction(ISD::AND, MVT::v2i64, Legal);
+ setOperationAction(ISD::OR, MVT::v2i64, Legal);
+ setOperationAction(ISD::XOR, MVT::v2i64, Legal);
// It is legal to extload from v4i8 to v4i16 or v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
@@ -12855,6 +12859,8 @@ static SDValue PerformShiftCombine(SDNod
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
+ if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ return SDValue();
int64_t Cnt;
Modified: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrMVE.td?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td Mon Jul 15 11:42:54 2019
@@ -1035,6 +1035,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
(v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+ def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
+ (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -1081,6 +1083,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1088,6 +1092,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1095,6 +1101,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1102,13 +1110,17 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
+ def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
(v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
}
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-abs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-abs.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-abs.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-abs.ll Mon Jul 15 11:42:54 2019
@@ -36,3 +36,50 @@ entry:
%2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
ret <4 x i32> %2
}
+
+define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: abs_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: vmov r12, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s3
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: rsbs.w lr, r12, #0
+; CHECK-NEXT: sbc.w r5, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r2, lr
+; CHECK-NEXT: lsrl r2, r5, #32
+; CHECK-NEXT: mov.w r5, #0
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq r2, r3
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r4, r1, #0
+; CHECK-NEXT: mov r6, r4
+; CHECK-NEXT: sbc.w r7, r0, r3
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: lsrl r6, r7, #32
+; CHECK-NEXT: it mi
+; CHECK-NEXT: movmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ite eq
+; CHECK-NEXT: moveq r6, r3
+; CHECK-NEXT: movne r1, r4
+; CHECK-NEXT: vmov.32 q0[0], r1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 q0[1], r6
+; CHECK-NEXT: it eq
+; CHECK-NEXT: moveq lr, r12
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r2
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+entry:
+ %0 = icmp slt <2 x i64> %s1, zeroinitializer
+ %1 = sub nsw <2 x i64> zeroinitializer, %s1
+ %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
+ ret <2 x i64> %2
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-bitarith.ll Mon Jul 15 11:42:54 2019
@@ -31,6 +31,16 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: and_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = and <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: or_int8_t:
@@ -62,6 +72,16 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: or_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = or <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: xor_int8_t:
@@ -93,6 +113,16 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: xor_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: veor q0, q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
; CHECK-LABEL: v_mvn_i8:
; CHECK: @ %bb.0: @ %entry
@@ -123,6 +153,17 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) {
+; CHECK-LABEL: v_mvn_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmvn q0, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src, <i64 -1, i64 -1>
+ ret <2 x i64> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_bic_i8:
; CHECK: @ %bb.0: @ %entry
@@ -156,6 +197,18 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_bic_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vbic q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = and <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_or_i8:
; CHECK: @ %bb.0: @ %entry
@@ -188,3 +241,15 @@ entry:
%1 = or <4 x i32> %src2, %0
ret <4 x i32> %1
}
+
+define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_or_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vorn q0, q1, q0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+ %1 = or <2 x i64> %src2, %0
+ ret <2 x i64> %1
+}
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-div-expand.ll Mon Jul 15 11:42:54 2019
@@ -736,6 +736,144 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: udiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = udiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s22
+; CHECK-NEXT: vmov lr, s23
+; CHECK-NEXT: vmov r2, s18
+; CHECK-NEXT: vmov r3, s19
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = sdiv <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: urem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_uldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = urem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: srem_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, s20
+; CHECK-NEXT: vmov r1, s21
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov r12, s18
+; CHECK-NEXT: vmov lr, s19
+; CHECK-NEXT: vmov.32 q4[0], r2
+; CHECK-NEXT: vmov r0, s22
+; CHECK-NEXT: vmov.32 q4[1], r3
+; CHECK-NEXT: vmov r1, s23
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: bl __aeabi_ldivmod
+; CHECK-NEXT: vmov.32 q4[2], r2
+; CHECK-NEXT: vmov.32 q4[3], r3
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = srem <2 x i64> %in1, %in2
+ ret <2 x i64> %out
+}
+
+
+
define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {
; CHECK-LABEL: fdiv_f32:
@@ -992,3 +1130,59 @@ entry:
%out = frem <8 x half> %in1, %in2
ret <8 x half> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: fdiv_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_ddiv
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fdiv <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: frem_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl fmod
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = frem <2 x double> %in1, %in2
+ ret <2 x double> %out
+}
+
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-fmath.ll Mon Jul 15 11:42:54 2019
@@ -66,6 +66,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sqrt_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sqrt
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
; CHECK-LABEL: cos_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -198,6 +222,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
+; CHECK-LABEL: cos_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl cos
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
; CHECK-LABEL: sin_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -330,6 +378,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sin_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl sin
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -462,6 +534,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp2_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -594,6 +690,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl exp2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
; CHECK-LABEL: log_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -726,6 +846,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
; CHECK-LABEL: log2_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -858,6 +1002,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log2_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log2
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
; CHECK-LABEL: log10_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -990,6 +1158,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log10_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl log10
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: pow_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -1165,6 +1357,33 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: pow_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d11
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov lr, r12, d10
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl pow
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: copysign_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -1340,6 +1559,27 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: copysign_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r0, r1, d3
+; CHECK-NEXT: vmov r0, lr, d2
+; CHECK-NEXT: vmov r0, r3, d1
+; CHECK-NEXT: vmov r12, r2, d0
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: lsr.w r1, lr, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r3
+; CHECK-NEXT: vmov d0, r12, r2
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
@@ -1360,4 +1600,14 @@ declare <8 x half> @llvm.log2.v8f16(<8 x
declare <8 x half> @llvm.log10.v8f16(<8 x half>)
declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-fp-negabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-fp-negabs.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-fp-negabs.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-fp-negabs.ll Mon Jul 15 11:42:54 2019
@@ -76,6 +76,39 @@ entry:
ret <4 x float> %0
}
+define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fneg_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vldr d0, .LCPI2_0
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov r4, r5, d0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI2_0:
+; CHECK-NEXT: .long 0 @ double -0
+; CHECK-NEXT: .long 2147483648
+entry:
+ %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fabs_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -150,6 +183,30 @@ entry:
ret <4 x float> %0
}
+define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fabs_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vldr d2, .LCPI5_0
+; CHECK-NEXT: vmov r12, r3, d0
+; CHECK-NEXT: vmov r0, r1, d2
+; CHECK-NEXT: vmov r0, r2, d1
+; CHECK-NEXT: lsrs r1, r1, #31
+; CHECK-NEXT: bfi r2, r1, #31, #1
+; CHECK-NEXT: bfi r3, r1, #31, #1
+; CHECK-NEXT: vmov d1, r0, r2
+; CHECK-NEXT: vmov d0, r12, r3
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI5_0:
+; CHECK-NEXT: .long 0 @ double 0
+; CHECK-NEXT: .long 0
+entry:
+ %0 = call nnan ninf nsz <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-frint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-frint.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-frint.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-frint.ll Mon Jul 15 11:42:54 2019
@@ -76,6 +76,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fceil_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl ceil
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -150,6 +174,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ftrunc_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl trunc
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -224,6 +272,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: frint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl rint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK: @ %bb.0: @ %entry
@@ -288,6 +360,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fnearbyint_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl nearbyint
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -362,6 +458,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ffloor_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl floor
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -436,6 +556,30 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fround_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov r2, r3, d8
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl round
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
+ ret <2 x double> %0
+}
+
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
@@ -448,3 +592,9 @@ declare <8 x half> @llvm.rint.v8f16(<8 x
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-minmax.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-minmax.ll Mon Jul 15 11:42:54 2019
@@ -35,6 +35,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp slt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umin_v16i8:
; CHECK: @ %bb.0: @ %entry
@@ -68,6 +111,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: vmov lr, s1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s0
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s5
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp ult <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: smax_v16i8:
@@ -102,6 +188,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp sgt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: umax_v16i8:
; CHECK: @ %bb.0: @ %entry
@@ -135,6 +264,49 @@ entry:
ret <4 x i32> %1
}
+define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umax_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r12, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: vmov lr, s5
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: vmov r3, s4
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: sbcs.w r1, r1, r12
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r1, #1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: subs r2, r3, r2
+; CHECK-NEXT: sbcs.w r2, lr, r12
+; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov.32 q2[1], r0
+; CHECK-NEXT: vmov.32 q2[2], r1
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = icmp ugt <2 x i64> %s1, %s2
+ %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+ ret <2 x i64> %1
+}
+
define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: maxnm_float32_t:
@@ -227,3 +399,46 @@ entry:
%0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
ret <8 x half> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: maxnm_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: vmov.32 q0[3], r4
+; CHECK-NEXT: vbic q1, q5, q0
+; CHECK-NEXT: vand q0, q4, q0
+; CHECK-NEXT: vorr q0, q0, q1
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, pc}
+entry:
+ %cmp = fcmp fast ogt <2 x double> %src2, %src1
+ %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1
+ ret <2 x double> %0
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-neg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-neg.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-neg.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-neg.ll Mon Jul 15 11:42:54 2019
@@ -30,3 +30,26 @@ entry:
%0 = sub nsw <4 x i32> zeroinitializer, %s1
ret <4 x i32> %0
}
+
+define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: neg_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: sbc.w r0, r12, r0
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: sbc.w r3, r12, r3
+; CHECK-NEXT: vmov.32 q0[0], r2
+; CHECK-NEXT: vmov.32 q0[1], r3
+; CHECK-NEXT: vmov.32 q0[2], r1
+; CHECK-NEXT: vmov.32 q0[3], r0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = sub nsw <2 x i64> zeroinitializer, %s1
+ ret <2 x i64> %0
+}
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-sext.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-sext.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-sext.ll Mon Jul 15 11:42:54 2019
@@ -32,6 +32,24 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[1], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: asrs r0, r0, #31
+; CHECK-NEXT: vmov.32 q1[3], r0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = sext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: zext_v8i8_v8i16:
@@ -64,6 +82,25 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI7_0
+; CHECK-NEXT: vldrw.u32 q1, [r0]
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI7_0:
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+entry:
+ %0 = zext <2 x i32> %src to <2 x i64>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
; CHECK-LABEL: trunc_v8i16_v8i8:
@@ -91,3 +128,13 @@ entry:
%0 = trunc <4 x i32> %src to <4 x i8>
ret <4 x i8> %0
}
+
+define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
+; CHECK-LABEL: trunc_v2i64_v2i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %0 = trunc <2 x i64> %src to <2 x i32>
+ ret <2 x i32> %0
+}
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll Mon Jul 15 11:42:54 2019
@@ -31,6 +31,28 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shl_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shru_qq_int8_t:
@@ -65,6 +87,30 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shru_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov.32 q2[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: lsll r0, r1, r2
+; CHECK-NEXT: vmov.32 q2[2], r0
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: shrs_qq_int8_t:
@@ -99,6 +145,28 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shrs_qq_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 q2[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q2[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q2[2], r2
+; CHECK-NEXT: vmov.32 q2[3], r1
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qi_int8_t:
@@ -130,6 +198,26 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shl_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = shl <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shru_qi_int8_t:
@@ -161,6 +249,26 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shru_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = lshr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shrs_qi_int8_t:
@@ -192,6 +300,25 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shrs_qi_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[0], r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r0, r1, #4
+; CHECK-NEXT: vmov.32 q1[2], r0
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = ashr <2 x i64> %src1, <i64 4, i64 4>
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
@@ -230,6 +357,28 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shl_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = shl <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shru_qr_int8_t:
@@ -273,6 +422,29 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shru_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: lsll r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = lshr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: shrs_qr_int8_t:
@@ -316,17 +488,38 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shrs_qr_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov.32 q1[1], r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: asrl r2, r1, r0
+; CHECK-NEXT: vmov.32 q1[2], r2
+; CHECK-NEXT: vmov.32 q1[3], r1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+ %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %0 = ashr <2 x i64> %src1, %s
+ ret <2 x i64> %0
+}
define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
; CHECK-LABEL: shl_qiv_int8_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI27_0
+; CHECK-NEXT: adr r0, .LCPI36_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u8 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI27_0:
+; CHECK-NEXT: .LCPI36_0:
; CHECK-NEXT: .byte 1 @ 0x1
; CHECK-NEXT: .byte 2 @ 0x2
; CHECK-NEXT: .byte 3 @ 0x3
@@ -351,13 +544,13 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
; CHECK-LABEL: shl_qiv_int16_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI28_0
+; CHECK-NEXT: adr r0, .LCPI37_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u16 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI28_0:
+; CHECK-NEXT: .LCPI37_0:
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 2 @ 0x2
; CHECK-NEXT: .short 3 @ 0x3
@@ -374,13 +567,13 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
; CHECK-LABEL: shl_qiv_int32_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI29_0
+; CHECK-NEXT: adr r0, .LCPI38_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vshl.u32 q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI29_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 2 @ 0x2
; CHECK-NEXT: .long 3 @ 0x3
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-shuffle.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-shuffle.ll Mon Jul 15 11:42:54 2019
@@ -262,6 +262,38 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle1_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle2_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle3_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
; CHECK-LABEL: shuffle1_f32:
; CHECK: @ %bb.0: @ %entry
@@ -390,6 +422,38 @@ entry:
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle1_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle2_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s4, s2
+; CHECK-NEXT: vmov.f32 s5, s3
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vmov.f32 s7, s1
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle3_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
; CHECK-LABEL: insert_i32:
@@ -421,6 +485,17 @@ entry:
ret <16 x i8> %res
}
+define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
+; CHECK-LABEL: insert_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: bx lr
+entry:
+ %res = insertelement <2 x i64> undef, i64 %a, i32 0
+ ret <2 x i64> %res
+}
+
define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
; CHECK-LABEL: insert_f32:
; CHECK: @ %bb.0: @ %entry
@@ -443,12 +518,35 @@ entry:
ret <8 x half> %res
}
+define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
+; CHECK-LABEL: insert_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r6, r7, lr}
+; CHECK-NEXT: push {r4, r6, r7, lr}
+; CHECK-NEXT: .setfp r7, sp, #8
+; CHECK-NEXT: add r7, sp, #8
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #4
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: sub.w r4, r7, #8
+; CHECK-NEXT: vstr d0, [sp]
+; CHECK-NEXT: mov r0, sp
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r6, r7, pc}
+entry:
+ %res = insertelement <2 x double> undef, double %a, i32 0
+ ret <2 x double> %res
+}
+
define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
; CHECK-LABEL: scalar_to_vector_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: adr r1, .LCPI30_0
+; CHECK-NEXT: adr r1, .LCPI38_0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: vmov.32 q0[0], r0
@@ -461,7 +559,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_ve
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI30_0:
+; CHECK-NEXT: .LCPI38_0:
; CHECK-NEXT: .zero 4
; CHECK-NEXT: .long 7 @ 0x7
; CHECK-NEXT: .long 1 @ 0x1
@@ -533,6 +631,28 @@ entry:
ret i8 %res
}
+define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x i64> %a, i32 0
+ ret i64 %res
+}
+
+define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x i64> %a, i32 1
+ ret i64 %res
+}
+
define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
; CHECK-LABEL: extract_f32_0:
; CHECK: @ %bb.0: @ %entry
@@ -576,3 +696,25 @@ entry:
%res = extractelement <8 x half> %a, i32 3
ret half %res
}
+
+define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x double> %a, i32 0
+ ret double %res
+}
+
+define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %res = extractelement <2 x double> %a, i32 1
+ ret double %res
+}
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-simple-arith.ll Mon Jul 15 11:42:54 2019
@@ -32,6 +32,33 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: add_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: adds.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r3, s1
+; CHECK-NEXT: adc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s5
+; CHECK-NEXT: adds r0, r0, r2
+; CHECK-NEXT: adcs r1, r3
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = add nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -122,6 +149,33 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: add_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dadd
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
@@ -153,6 +207,33 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: sub_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r0, s3
+; CHECK-NEXT: vmov r1, s7
+; CHECK-NEXT: subs.w lr, r3, r2
+; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: vmov r3, s5
+; CHECK-NEXT: sbc.w r12, r1, r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: subs r0, r2, r0
+; CHECK-NEXT: sbc.w r1, r3, r1
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], lr
+; CHECK-NEXT: vmov.32 q0[3], r12
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = sub nsw <2 x i64> %src2, %src1
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -243,6 +324,34 @@ entry:
ret <8 x half> %0
}
+define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: sub_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dsub
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
+
define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK: @ %bb.0: @ %entry
@@ -273,6 +382,35 @@ entry:
ret <4 x i32> %0
}
+define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: mul_int64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov r1, s0
+; CHECK-NEXT: vmov r2, s5
+; CHECK-NEXT: umull r12, r3, r1, r0
+; CHECK-NEXT: mla lr, r1, r2, r3
+; CHECK-NEXT: vmov r3, s6
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: vmov r2, s7
+; CHECK-NEXT: umull r4, r5, r1, r3
+; CHECK-NEXT: mla r1, r1, r2, r5
+; CHECK-NEXT: vmov r2, s1
+; CHECK-NEXT: mla r0, r2, r0, lr
+; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: vmov.32 q0[0], r12
+; CHECK-NEXT: vmov.32 q0[1], r0
+; CHECK-NEXT: vmov.32 q0[2], r4
+; CHECK-NEXT: mla r1, r2, r3, r1
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %0 = mul nsw <2 x i64> %src1, %src2
+ ret <2 x i64> %0
+}
+
define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
@@ -362,3 +500,31 @@ entry:
%0 = fmul nnan ninf nsz <4 x float> %src2, %src1
ret <4 x float> %0
}
+
+define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: mul_float64_t:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmov q5, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov r2, r3, d11
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov lr, r12, d8
+; CHECK-NEXT: vmov r2, r3, d10
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r1, r12
+; CHECK-NEXT: bl __aeabi_dmul
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
+ ret <2 x double> %0
+}
+
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-soft-float-abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-soft-float-abi.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-soft-float-abi.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-soft-float-abi.ll Mon Jul 15 11:42:54 2019
@@ -50,6 +50,39 @@ entry:
ret <4 x i32> %sum
}
+define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-FP-LABEL: vector_add_i64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov d1, r2, r3
+; CHECK-FP-NEXT: vmov d0, r0, r1
+; CHECK-FP-NEXT: add r0, sp, #8
+; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
+; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: vmov r0, s3
+; CHECK-FP-NEXT: vmov r3, s6
+; CHECK-FP-NEXT: vmov r2, s7
+; CHECK-FP-NEXT: adds.w lr, r1, r3
+; CHECK-FP-NEXT: vmov r3, s0
+; CHECK-FP-NEXT: vmov r1, s4
+; CHECK-FP-NEXT: adc.w r12, r0, r2
+; CHECK-FP-NEXT: vmov r2, s1
+; CHECK-FP-NEXT: vmov r0, s5
+; CHECK-FP-NEXT: adds r1, r1, r3
+; CHECK-FP-NEXT: vmov.32 q0[0], r1
+; CHECK-FP-NEXT: adcs r0, r2
+; CHECK-FP-NEXT: vmov.32 q0[1], r0
+; CHECK-FP-NEXT: vmov.32 q0[2], lr
+; CHECK-FP-NEXT: vmov.32 q0[3], r12
+; CHECK-FP-NEXT: vmov r0, r1, d0
+; CHECK-FP-NEXT: vmov r2, r3, d1
+; CHECK-FP-NEXT: pop {r7, pc}
+entry:
+ %sum = add <2 x i64> %lhs, %rhs
+ ret <2 x i64> %sum
+}
+
define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP: @ %bb.0: @ %entry
@@ -81,3 +114,38 @@ entry:
%sum = fadd <4 x float> %lhs, %rhs
ret <4 x float> %sum
}
+
+define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-FP-LABEL: vector_add_f64:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT: .pad #4
+; CHECK-FP-NEXT: sub sp, #4
+; CHECK-FP-NEXT: .vsave {d8, d9}
+; CHECK-FP-NEXT: vpush {d8, d9}
+; CHECK-FP-NEXT: mov r5, r0
+; CHECK-FP-NEXT: add r0, sp, #40
+; CHECK-FP-NEXT: vldrw.u32 q4, [r0]
+; CHECK-FP-NEXT: mov r4, r2
+; CHECK-FP-NEXT: mov r6, r3
+; CHECK-FP-NEXT: mov r7, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: mov r0, r4
+; CHECK-FP-NEXT: mov r1, r6
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov r2, r3, d8
+; CHECK-FP-NEXT: vmov d9, r0, r1
+; CHECK-FP-NEXT: mov r0, r5
+; CHECK-FP-NEXT: mov r1, r7
+; CHECK-FP-NEXT: bl __aeabi_dadd
+; CHECK-FP-NEXT: vmov d8, r0, r1
+; CHECK-FP-NEXT: vmov r2, r3, d9
+; CHECK-FP-NEXT: vmov r0, r1, d8
+; CHECK-FP-NEXT: vpop {d8, d9}
+; CHECK-FP-NEXT: add sp, #4
+; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc}
+entry:
+ %sum = fadd <2 x double> %lhs, %rhs
+ ret <2 x double> %sum
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-vcvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-vcvt.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vcvt.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vcvt.ll Mon Jul 15 11:42:54 2019
@@ -317,3 +317,111 @@ entry:
%out = fptoui <8 x half> %src to <8 x i16>
ret <8 x i16> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_int64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_l2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = sitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_uint64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r1, s19
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: vmov r3, s17
+; CHECK-NEXT: vmov d9, r0, r1
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_ul2d
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = uitofp <2 x i64> %src to <2 x double>
+ ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_int64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fptosi <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_uint64_float:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov q4, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov r2, r3, d9
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: lsrl r0, r1, #32
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vmov q0, q4
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %out = fptoui <2 x double> %src to <2 x i64>
+ ret <2 x i64> %out
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-vdup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-vdup.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vdup.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vdup.ll Mon Jul 15 11:42:54 2019
@@ -35,6 +35,20 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
+; CHECK-LABEL: vdup_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.32 q0[0], r0
+; CHECK-NEXT: vmov.32 q0[1], r1
+; CHECK-NEXT: vmov.32 q0[2], r0
+; CHECK-NEXT: vmov.32 q0[3], r1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = insertelement <2 x i64> undef, i64 %src, i32 0
+ %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
; CHECK-LABEL: vdup_f32_1:
; CHECK: @ %bb.0: @ %entry
@@ -80,6 +94,19 @@ entry:
ret <8 x half> %out
}
+define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
+; CHECK-LABEL: vdup_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: vmov.f32 s2, s0
+; CHECK-NEXT: vmov.f32 s3, s1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = insertelement <2 x double> undef, double %src, i32 0
+ %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %out
+}
+
define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
@@ -115,6 +142,17 @@ entry:
ret <16 x i8> %out
}
+define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
+; CHECK-LABEL: vduplane_i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %out
+}
+
define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
; CHECK-LABEL: vduplane_f32:
; CHECK: @ %bb.0: @ %entry
@@ -136,3 +174,14 @@ entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x half> %out
}
+
+define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
+; CHECK-LABEL: vduplane_f64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s1, s3
+; CHECK-NEXT: bx lr
+entry:
+ %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %out
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-vmovimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-vmovimm.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vmovimm.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vmovimm.ll Mon Jul 15 11:42:54 2019
@@ -11,7 +11,7 @@ entry:
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
-define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) {
+define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() {
; CHECK-LABEL: mov_int8_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -20,7 +20,7 @@ entry:
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() {
; CHECK-LABEL: mov_int16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x1
@@ -29,7 +29,7 @@ entry:
ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() {
; CHECK-LABEL: mov_int16_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -38,7 +38,7 @@ entry:
ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() {
; CHECK-LABEL: mov_int16_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x100
@@ -56,7 +56,7 @@ entry:
ret <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {
; CHECK-LABEL: mov_int16_258:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI6_0
@@ -73,7 +73,7 @@ entry:
ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() {
; CHECK-LABEL: mov_int32_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1
@@ -82,7 +82,7 @@ entry:
ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() {
; CHECK-LABEL: mov_int32_256:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x100
@@ -91,7 +91,7 @@ entry:
ret <4 x i32> <i32 256, i32 256, i32 256, i32 256>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() {
; CHECK-LABEL: mov_int32_65536:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x10000
@@ -100,7 +100,7 @@ entry:
ret <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() {
; CHECK-LABEL: mov_int32_16777216:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x1000000
@@ -109,7 +109,7 @@ entry:
ret <4 x i32> <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {
; CHECK-LABEL: mov_int32_16777217:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI11_0
@@ -126,7 +126,7 @@ entry:
ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() {
; CHECK-LABEL: mov_int32_17919:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ff
@@ -135,7 +135,7 @@ entry:
ret <4 x i32> <i32 17919, i32 17919, i32 17919, i32 17919>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() {
; CHECK-LABEL: mov_int32_4587519:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q0, #0x45ffff
@@ -144,7 +144,7 @@ entry:
ret <4 x i32> <i32 4587519, i32 4587519, i32 4587519, i32 4587519>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() {
; CHECK-LABEL: mov_int32_m1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 q0, #0xff
@@ -153,7 +153,7 @@ entry:
ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() {
; CHECK-LABEL: mov_int32_4294901760:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xffff
@@ -162,7 +162,7 @@ entry:
ret <4 x i32> <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {
; CHECK-LABEL: mov_int32_4278190335:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI16_0
@@ -179,7 +179,7 @@ entry:
ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() {
; CHECK-LABEL: mov_int32_4278255615:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xff0000
@@ -188,8 +188,8 @@ entry:
ret <4 x i32> <i32 4278255615, i32 4278255615, i32 4278255615, i32 4278255615>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
-; CHECK-LABEL: mov_float_1:
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() {
+; CHECK-LABEL: mov_int64_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI18_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
@@ -197,6 +197,32 @@ define arm_aapcs_vfpcc <4 x float> @mov_
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI18_0:
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT: .long 0
+entry:
+ ret <2 x i64> <i64 1, i64 1>
+}
+
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() {
+; CHECK-LABEL: mov_int64_m1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i8 q0, #0xff
+; CHECK-NEXT: bx lr
+entry:
+ ret <2 x i64> <i64 -1, i64 -1>
+}
+
+define arm_aapcs_vfpcc <4 x float> @mov_float_1() {
+; CHECK-LABEL: mov_float_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI20_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
; CHECK-NEXT: .long 1065353216
; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234
@@ -205,15 +231,15 @@ entry:
ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
}
-define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) {
+define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {
; CHECK-LABEL: mov_float_m3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: adr r0, .LCPI19_0
+; CHECK-NEXT: adr r0, .LCPI21_0
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI19_0:
+; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
; CHECK-NEXT: .long 3225419776
; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916
@@ -222,7 +248,7 @@ entry:
ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_1() {
; CHECK-LABEL: mov_float16_1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0x3c00
@@ -232,7 +258,7 @@ entry:
ret <8 x half> <half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00>
}
-define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() {
; CHECK-LABEL: mov_float16_m3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i16 q0, #0xc200
@@ -241,3 +267,20 @@ define arm_aapcs_vfpcc <8 x half> @mov_f
entry:
ret <8 x half> <half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00>
}
+
+define arm_aapcs_vfpcc <2 x double> @mov_double_1() {
+; CHECK-LABEL: mov_double_1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: adr r0, .LCPI24_0
+; CHECK-NEXT: vldrw.u32 q0, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI24_0:
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+; CHECK-NEXT: .long 0 @ double 1
+; CHECK-NEXT: .long 1072693248
+entry:
+ ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
Modified: llvm/trunk/test/CodeGen/Thumb2/mve-vmvnimm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-vmvnimm.ll?rev=366106&r1=366105&r2=366106&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vmvnimm.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vmvnimm.ll Mon Jul 15 11:42:54 2019
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() {
; CHECK-LABEL: mov_int16_511:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe00
@@ -11,7 +11,7 @@ entry:
ret <8 x i16> <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
}
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() {
; CHECK-LABEL: mov_int16_65281:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xfe
@@ -20,7 +20,7 @@ entry:
ret <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() {
; CHECK-LABEL: mov_int32_m7:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x6
@@ -29,7 +29,7 @@ entry:
ret <4 x i32> <i32 -7, i32 -7, i32 -7, i32 -7>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() {
; CHECK-LABEL: mov_int32_m769:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x300
@@ -38,7 +38,7 @@ entry:
ret <4 x i32> <i32 -769, i32 -769, i32 -769, i32 -769>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() {
; CHECK-LABEL: mov_int32_m262145:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x40000
@@ -47,7 +47,7 @@ entry:
ret <4 x i32> <i32 -262145, i32 -262145, i32 -262145, i32 -262145>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() {
; CHECK-LABEL: mov_int32_m134217729:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0x8000000
@@ -56,7 +56,7 @@ entry:
ret <4 x i32> <i32 -134217729, i32 -134217729, i32 -134217729, i32 -134217729>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() {
; CHECK-LABEL: mov_int32_4294902528:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xfcff
@@ -65,7 +65,7 @@ entry:
ret <4 x i32> <i32 4294902528, i32 4294902528, i32 4294902528, i32 4294902528>
}
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
; CHECK-LABEL: mov_int32_4278386688:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI7_0
More information about the llvm-commits mailing list