[llvm] 541828e - [ARM] Single source VMOVNT
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 06:29:11 PST 2021
Author: David Green
Date: 2021-02-12T14:28:57Z
New Revision: 541828e35da28f8bae0fad58ba86ac0cc3a0f898
URL: https://github.com/llvm/llvm-project/commit/541828e35da28f8bae0fad58ba86ac0cc3a0f898
DIFF: https://github.com/llvm/llvm-project/commit/541828e35da28f8bae0fad58ba86ac0cc3a0f898.diff
LOG: [ARM] Single source VMOVNT
Our current lowering of VMOVNT goes via a shuffle vector of the form
<0, N, 2, N+2, 4, N+4, ..>. That can of course also be a single-input
shuffle of the form <0, 0, 2, 2, 4, 4, ..>, where we use a VMOVNT to
insert a vector into the top lanes of itself. This adds lowering of that
case, reusing the existing isVMOVNMask.
Differential Revision: https://reviews.llvm.org/D96065
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-vmovn.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bc4dbd9e1dd5..c7d6cf03e728 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7225,11 +7225,11 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
return true;
}
-static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top) {
+static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
unsigned NumElts = VT.getVectorNumElements();
// Make sure the mask has the right size.
if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
- return false;
+ return false;
// If Top
// Look for <0, N, 2, N+2, 4, N+4, ..>.
@@ -7238,10 +7238,11 @@ static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top) {
// Look for <0, N+1, 2, N+3, 4, N+5, ..>
// This inserts Input1 into Input2
unsigned Offset = Top ? 0 : 1;
- for (unsigned i = 0; i < NumElts; i+=2) {
+ unsigned N = SingleSource ? 0 : NumElts;
+ for (unsigned i = 0; i < NumElts; i += 2) {
if (M[i] >= 0 && M[i] != (int)i)
return false;
- if (M[i+1] >= 0 && M[i+1] != (int)(NumElts + i + Offset))
+ if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
return false;
}
@@ -7948,7 +7949,8 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isReverseMask(M, VT))
return true;
else if (Subtarget->hasMVEIntegerOps() &&
- (isVMOVNMask(M, VT, 0) || isVMOVNMask(M, VT, 1)))
+ (isVMOVNMask(M, VT, true, false) ||
+ isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
return true;
else
return false;
@@ -8364,12 +8366,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
if (ST->hasMVEIntegerOps()) {
- if (isVMOVNMask(ShuffleMask, VT, 0))
+ if (isVMOVNMask(ShuffleMask, VT, false, false))
return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
DAG.getConstant(0, dl, MVT::i32));
- if (isVMOVNMask(ShuffleMask, VT, 1))
+ if (isVMOVNMask(ShuffleMask, VT, true, false))
return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
DAG.getConstant(1, dl, MVT::i32));
+ if (isVMOVNMask(ShuffleMask, VT, true, true))
+ return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
+ DAG.getConstant(1, dl, MVT::i32));
}
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vmovn.ll
index bb79451bec52..bfc9db2dc016 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovn.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovn.ll
@@ -44,36 +44,13 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc3(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_trunc3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov q1, q0
-; CHECK-NEXT: vmov r0, s4
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: vmov.16 q0[1], r0
-; CHECK-NEXT: vmov r0, s5
-; CHECK-NEXT: vmov.16 q0[2], r0
-; CHECK-NEXT: vmov.16 q0[3], r0
-; CHECK-NEXT: vmov r0, s6
-; CHECK-NEXT: vmov.16 q0[4], r0
-; CHECK-NEXT: vmov.16 q0[5], r0
-; CHECK-NEXT: vmov r0, s7
-; CHECK-NEXT: vmov.16 q0[6], r0
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vmovnt.i32 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_trunc3:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vrev64.32 q2, q0
-; CHECKBE-NEXT: vmov r0, s8
-; CHECKBE-NEXT: vmov.16 q1[0], r0
-; CHECKBE-NEXT: vmov.16 q1[1], r0
-; CHECKBE-NEXT: vmov r0, s9
-; CHECKBE-NEXT: vmov.16 q1[2], r0
-; CHECKBE-NEXT: vmov.16 q1[3], r0
-; CHECKBE-NEXT: vmov r0, s10
-; CHECKBE-NEXT: vmov.16 q1[4], r0
-; CHECKBE-NEXT: vmov.16 q1[5], r0
-; CHECKBE-NEXT: vmov r0, s11
-; CHECKBE-NEXT: vmov.16 q1[6], r0
-; CHECKBE-NEXT: vmov.16 q1[7], r0
+; CHECKBE-NEXT: vrev64.32 q1, q0
+; CHECKBE-NEXT: vmovnt.i32 q1, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -125,60 +102,13 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc3(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_trunc3:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u16 r0, q0[0]
-; CHECK-NEXT: vmov q1, q0
-; CHECK-NEXT: vmov.8 q0[0], r0
-; CHECK-NEXT: vmov.8 q0[1], r0
-; CHECK-NEXT: vmov.u16 r0, q1[1]
-; CHECK-NEXT: vmov.8 q0[2], r0
-; CHECK-NEXT: vmov.8 q0[3], r0
-; CHECK-NEXT: vmov.u16 r0, q1[2]
-; CHECK-NEXT: vmov.8 q0[4], r0
-; CHECK-NEXT: vmov.8 q0[5], r0
-; CHECK-NEXT: vmov.u16 r0, q1[3]
-; CHECK-NEXT: vmov.8 q0[6], r0
-; CHECK-NEXT: vmov.8 q0[7], r0
-; CHECK-NEXT: vmov.u16 r0, q1[4]
-; CHECK-NEXT: vmov.8 q0[8], r0
-; CHECK-NEXT: vmov.8 q0[9], r0
-; CHECK-NEXT: vmov.u16 r0, q1[5]
-; CHECK-NEXT: vmov.8 q0[10], r0
-; CHECK-NEXT: vmov.8 q0[11], r0
-; CHECK-NEXT: vmov.u16 r0, q1[6]
-; CHECK-NEXT: vmov.8 q0[12], r0
-; CHECK-NEXT: vmov.8 q0[13], r0
-; CHECK-NEXT: vmov.u16 r0, q1[7]
-; CHECK-NEXT: vmov.8 q0[14], r0
-; CHECK-NEXT: vmov.8 q0[15], r0
+; CHECK-NEXT: vmovnt.i16 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_trunc3:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vrev64.16 q2, q0
-; CHECKBE-NEXT: vmov.u16 r0, q2[0]
-; CHECKBE-NEXT: vmov.8 q1[0], r0
-; CHECKBE-NEXT: vmov.8 q1[1], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[1]
-; CHECKBE-NEXT: vmov.8 q1[2], r0
-; CHECKBE-NEXT: vmov.8 q1[3], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[2]
-; CHECKBE-NEXT: vmov.8 q1[4], r0
-; CHECKBE-NEXT: vmov.8 q1[5], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[3]
-; CHECKBE-NEXT: vmov.8 q1[6], r0
-; CHECKBE-NEXT: vmov.8 q1[7], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[4]
-; CHECKBE-NEXT: vmov.8 q1[8], r0
-; CHECKBE-NEXT: vmov.8 q1[9], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[5]
-; CHECKBE-NEXT: vmov.8 q1[10], r0
-; CHECKBE-NEXT: vmov.8 q1[11], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[6]
-; CHECKBE-NEXT: vmov.8 q1[12], r0
-; CHECKBE-NEXT: vmov.8 q1[13], r0
-; CHECKBE-NEXT: vmov.u16 r0, q2[7]
-; CHECKBE-NEXT: vmov.8 q1[14], r0
-; CHECKBE-NEXT: vmov.8 q1[15], r0
+; CHECKBE-NEXT: vrev64.16 q1, q0
+; CHECKBE-NEXT: vmovnt.i16 q1, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -604,19 +534,13 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmovn16_single_t(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_single_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vins.f16 s0, s0
-; CHECK-NEXT: vins.f16 s1, s1
-; CHECK-NEXT: vins.f16 s2, s2
-; CHECK-NEXT: vins.f16 s3, s3
+; CHECK-NEXT: vmovnt.i32 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_single_t:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: vins.f16 s5, s5
-; CHECKBE-NEXT: vins.f16 s4, s4
-; CHECKBE-NEXT: vins.f16 s6, s6
-; CHECKBE-NEXT: vins.f16 s7, s7
+; CHECKBE-NEXT: vmovnt.i32 q1, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -864,60 +788,13 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vmovn8_single_t(<16 x i8> %src1) {
; CHECK-LABEL: vmovn8_single_t:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u8 r0, q0[0]
-; CHECK-NEXT: vmov q1, q0
-; CHECK-NEXT: vmov.8 q0[0], r0
-; CHECK-NEXT: vmov.8 q0[1], r0
-; CHECK-NEXT: vmov.u8 r0, q1[2]
-; CHECK-NEXT: vmov.8 q0[2], r0
-; CHECK-NEXT: vmov.8 q0[3], r0
-; CHECK-NEXT: vmov.u8 r0, q1[4]
-; CHECK-NEXT: vmov.8 q0[4], r0
-; CHECK-NEXT: vmov.8 q0[5], r0
-; CHECK-NEXT: vmov.u8 r0, q1[6]
-; CHECK-NEXT: vmov.8 q0[6], r0
-; CHECK-NEXT: vmov.8 q0[7], r0
-; CHECK-NEXT: vmov.u8 r0, q1[8]
-; CHECK-NEXT: vmov.8 q0[8], r0
-; CHECK-NEXT: vmov.8 q0[9], r0
-; CHECK-NEXT: vmov.u8 r0, q1[10]
-; CHECK-NEXT: vmov.8 q0[10], r0
-; CHECK-NEXT: vmov.8 q0[11], r0
-; CHECK-NEXT: vmov.u8 r0, q1[12]
-; CHECK-NEXT: vmov.8 q0[12], r0
-; CHECK-NEXT: vmov.8 q0[13], r0
-; CHECK-NEXT: vmov.u8 r0, q1[14]
-; CHECK-NEXT: vmov.8 q0[14], r0
-; CHECK-NEXT: vmov.8 q0[15], r0
+; CHECK-NEXT: vmovnt.i16 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_single_t:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vrev64.8 q2, q0
-; CHECKBE-NEXT: vmov.u8 r0, q2[0]
-; CHECKBE-NEXT: vmov.8 q1[0], r0
-; CHECKBE-NEXT: vmov.8 q1[1], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[2]
-; CHECKBE-NEXT: vmov.8 q1[2], r0
-; CHECKBE-NEXT: vmov.8 q1[3], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[4]
-; CHECKBE-NEXT: vmov.8 q1[4], r0
-; CHECKBE-NEXT: vmov.8 q1[5], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[6]
-; CHECKBE-NEXT: vmov.8 q1[6], r0
-; CHECKBE-NEXT: vmov.8 q1[7], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[8]
-; CHECKBE-NEXT: vmov.8 q1[8], r0
-; CHECKBE-NEXT: vmov.8 q1[9], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[10]
-; CHECKBE-NEXT: vmov.8 q1[10], r0
-; CHECKBE-NEXT: vmov.8 q1[11], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[12]
-; CHECKBE-NEXT: vmov.8 q1[12], r0
-; CHECKBE-NEXT: vmov.8 q1[13], r0
-; CHECKBE-NEXT: vmov.u8 r0, q2[14]
-; CHECKBE-NEXT: vmov.8 q1[14], r0
-; CHECKBE-NEXT: vmov.8 q1[15], r0
+; CHECKBE-NEXT: vrev64.8 q1, q0
+; CHECKBE-NEXT: vmovnt.i16 q1, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
More information about the llvm-commits
mailing list