[llvm] 5b2626e - [ARM] Flatten identity shuffles through vqdmulh nodes
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 1 11:14:35 PST 2021
Author: David Green
Date: 2021-02-01T19:14:20Z
New Revision: 5b2626ea87f48eab011b616a96540b9e83c7ca88
URL: https://github.com/llvm/llvm-project/commit/5b2626ea87f48eab011b616a96540b9e83c7ca88
DIFF: https://github.com/llvm/llvm-project/commit/5b2626ea87f48eab011b616a96540b9e83c7ca88.diff
LOG: [ARM] Flatten identity shuffles through vqdmulh nodes
Given a shuffle(vqdmulh(shuffle, shuffle)), we can flatten the shuffles
out if they compose to an identity mask. This can come up during lane
interleaving, when we do that better.
Differential Revision: https://reviews.llvm.org/D94034
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 12d833975f90..fd17bf0e3da4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14019,9 +14019,51 @@ static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// When lowering complex nodes that we recognize, like VQDMULH and MULH, we
+// can end up with shuffle(binop(shuffle, shuffle)), which can be simplified
+// to the bare binop when the composed shuffles cancel out to an identity.
+// Returns the flattened binop, or SDValue() when the pattern doesn't match.
+static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  // Only handle single-input shuffles whose input type matches the result.
+  if (!N->getOperand(1).isUndef() || N->getOperand(0).getValueType() != VT)
+    return SDValue();
+  SDValue Op = N->getOperand(0);
+
+  // Looking for binary operators that will have been folded from
+  // truncates/extends.
+  switch (Op.getOpcode()) {
+  case ARMISD::VQDMULH:
+  case ISD::MULHS:
+  case ISD::MULHU:
+    break;
+  default:
+    return SDValue();
+  }
+
+  // Both binop operands must be single-input shuffles with identical masks,
+  // fed from vectors of the result type.
+  ShuffleVectorSDNode *Op0 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0));
+  ShuffleVectorSDNode *Op1 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1));
+  if (!Op0 || !Op1 || !Op0->getOperand(1).isUndef() ||
+      !Op1->getOperand(1).isUndef() || Op0->getMask() != Op1->getMask() ||
+      Op0->getOperand(0).getValueType() != VT)
+    return SDValue();
+
+  // Check that the composed masks form an identity shuffle: for every defined
+  // lane i, the inner mask entry selected by the outer mask must map back to
+  // lane i. Undef lanes (-1) may compose to anything. Note the >= 0 tests:
+  // 0 is a valid mask index (undef is -1), so lane 0 must be checked too.
+  ArrayRef<int> NMask = N->getMask();
+  ArrayRef<int> OpMask = Op0->getMask();
+  for (int i = 0, e = NMask.size(); i != e; i++) {
+    if (NMask[i] >= 0 && OpMask[NMask[i]] >= 0 && OpMask[NMask[i]] != i)
+      return SDValue();
+  }
+
+  // The shuffles cancel; emit the binop directly on the unshuffled inputs.
+  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+                     Op0->getOperand(0), Op1->getOperand(0));
+}
+
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ if (SDValue R = FlattenVectorShuffle(cast<ShuffleVectorSDNode>(N), DAG))
+ return R;
+
// The LLVM shufflevector instruction does not require the shuffle mask
// length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
// have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
index 6722aa706769..9e6abaa33ea3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
@@ -140,55 +140,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_interleaved(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vqdmulh_i16_interleaved:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.u16 r0, q0[0]
-; CHECK-NEXT: vmov.16 q2[0], r0
-; CHECK-NEXT: vmov.u16 r0, q0[2]
-; CHECK-NEXT: vmov.16 q2[1], r0
-; CHECK-NEXT: vmov.u16 r0, q0[4]
-; CHECK-NEXT: vmov.16 q2[2], r0
-; CHECK-NEXT: vmov.u16 r0, q0[6]
-; CHECK-NEXT: vmov.16 q2[3], r0
-; CHECK-NEXT: vmov.u16 r0, q0[1]
-; CHECK-NEXT: vmov.16 q2[4], r0
-; CHECK-NEXT: vmov.u16 r0, q0[3]
-; CHECK-NEXT: vmov.16 q2[5], r0
-; CHECK-NEXT: vmov.u16 r0, q0[5]
-; CHECK-NEXT: vmov.16 q2[6], r0
-; CHECK-NEXT: vmov.u16 r0, q0[7]
-; CHECK-NEXT: vmov.16 q2[7], r0
-; CHECK-NEXT: vmov.u16 r0, q1[0]
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: vmov.u16 r0, q1[2]
-; CHECK-NEXT: vmov.16 q0[1], r0
-; CHECK-NEXT: vmov.u16 r0, q1[4]
-; CHECK-NEXT: vmov.16 q0[2], r0
-; CHECK-NEXT: vmov.u16 r0, q1[6]
-; CHECK-NEXT: vmov.16 q0[3], r0
-; CHECK-NEXT: vmov.u16 r0, q1[1]
-; CHECK-NEXT: vmov.16 q0[4], r0
-; CHECK-NEXT: vmov.u16 r0, q1[3]
-; CHECK-NEXT: vmov.16 q0[5], r0
-; CHECK-NEXT: vmov.u16 r0, q1[5]
-; CHECK-NEXT: vmov.16 q0[6], r0
-; CHECK-NEXT: vmov.u16 r0, q1[7]
-; CHECK-NEXT: vmov.16 q0[7], r0
-; CHECK-NEXT: vqdmulh.s16 q1, q0, q2
-; CHECK-NEXT: vmov.u16 r0, q1[0]
-; CHECK-NEXT: vmov.16 q0[0], r0
-; CHECK-NEXT: vmov.u16 r0, q1[4]
-; CHECK-NEXT: vmov.16 q0[1], r0
-; CHECK-NEXT: vmov.u16 r0, q1[1]
-; CHECK-NEXT: vmov.16 q0[2], r0
-; CHECK-NEXT: vmov.u16 r0, q1[5]
-; CHECK-NEXT: vmov.16 q0[3], r0
-; CHECK-NEXT: vmov.u16 r0, q1[2]
-; CHECK-NEXT: vmov.16 q0[4], r0
-; CHECK-NEXT: vmov.u16 r0, q1[6]
-; CHECK-NEXT: vmov.16 q0[5], r0
-; CHECK-NEXT: vmov.u16 r0, q1[3]
-; CHECK-NEXT: vmov.16 q0[6], r0
-; CHECK-NEXT: vmov.u16 r0, q1[7]
-; CHECK-NEXT: vmov.16 q0[7], r0
+; CHECK-NEXT: vqdmulh.s16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%0 = shufflevector <8 x i16> %s0, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
More information about the llvm-commits
mailing list