[llvm] e73bb45 - [ARM] VQMOVN demand bits analysis
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 5 10:41:32 PDT 2020
Author: David Green
Date: 2020-06-05T18:41:02+01:00
New Revision: e73bb45c2b7bb20eaac230d1708d07c55da47ee2
URL: https://github.com/llvm/llvm-project/commit/e73bb45c2b7bb20eaac230d1708d07c55da47ee2
DIFF: https://github.com/llvm/llvm-project/commit/e73bb45c2b7bb20eaac230d1708d07c55da47ee2.diff
LOG: [ARM] VQMOVN demand bits analysis
Similar to VMOVN, a VQMOVN will only demand the top/bottom lanes of its
first input. However, unlike VMOVN, it will need access to the entire
second argument, as that value is saturated, not just moved in place.
Differential Revision: https://reviews.llvm.org/D80515
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bb50525b673b..10e7137f4e80 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14457,6 +14457,25 @@ static SDValue PerformVMOVNCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformVQMOVNCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned IsTop = N->getConstantOperandVal(2);
+
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ APInt Op0DemandedElts =
+ APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+ return SDValue();
+}
+
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
@@ -15593,6 +15612,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
case ARMISD::VMOVN:
return PerformVMOVNCombine(N, DCI);
+ case ARMISD::VQMOVNs:
+ case ARMISD::VQMOVNu:
+ return PerformVQMOVNCombine(N, DCI);
case ARMISD::ASRL:
case ARMISD::LSRL:
case ARMISD::LSLL:
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 70de3fcf3348..291c13543d14 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -1416,7 +1416,6 @@ define arm_aapcs_vfpcc void @ssatmul_8i_q15(i16* nocapture readonly %pSrcA, i16*
; CHECK-NEXT: vmullt.s16 q2, q1, q0
; CHECK-NEXT: vmullb.s16 q0, q1, q0
; CHECK-NEXT: vqshrnb.s32 q0, q0, #15
-; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vqshrnt.s32 q0, q2, #15
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: le lr, .LBB7_4
@@ -1856,7 +1855,6 @@ define arm_aapcs_vfpcc void @ssatmul_8ti_q15(i16* nocapture readonly %pSrcA, i16
; CHECK-NEXT: vmullt.s16 q7, q6, q5
; CHECK-NEXT: vmullb.s16 q5, q6, q5
; CHECK-NEXT: vqshrnb.s32 q5, q5, #15
-; CHECK-NEXT: vmovlb.s16 q5, q5
; CHECK-NEXT: vqshrnt.s32 q5, q7, #15
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q5, [r2], #16
@@ -2645,7 +2643,6 @@ define arm_aapcs_vfpcc void @ssatmul_16i_q7(i8* nocapture readonly %pSrcA, i8* n
; CHECK-NEXT: vmullt.s8 q2, q1, q0
; CHECK-NEXT: vmullb.s8 q0, q1, q0
; CHECK-NEXT: vqshrnb.s16 q0, q0, #7
-; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vqshrnt.s16 q0, q2, #7
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: le lr, .LBB16_4
@@ -3310,7 +3307,6 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8*
; CHECK-NEXT: vmullt.s8 q5, q4, q0
; CHECK-NEXT: vmullb.s8 q0, q4, q0
; CHECK-NEXT: vqshrnb.s16 q0, q0, #7
-; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vqshrnt.s16 q0, q5, #7
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrbt.8 q0, [r2], #16
More information about the llvm-commits
mailing list