[llvm] 98c2e41 - [ARM] Add lowering of uadd_sat to uq{add|sub}8 and uq{add|sub}16
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 11 07:58:32 PDT 2021
Author: Daniel Egger
Date: 2021-07-11T15:58:11+01:00
New Revision: 98c2e4115d8d7d4962df52f595e8d2d0cfdfdc8f
URL: https://github.com/llvm/llvm-project/commit/98c2e4115d8d7d4962df52f595e8d2d0cfdfdc8f
DIFF: https://github.com/llvm/llvm-project/commit/98c2e4115d8d7d4962df52f595e8d2d0cfdfdc8f.diff
LOG: [ARM] Add lowering of uadd_sat to uq{add|sub}8 and uq{add|sub}16
This follows the lead of https://reviews.llvm.org/D68974 to add lowering
of unsigned saturated addition/subtraction.
Differential Revision: https://reviews.llvm.org/D105413
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
llvm/test/CodeGen/ARM/uadd_sat.ll
llvm/test/CodeGen/ARM/uadd_sat_plus.ll
llvm/test/CodeGen/ARM/usub_sat.ll
llvm/test/CodeGen/ARM/usub_sat_plus.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7269ed9124180..9e419a5d1239c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1115,6 +1115,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::i8, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::i8, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::i16, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::i16, Custom);
}
if (Subtarget->hasBaseDSP()) {
setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
@@ -1776,6 +1780,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::QSUB16b)
MAKE_CASE(ARMISD::QADD8b)
MAKE_CASE(ARMISD::QSUB8b)
+ MAKE_CASE(ARMISD::UQADD16b)
+ MAKE_CASE(ARMISD::UQSUB16b)
+ MAKE_CASE(ARMISD::UQADD8b)
+ MAKE_CASE(ARMISD::UQSUB8b)
MAKE_CASE(ARMISD::BUILD_VECTOR)
MAKE_CASE(ARMISD::BFI)
MAKE_CASE(ARMISD::VORRIMM)
@@ -4948,8 +4956,8 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
-static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
EVT VT = Op.getValueType();
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
return SDValue();
@@ -4957,15 +4965,40 @@ static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
return SDValue();
unsigned NewOpcode;
- bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
switch (VT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::i8:
- NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
+ switch (Op->getOpcode()) {
+ case ISD::UADDSAT:
+ NewOpcode = ARMISD::UQADD8b;
+ break;
+ case ISD::SADDSAT:
+ NewOpcode = ARMISD::QADD8b;
+ break;
+ case ISD::USUBSAT:
+ NewOpcode = ARMISD::UQSUB8b;
+ break;
+ case ISD::SSUBSAT:
+ NewOpcode = ARMISD::QSUB8b;
+ break;
+ }
break;
case MVT::i16:
- NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
+ switch (Op->getOpcode()) {
+ case ISD::UADDSAT:
+ NewOpcode = ARMISD::UQADD16b;
+ break;
+ case ISD::SADDSAT:
+ NewOpcode = ARMISD::QADD16b;
+ break;
+ case ISD::USUBSAT:
+ NewOpcode = ARMISD::UQSUB16b;
+ break;
+ case ISD::SSUBSAT:
+ NewOpcode = ARMISD::QSUB16b;
+ break;
+ }
break;
}
@@ -10129,7 +10162,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerUnsignedALUO(Op, DAG);
case ISD::SADDSAT:
case ISD::SSUBSAT:
- return LowerSADDSUBSAT(Op, DAG, Subtarget);
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ return LowerADDSUBSAT(Op, DAG, Subtarget);
case ISD::LOAD:
return LowerPredicateLoad(Op, DAG);
case ISD::STORE:
@@ -10229,7 +10264,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::SADDSAT:
case ISD::SSUBSAT:
- Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
break;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
@@ -17455,7 +17492,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
}
case ARMISD::SMLALBB:
case ARMISD::QADD16b:
- case ARMISD::QSUB16b: {
+ case ARMISD::QSUB16b:
+ case ARMISD::UQADD16b:
+ case ARMISD::UQSUB16b: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
@@ -17492,7 +17531,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case ARMISD::QADD8b:
- case ARMISD::QSUB8b: {
+ case ARMISD::QSUB8b:
+ case ARMISD::UQADD8b:
+ case ARMISD::UQSUB8b: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 5a6dc047cf350..f91e7854f1992 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -279,6 +279,10 @@ class VectorType;
QSUB8b,
QADD16b,
QSUB16b,
+ UQADD8b,
+ UQSUB8b,
+ UQADD16b,
+ UQSUB16b,
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f6f38e7978a26..7466cecb9b33b 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -240,6 +240,11 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
+def ARMuqadd8b : SDNode<"ARMISD::UQADD8b", SDT_ARMAnd, []>;
+def ARMuqsub8b : SDNode<"ARMISD::UQSUB8b", SDT_ARMAnd, []>;
+def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>;
+def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>;
+
def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -3945,6 +3950,7 @@ def : ARMV5TEPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(QDADD rGPR:$Rm, rGPR:$Rn)>;
def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
(QADD8 rGPR:$Rm, rGPR:$Rn)>;
def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
@@ -3963,6 +3969,16 @@ def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>;
def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>;
def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>;
+def : ARMV6Pat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn),
+ (UQADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn),
+ (UQSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn),
+ (UQADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn),
+ (UQSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
+
// Signed/Unsigned add/subtract
def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 1258c70b81f6f..e7eed2a0bbb1a 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2521,6 +2521,7 @@ def : Thumb2DSPPat<(saddsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(t2QDADD rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
(t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
(t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
@@ -2530,6 +2531,15 @@ def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
(t2QSUB16 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn),
+ (t2UQADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn),
+ (t2UQSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn),
+ (t2UQADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn),
+ (t2UQSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
// Signed/Unsigned add/subtract
def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;
diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll
index 5036168b33d4e..39c79f4104e6e 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1
-; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2
-; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP
+; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP
; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
declare i4 @llvm.uadd.sat.i4(i4, i4)
@@ -106,21 +106,25 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-T1-NEXT: .LCPI2_0:
; CHECK-T1-NEXT: .long 65535 @ 0xffff
;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: add r1, r0
-; CHECK-T2-NEXT: movw r0, #65535
-; CHECK-T2-NEXT: cmp r1, r0
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, r1
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: add r1, r0
+; CHECK-T2NODSP-NEXT: movw r0, #65535
+; CHECK-T2NODSP-NEXT: cmp r1, r0
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, r1
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxth r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: add r1, r0, r1
-; CHECK-ARM-NEXT: movw r0, #65535
-; CHECK-ARM-NEXT: cmp r1, r0
-; CHECK-ARM-NEXT: movlo r0, r1
+; CHECK-ARM-NEXT: uqadd16 r0, r0, r1
+; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y)
ret i16 %tmp
@@ -137,19 +141,24 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-T1-NEXT: .LBB3_2:
; CHECK-T1-NEXT: bx lr
;
-; CHECK-T2-LABEL: func8:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: add r0, r1
-; CHECK-T2-NEXT: cmp r0, #255
-; CHECK-T2-NEXT: it hs
-; CHECK-T2-NEXT: movhs r0, #255
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func8:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: add r0, r1
+; CHECK-T2NODSP-NEXT: cmp r0, #255
+; CHECK-T2NODSP-NEXT: it hs
+; CHECK-T2NODSP-NEXT: movhs r0, #255
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func8:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxtb r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: add r0, r0, r1
-; CHECK-ARM-NEXT: cmp r0, #255
-; CHECK-ARM-NEXT: movhs r0, #255
+; CHECK-ARM-NEXT: uqadd8 r0, r0, r1
+; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y)
ret i8 %tmp
diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
index 3ebdafa49d97e..451b32f730424 100644
--- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll
@@ -130,20 +130,15 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
; CHECK-T2DSP-LABEL: func16:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: muls r1, r2, r1
-; CHECK-T2DSP-NEXT: uxtah r1, r0, r1
-; CHECK-T2DSP-NEXT: movw r0, #65535
-; CHECK-T2DSP-NEXT: cmp r1, r0
-; CHECK-T2DSP-NEXT: it lo
-; CHECK-T2DSP-NEXT: movlo r0, r1
+; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxth r0, r0
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: mul r1, r1, r2
-; CHECK-ARM-NEXT: uxtah r1, r0, r1
-; CHECK-ARM-NEXT: movw r0, #65535
-; CHECK-ARM-NEXT: cmp r1, r0
-; CHECK-ARM-NEXT: movlo r0, r1
+; CHECK-ARM-NEXT: uqadd16 r0, r0, r1
+; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i16 %y, %z
%tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a)
@@ -176,18 +171,15 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
; CHECK-T2DSP-LABEL: func8:
; CHECK-T2DSP: @ %bb.0:
; CHECK-T2DSP-NEXT: muls r1, r2, r1
-; CHECK-T2DSP-NEXT: uxtab r0, r0, r1
-; CHECK-T2DSP-NEXT: cmp r0, #255
-; CHECK-T2DSP-NEXT: it hs
-; CHECK-T2DSP-NEXT: movhs r0, #255
+; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxtb r0, r0
; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: smulbb r1, r1, r2
-; CHECK-ARM-NEXT: uxtab r0, r0, r1
-; CHECK-ARM-NEXT: cmp r0, #255
-; CHECK-ARM-NEXT: movhs r0, #255
+; CHECK-ARM-NEXT: uqadd8 r0, r0, r1
+; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i8 %y, %z
%tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a)
diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll
index 4bf42dbb59eff..c16869f4b4ddb 100644
--- a/llvm/test/CodeGen/ARM/usub_sat.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1
-; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2
-; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP
+; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP
; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
declare i4 @llvm.usub.sat.i4(i4, i4)
@@ -100,17 +100,23 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-T1-NEXT: .LBB2_2:
; CHECK-T1-NEXT: bx lr
;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: subs r0, r0, r1
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: subs r0, r0, r1
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, #0
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxth r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: subs r0, r0, r1
-; CHECK-ARM-NEXT: movlo r0, #0
+; CHECK-ARM-NEXT: uqsub16 r0, r0, r1
+; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y)
ret i16 %tmp
@@ -126,17 +132,23 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-T1-NEXT: .LBB3_2:
; CHECK-T1-NEXT: bx lr
;
-; CHECK-T2-LABEL: func8:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: subs r0, r0, r1
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func8:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: subs r0, r0, r1
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, #0
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func8:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxtb r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM: @ %bb.0:
-; CHECK-ARM-NEXT: subs r0, r0, r1
-; CHECK-ARM-NEXT: movlo r0, #0
+; CHECK-ARM-NEXT: uqsub8 r0, r0, r1
+; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y)
ret i8 %tmp
diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
index c0fcd5e8b1a63..04494a2e40599 100644
--- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1
-; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2
-; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP
+; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP
; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
declare i4 @llvm.usub.sat.i4(i4, i4)
@@ -112,21 +112,27 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
; CHECK-T1-NEXT: .LBB2_2:
; CHECK-T1-NEXT: bx lr
;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: muls r1, r2, r1
-; CHECK-T2-NEXT: uxth r1, r1
-; CHECK-T2-NEXT: subs r0, r0, r1
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: muls r1, r2, r1
+; CHECK-T2NODSP-NEXT: uxth r1, r1
+; CHECK-T2NODSP-NEXT: subs r0, r0, r1
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, #0
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: muls r1, r2, r1
+; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxth r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: mul r1, r1, r2
-; CHECK-ARM-NEXT: uxth r1, r1
-; CHECK-ARM-NEXT: subs r0, r0, r1
-; CHECK-ARM-NEXT: movlo r0, #0
+; CHECK-ARM-NEXT: uqsub16 r0, r0, r1
+; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i16 %y, %z
%tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
@@ -145,21 +151,27 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
; CHECK-T1-NEXT: .LBB3_2:
; CHECK-T1-NEXT: bx lr
;
-; CHECK-T2-LABEL: func8:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: muls r1, r2, r1
-; CHECK-T2-NEXT: uxtb r1, r1
-; CHECK-T2-NEXT: subs r0, r0, r1
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func8:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: muls r1, r2, r1
+; CHECK-T2NODSP-NEXT: uxtb r1, r1
+; CHECK-T2NODSP-NEXT: subs r0, r0, r1
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, #0
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func8:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: muls r1, r2, r1
+; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxtb r0, r0
+; CHECK-T2DSP-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: smulbb r1, r1, r2
-; CHECK-ARM-NEXT: uxtb r1, r1
-; CHECK-ARM-NEXT: subs r0, r0, r1
-; CHECK-ARM-NEXT: movlo r0, #0
+; CHECK-ARM-NEXT: uqsub8 r0, r0, r1
+; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i8 %y, %z
%tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
More information about the llvm-commits
mailing list