[llvm] a10789d - [ARM] Recognize SSAT and USAT from SMIN/SMAX
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 23 00:56:00 PST 2022
Author: David Green
Date: 2022-02-23T08:55:54Z
New Revision: a10789d6cda71cd2ea13309fd6daf854d44e0906
URL: https://github.com/llvm/llvm-project/commit/a10789d6cda71cd2ea13309fd6daf854d44e0906
DIFF: https://github.com/llvm/llvm-project/commit/a10789d6cda71cd2ea13309fd6daf854d44e0906.diff
LOG: [ARM] Recognize SSAT and USAT from SMIN/SMAX
We have some recognition of SSAT and USAT from SELECT_CC at the moment.
This extends the matching to SMIN/SMAX, which can help catch more cases,
either from min/max being the canonical form in instcombine or from some
expanded nodes like fp_to_si_sat.
Differential Revision: https://reviews.llvm.org/D119819
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/fpclamptosat.ll
llvm/test/CodeGen/ARM/sadd_sat.ll
llvm/test/CodeGen/ARM/sadd_sat_plus.ll
llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
llvm/test/CodeGen/ARM/ssat.ll
llvm/test/CodeGen/ARM/ssub_sat.ll
llvm/test/CodeGen/ARM/ssub_sat_plus.ll
llvm/test/CodeGen/ARM/usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c678901bb328..cdf5caff228e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1564,6 +1564,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::SHL);
+ // Attempt to lower smin/smax to ssat/usat
+ if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
+ Subtarget->isThumb2()) {
+ setTargetDAGCombine(ISD::SMIN);
+ setTargetDAGCombine(ISD::SMAX);
+ }
setStackPointerRegisterToSaveRestore(ARM::SP);
@@ -17557,12 +17563,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Lower an i32 smin/smax pair with constant bounds (either nesting order) to ARMISD::SSAT or ARMISD::USAT when the
+// bounds are saturating: upper bound C with C + 1 a power of two, lower bound ~C (ssat) or 0 (usat); else returns SDValue().
+static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
+ !Subtarget->isThumb2())
+ return SDValue(); // Only non-Thumb with v6 ops, or Thumb2, is handled.
+
+ EVT VT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+
+ if (VT != MVT::i32 ||
+ (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)))
+ return SDValue(); // Need i32, an inner smin/smax, and constant second operands on both nodes.
+
+ SDValue Min = Op; // Start by assuming smin(smax(x, C1), C2); swapped below otherwise.
+ SDValue Max = Op0;
+ SDValue Input = Op0.getOperand(0); // The value being clamped.
+ if (Min.getOpcode() == ISD::SMAX)
+ std::swap(Min, Max); // Outer node is an smax, so treat the inner node as the min.
+
+ APInt MinC = Min.getConstantOperandAPInt(1); // Upper bound of the clamp.
+ APInt MaxC = Max.getConstantOperandAPInt(1); // Lower bound of the clamp.
+
+ if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
+ !(MinC + 1).isPowerOf2())
+ return SDValue(); // Reject smin/smin and smax/smax pairs, and upper bounds not of the form 2^k - 1.
+
+ SDLoc DL(Op);
+ if (MinC == ~MaxC) // Signed range [~MinC, MinC].
+ return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); // 15 for MinC = 32767; the tests print that as `ssat #16`.
+ if (MaxC == 0) // Range [0, MinC].
+ return DAG.getNode(ARMISD::USAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); // 16 for MinC = 65535; the tests print that as `usat #16`.
+
+ return SDValue(); // Constant bounds that form neither an ssat nor a usat range.
+}
+
/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
/// saturates.
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
+
+ if (VT == MVT::i32)
+ return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
+
if (!ST->hasMVEIntegerOps())
return SDValue();
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 5223ae1286f5..48241424ac6e 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -2718,28 +2718,14 @@ define i16 @stest_f64i16_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
-; VFP2-NEXT: movw r1, #32767
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it ge
-; VFP2-NEXT: movge r0, r1
-; VFP2-NEXT: movw r1, #32768
-; VFP2-NEXT: movt r1, #65535
-; VFP2-NEXT: cmn.w r0, #32768
-; VFP2-NEXT: it le
-; VFP2-NEXT: movle r0, r1
+; VFP2-NEXT: ssat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: stest_f64i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
-; FULL-NEXT: movw r1, #32767
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: movw r1, #32768
-; FULL-NEXT: movt r1, #65535
-; FULL-NEXT: cmn.w r0, #32768
-; FULL-NEXT: csel r0, r0, r1, gt
+; FULL-NEXT: ssat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
@@ -2820,21 +2806,14 @@ define i16 @ustest_f64i16_mm(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
-; VFP2-NEXT: movw r1, #65535
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r1, r0
-; VFP2-NEXT: bic.w r0, r1, r1, asr #31
+; VFP2-NEXT: usat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
-; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: bic.w r0, r0, r0, asr #31
+; FULL-NEXT: usat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
@@ -2870,33 +2849,12 @@ define i16 @stest_f32i16_mm(float %x) {
; SOFT-NEXT: .LCPI39_1:
; SOFT-NEXT: .long 4294934528 @ 0xffff8000
;
-; VFP2-LABEL: stest_f32i16_mm:
-; VFP2: @ %bb.0: @ %entry
-; VFP2-NEXT: vcvt.s32.f32 s0, s0
-; VFP2-NEXT: movw r1, #32767
-; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r1, r0
-; VFP2-NEXT: movw r0, #32768
-; VFP2-NEXT: cmn.w r1, #32768
-; VFP2-NEXT: movt r0, #65535
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, r1
-; VFP2-NEXT: bx lr
-;
-; FULL-LABEL: stest_f32i16_mm:
-; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: vcvt.s32.f32 s0, s0
-; FULL-NEXT: movw r1, #32767
-; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: movw r1, #32768
-; FULL-NEXT: movt r1, #65535
-; FULL-NEXT: cmn.w r0, #32768
-; FULL-NEXT: csel r0, r0, r1, gt
-; FULL-NEXT: bx lr
+; VFP-LABEL: stest_f32i16_mm:
+; VFP: @ %bb.0: @ %entry
+; VFP-NEXT: vcvt.s32.f32 s0, s0
+; VFP-NEXT: vmov r0, s0
+; VFP-NEXT: ssat r0, #16, r0
+; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
@@ -2968,26 +2926,12 @@ define i16 @ustest_f32i16_mm(float %x) {
; SOFT-NEXT: .LCPI41_0:
; SOFT-NEXT: .long 65535 @ 0xffff
;
-; VFP2-LABEL: ustest_f32i16_mm:
-; VFP2: @ %bb.0: @ %entry
-; VFP2-NEXT: vcvt.s32.f32 s0, s0
-; VFP2-NEXT: movw r1, #65535
-; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r1, r0
-; VFP2-NEXT: bic.w r0, r1, r1, asr #31
-; VFP2-NEXT: bx lr
-;
-; FULL-LABEL: ustest_f32i16_mm:
-; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: vcvt.s32.f32 s0, s0
-; FULL-NEXT: movw r1, #65535
-; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: bic.w r0, r0, r0, asr #31
-; FULL-NEXT: bx lr
+; VFP-LABEL: ustest_f32i16_mm:
+; VFP: @ %bb.0: @ %entry
+; VFP-NEXT: vcvt.s32.f32 s0, s0
+; VFP-NEXT: vmov r0, s0
+; VFP-NEXT: usat r0, #16, r0
+; VFP-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
@@ -3031,30 +2975,16 @@ define i16 @stest_f16i16_mm(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: movw r1, #32767
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r1, r0
-; VFP2-NEXT: movw r0, #32768
-; VFP2-NEXT: cmn.w r1, #32768
-; VFP2-NEXT: movt r0, #65535
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, r1
+; VFP2-NEXT: ssat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: stest_f16i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
-; FULL-NEXT: movw r1, #32767
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: movw r1, #32768
-; FULL-NEXT: movt r1, #65535
-; FULL-NEXT: cmn.w r0, #32768
-; FULL-NEXT: csel r0, r0, r1, gt
+; FULL-NEXT: ssat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
@@ -3143,23 +3073,16 @@ define i16 @ustest_f16i16_mm(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: cmp r0, r1
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r1, r0
-; VFP2-NEXT: bic.w r0, r1, r1, asr #31
+; VFP2-NEXT: usat r0, #16, r0
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i16_mm:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
-; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: cmp r0, r1
-; FULL-NEXT: csel r0, r0, r1, lt
-; FULL-NEXT: bic.w r0, r0, r0, asr #31
+; FULL-NEXT: usat r0, #16, r0
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll
index 287e52d5044d..e0aca8e43380 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat.ll
@@ -148,15 +148,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
-; CHECK-T2NODSP-NEXT: movw r1, #32767
-; CHECK-T2NODSP-NEXT: cmp r0, r1
-; CHECK-T2NODSP-NEXT: it lt
-; CHECK-T2NODSP-NEXT: movlt r1, r0
-; CHECK-T2NODSP-NEXT: movw r0, #32768
-; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
-; CHECK-T2NODSP-NEXT: movt r0, #65535
-; CHECK-T2NODSP-NEXT: it gt
-; CHECK-T2NODSP-NEXT: movgt r0, r1
+; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
@@ -219,12 +211,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #127
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #127
-; CHECK-T2NODSP-NEXT: cmn.w r0, #128
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #127
+; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
@@ -280,12 +267,7 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: add r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #7
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #7
-; CHECK-T2NODSP-NEXT: cmn.w r0, #8
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #7
+; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func3:
diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
index 6a8d9def5663..bbdfa6cea6e4 100644
--- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -151,15 +151,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxth r1, r1
; CHECK-T2NODSP-NEXT: add r0, r1
-; CHECK-T2NODSP-NEXT: movw r1, #32767
-; CHECK-T2NODSP-NEXT: cmp r0, r1
-; CHECK-T2NODSP-NEXT: it lt
-; CHECK-T2NODSP-NEXT: movlt r1, r0
-; CHECK-T2NODSP-NEXT: movw r0, #32768
-; CHECK-T2NODSP-NEXT: movt r0, #65535
-; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
-; CHECK-T2NODSP-NEXT: it gt
-; CHECK-T2NODSP-NEXT: movgt r0, r1
+; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
@@ -205,12 +197,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxtb r1, r1
; CHECK-T2NODSP-NEXT: add r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #127
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #127
-; CHECK-T2NODSP-NEXT: cmn.w r0, #128
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #127
+; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
@@ -257,12 +244,7 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: lsls r1, r1, #28
; CHECK-T2NODSP-NEXT: add.w r0, r0, r1, asr #28
-; CHECK-T2NODSP-NEXT: cmp r0, #7
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #7
-; CHECK-T2NODSP-NEXT: cmn.w r0, #8
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #7
+; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func4:
diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
index 1f7574a8cca9..def54a046bfc 100644
--- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -125,63 +125,43 @@ while.end: ; preds = %while.body, %while.
define void @ssat_unroll_minmax(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i16* nocapture writeonly %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll_minmax:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r4, r5, r11, lr}
-; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: cmp r3, #0
-; CHECK-NEXT: beq .LBB1_6
+; CHECK-NEXT: beq .LBB1_5
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: movw r12, #32768
-; CHECK-NEXT: sub lr, r3, #1
+; CHECK-NEXT: sub r12, r3, #1
; CHECK-NEXT: tst r3, #1
-; CHECK-NEXT: movt r12, #65535
; CHECK-NEXT: beq .LBB1_3
; CHECK-NEXT: @ %bb.2: @ %while.body.prol.preheader
-; CHECK-NEXT: ldrsh r3, [r0], #2
-; CHECK-NEXT: ldrsh r4, [r1], #2
-; CHECK-NEXT: smulbb r3, r4, r3
-; CHECK-NEXT: asr r4, r3, #14
-; CHECK-NEXT: cmn r4, #32768
-; CHECK-NEXT: mov r4, r12
-; CHECK-NEXT: asrgt r4, r3, #14
-; CHECK-NEXT: movw r3, #32767
-; CHECK-NEXT: cmp r4, r3
-; CHECK-NEXT: movge r4, r3
-; CHECK-NEXT: mov r3, lr
-; CHECK-NEXT: strh r4, [r2], #2
+; CHECK-NEXT: ldrsh lr, [r0], #2
+; CHECK-NEXT: ldrsh r3, [r1], #2
+; CHECK-NEXT: smulbb r3, r3, lr
+; CHECK-NEXT: ssat r3, #16, r3, asr #14
+; CHECK-NEXT: strh r3, [r2], #2
+; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit
-; CHECK-NEXT: cmp lr, #0
-; CHECK-NEXT: beq .LBB1_6
-; CHECK-NEXT: @ %bb.4: @ %while.body.preheader1
-; CHECK-NEXT: movw lr, #32767
-; CHECK-NEXT: .LBB1_5: @ %while.body
+; CHECK-NEXT: cmp r12, #0
+; CHECK-NEXT: popeq {r11, pc}
+; CHECK-NEXT: .LBB1_4: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrsh r4, [r0]
-; CHECK-NEXT: ldrsh r5, [r1]
-; CHECK-NEXT: smulbb r4, r5, r4
-; CHECK-NEXT: asr r5, r4, #14
-; CHECK-NEXT: cmn r5, #32768
-; CHECK-NEXT: mov r5, r12
-; CHECK-NEXT: asrgt r5, r4, #14
-; CHECK-NEXT: cmp r5, lr
-; CHECK-NEXT: movge r5, lr
-; CHECK-NEXT: strh r5, [r2]
-; CHECK-NEXT: ldrsh r4, [r0, #2]
+; CHECK-NEXT: ldrsh r12, [r0]
+; CHECK-NEXT: subs r3, r3, #2
+; CHECK-NEXT: ldrsh lr, [r1]
+; CHECK-NEXT: smulbb r12, lr, r12
+; CHECK-NEXT: ssat r12, #16, r12, asr #14
+; CHECK-NEXT: strh r12, [r2]
+; CHECK-NEXT: ldrsh r12, [r0, #2]
; CHECK-NEXT: add r0, r0, #4
-; CHECK-NEXT: ldrsh r5, [r1, #2]
+; CHECK-NEXT: ldrsh lr, [r1, #2]
; CHECK-NEXT: add r1, r1, #4
-; CHECK-NEXT: smulbb r4, r5, r4
-; CHECK-NEXT: asr r5, r4, #14
-; CHECK-NEXT: cmn r5, #32768
-; CHECK-NEXT: mov r5, r12
-; CHECK-NEXT: asrgt r5, r4, #14
-; CHECK-NEXT: cmp r5, lr
-; CHECK-NEXT: movge r5, lr
-; CHECK-NEXT: subs r3, r3, #2
-; CHECK-NEXT: strh r5, [r2, #2]
+; CHECK-NEXT: smulbb r12, lr, r12
+; CHECK-NEXT: ssat r12, #16, r12, asr #14
+; CHECK-NEXT: strh r12, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
-; CHECK-NEXT: bne .LBB1_5
-; CHECK-NEXT: .LBB1_6: @ %while.end
-; CHECK-NEXT: pop {r4, r5, r11, pc}
+; CHECK-NEXT: bne .LBB1_4
+; CHECK-NEXT: .LBB1_5: @ %while.end
+; CHECK-NEXT: pop {r11, pc}
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll
index ff16b59489f5..f792a38987af 100644
--- a/llvm/test/CodeGen/ARM/ssat.ll
+++ b/llvm/test/CodeGen/ARM/ssat.ll
@@ -669,14 +669,7 @@ define i32 @mm_sat_base_32bit(i32 %x) {
;
; V6T2-LABEL: mm_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
@@ -705,13 +698,7 @@ define i16 @mm_sat_base_16bit(i16 %x) {
; V6T2-LABEL: mm_sat_base_16bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxth r0, r0
-; V6T2-NEXT: movw r1, #2047
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movlt r1, r0
-; V6T2-NEXT: movw r0, #63488
-; V6T2-NEXT: movt r0, #65535
-; V6T2-NEXT: cmn r1, #2048
-; V6T2-NEXT: movgt r0, r1
+; V6T2-NEXT: ssat r0, #12, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
@@ -734,10 +721,7 @@ define i8 @mm_sat_base_8bit(i8 %x) {
; V6T2-LABEL: mm_sat_base_8bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxtb r0, r0
-; V6T2-NEXT: cmp r0, #31
-; V6T2-NEXT: movge r0, #31
-; V6T2-NEXT: cmn r0, #32
-; V6T2-NEXT: mvnle r0, #31
+; V6T2-NEXT: ssat r0, #6, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
@@ -763,14 +747,7 @@ define i32 @mm_sat_lower_upper_1(i32 %x) {
;
; V6T2-LABEL: mm_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
@@ -796,14 +773,7 @@ define i32 @mm_sat_lower_upper_2(i32 %x) {
;
; V6T2-LABEL: mm_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
@@ -829,14 +799,7 @@ define i32 @mm_sat_upper_lower_1(i32 %x) {
;
; V6T2-LABEL: mm_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: movle r0, r1
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
@@ -862,14 +825,7 @@ define i32 @mm_sat_upper_lower_2(i32 %x) {
;
; V6T2-LABEL: mm_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: movle r0, r1
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
@@ -895,14 +851,7 @@ define i32 @mm_sat_upper_lower_3(i32 %x) {
;
; V6T2-LABEL: mm_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: movle r0, r1
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
@@ -928,14 +877,7 @@ define i32 @mm_sat_le_ge(i32 %x) {
;
; V6T2-LABEL: mm_sat_le_ge:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #0
-; V6T2-NEXT: cmn r0, #8388608
-; V6T2-NEXT: movt r1, #65408
-; V6T2-NEXT: movle r0, r1
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: ssat r0, #24, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll
index 30d7a683654a..1bafba3b49ed 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat.ll
@@ -147,15 +147,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: movw r1, #32767
-; CHECK-T2NODSP-NEXT: cmp r0, r1
-; CHECK-T2NODSP-NEXT: it lt
-; CHECK-T2NODSP-NEXT: movlt r1, r0
-; CHECK-T2NODSP-NEXT: movw r0, #32768
-; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
-; CHECK-T2NODSP-NEXT: movt r0, #65535
-; CHECK-T2NODSP-NEXT: it gt
-; CHECK-T2NODSP-NEXT: movgt r0, r1
+; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
@@ -218,12 +210,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #127
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #127
-; CHECK-T2NODSP-NEXT: cmn.w r0, #128
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #127
+; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
@@ -279,12 +266,7 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #7
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #7
-; CHECK-T2NODSP-NEXT: cmn.w r0, #8
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #7
+; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func3:
diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
index 5bf7b326c5b9..0a2d1f0e7a24 100644
--- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -151,15 +151,7 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxth r1, r1
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: movw r1, #32767
-; CHECK-T2NODSP-NEXT: cmp r0, r1
-; CHECK-T2NODSP-NEXT: it lt
-; CHECK-T2NODSP-NEXT: movlt r1, r0
-; CHECK-T2NODSP-NEXT: movw r0, #32768
-; CHECK-T2NODSP-NEXT: movt r0, #65535
-; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
-; CHECK-T2NODSP-NEXT: it gt
-; CHECK-T2NODSP-NEXT: movgt r0, r1
+; CHECK-T2NODSP-NEXT: ssat r0, #16, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func16:
@@ -205,12 +197,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: sxtb r1, r1
; CHECK-T2NODSP-NEXT: subs r0, r0, r1
-; CHECK-T2NODSP-NEXT: cmp r0, #127
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #127
-; CHECK-T2NODSP-NEXT: cmn.w r0, #128
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #127
+; CHECK-T2NODSP-NEXT: ssat r0, #8, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func8:
@@ -257,12 +244,7 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; CHECK-T2NODSP-NEXT: muls r1, r2, r1
; CHECK-T2NODSP-NEXT: lsls r1, r1, #28
; CHECK-T2NODSP-NEXT: sub.w r0, r0, r1, asr #28
-; CHECK-T2NODSP-NEXT: cmp r0, #7
-; CHECK-T2NODSP-NEXT: it ge
-; CHECK-T2NODSP-NEXT: movge r0, #7
-; CHECK-T2NODSP-NEXT: cmn.w r0, #8
-; CHECK-T2NODSP-NEXT: it le
-; CHECK-T2NODSP-NEXT: mvnle r0, #7
+; CHECK-T2NODSP-NEXT: ssat r0, #4, r0
; CHECK-T2NODSP-NEXT: bx lr
;
; CHECK-T2DSP-LABEL: func4:
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index 077aa9de317d..024a98dd2934 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -624,23 +624,12 @@ define i32 @mm_unsigned_sat_base_32bit(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_base_32bit:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: ldr r1, .LCPI15_0
-; V6-NEXT: cmp r0, r1
-; V6-NEXT: movlt r1, r0
-; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI15_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movlt r1, r0
-; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
@@ -662,21 +651,14 @@ define i16 @mm_unsigned_sat_base_16bit(i16 %x) {
;
; V6-LABEL: mm_unsigned_sat_base_16bit:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: mov r1, #255
; V6-NEXT: sxth r0, r0
-; V6-NEXT: orr r1, r1, #1792
-; V6-NEXT: cmp r0, r1
-; V6-NEXT: movlt r1, r0
-; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: usat r0, #11, r0
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_unsigned_sat_base_16bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxth r0, r0
-; V6T2-NEXT: movw r1, #2047
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movlt r1, r0
-; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: usat r0, #11, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
@@ -698,17 +680,13 @@ define i8 @mm_unsigned_sat_base_8bit(i8 %x) {
; V6-LABEL: mm_unsigned_sat_base_8bit:
; V6: @ %bb.0: @ %entry
; V6-NEXT: sxtb r0, r0
-; V6-NEXT: cmp r0, #31
-; V6-NEXT: movge r0, #31
-; V6-NEXT: bic r0, r0, r0, asr #31
+; V6-NEXT: usat r0, #5, r0
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_unsigned_sat_base_8bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxtb r0, r0
-; V6T2-NEXT: cmp r0, #31
-; V6T2-NEXT: movge r0, #31
-; V6T2-NEXT: bic r0, r0, r0, asr #31
+; V6T2-NEXT: usat r0, #5, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
@@ -731,23 +709,12 @@ define i32 @mm_unsigned_sat_lower_upper_1(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_lower_upper_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: ldr r1, .LCPI18_0
-; V6-NEXT: cmp r0, r1
-; V6-NEXT: movlt r1, r0
-; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI18_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movlt r1, r0
-; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
@@ -770,23 +737,12 @@ define i32 @mm_unsigned_sat_lower_upper_2(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_lower_upper_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: ldr r1, .LCPI19_0
-; V6-NEXT: cmp r0, r1
-; V6-NEXT: movlt r1, r0
-; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
-; V6-NEXT: .p2align 2
-; V6-NEXT: @ %bb.1:
-; V6-NEXT: .LCPI19_0:
-; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: movw r1, #65535
-; V6T2-NEXT: movt r1, #127
-; V6T2-NEXT: cmp r0, r1
-; V6T2-NEXT: movlt r1, r0
-; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
More information about the llvm-commits
mailing list