[llvm] 8bd6d36 - [ARM] Override hasAndNotCompare (#145441)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 03:15:59 PDT 2025
Author: AZero13
Date: 2025-06-29T11:15:56+01:00
New Revision: 8bd6d36a44134f23000762f3cb192a325c4cfd91
URL: https://github.com/llvm/llvm-project/commit/8bd6d36a44134f23000762f3cb192a325c4cfd91
DIFF: https://github.com/llvm/llvm-project/commit/8bd6d36a44134f23000762f3cb192a325c4cfd91.diff
LOG: [ARM] Override hasAndNotCompare (#145441)
bics is available on ARM.
USAT regressions are to be fixed after this because they stem from an issue
in ARMISelLowering and should be addressed in a separate PR.
Note that opt optimizes those testcases to min/max intrinsics anyway, so
this should have no real effect on codegen.
Proof: https://alive2.llvm.org/ce/z/kPVQ3_
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/test/CodeGen/ARM/fpclamptosat.ll
llvm/test/CodeGen/ARM/usat-with-shift.ll
llvm/test/CodeGen/ARM/usat.ll
llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 9c330e60a7d54..604910e04d4cc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -609,6 +609,11 @@ class VectorType;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+ bool hasAndNotCompare(SDValue V) const override {
+ // We can use bics for any scalar.
+ return V.getValueType().isScalarInteger();
+ }
+
bool
isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 478b98dfac80f..8ab56b228d2a7 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -613,14 +613,21 @@ define i16 @ustest_f64i16(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
-; VFP2-NEXT: usat r0, #16, r0
+; VFP2-NEXT: movw r1, #65535
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i16:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
+; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: usat r0, #16, r0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
@@ -738,12 +745,26 @@ define i16 @ustest_f32i16(float %x) {
; SOFT-NEXT: .LCPI14_0:
; SOFT-NEXT: .long 65535 @ 0xffff
;
-; VFP-LABEL: ustest_f32i16:
-; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: vcvt.s32.f32 s0, s0
-; VFP-NEXT: vmov r0, s0
-; VFP-NEXT: usat r0, #16, r0
-; VFP-NEXT: bx lr
+; VFP2-LABEL: ustest_f32i16:
+; VFP2: @ %bb.0: @ %entry
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: movw r1, #65535
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
+; VFP2-NEXT: bx lr
+;
+; FULL-LABEL: ustest_f32i16:
+; FULL: @ %bb.0: @ %entry
+; FULL-NEXT: vcvt.s32.f32 s0, s0
+; FULL-NEXT: movw r1, #65535
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
+; FULL-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%0 = icmp slt i32 %conv, 65535
@@ -890,16 +911,23 @@ define i16 @ustest_f16i16(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: usat r0, #16, r0
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i16:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
+; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: usat r0, #16, r0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
@@ -1101,83 +1129,48 @@ entry:
define i64 @ustest_f64i64(double %x) {
; SOFT-LABEL: ustest_f64i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB20_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB20_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB20_10
+; SOFT-NEXT: bge .LBB20_6
; SOFT-NEXT: .LBB20_2: @ %entry
-; SOFT-NEXT: bge .LBB20_11
+; SOFT-NEXT: blt .LBB20_4
; SOFT-NEXT: .LBB20_3: @ %entry
-; SOFT-NEXT: blt .LBB20_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB20_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB20_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB20_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB20_13
-; SOFT-NEXT: .LBB20_7: @ %entry
-; SOFT-NEXT: beq .LBB20_14
-; SOFT-NEXT: .LBB20_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB20_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB20_2
-; SOFT-NEXT: .LBB20_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB20_3
-; SOFT-NEXT: .LBB20_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB20_4
-; SOFT-NEXT: b .LBB20_5
-; SOFT-NEXT: .LBB20_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB20_7
-; SOFT-NEXT: .LBB20_13: @ %entry
+; SOFT-NEXT: .LBB20_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB20_8
-; SOFT-NEXT: .LBB20_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB20_3
+; SOFT-NEXT: b .LBB20_4
;
; VFP2-LABEL: ustest_f64i64:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __fixdfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i64:
@@ -1185,22 +1178,14 @@ define i64 @ustest_f64i64(double %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixdfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi double %x to i128
@@ -1400,83 +1385,48 @@ entry:
define i64 @ustest_f32i64(float %x) {
; SOFT-LABEL: ustest_f32i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB23_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB23_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB23_10
+; SOFT-NEXT: bge .LBB23_6
; SOFT-NEXT: .LBB23_2: @ %entry
-; SOFT-NEXT: bge .LBB23_11
+; SOFT-NEXT: blt .LBB23_4
; SOFT-NEXT: .LBB23_3: @ %entry
-; SOFT-NEXT: blt .LBB23_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB23_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB23_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB23_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB23_13
-; SOFT-NEXT: .LBB23_7: @ %entry
-; SOFT-NEXT: beq .LBB23_14
-; SOFT-NEXT: .LBB23_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB23_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB23_2
-; SOFT-NEXT: .LBB23_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB23_3
-; SOFT-NEXT: .LBB23_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB23_4
-; SOFT-NEXT: b .LBB23_5
-; SOFT-NEXT: .LBB23_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB23_7
-; SOFT-NEXT: .LBB23_13: @ %entry
+; SOFT-NEXT: .LBB23_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB23_8
-; SOFT-NEXT: .LBB23_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB23_3
+; SOFT-NEXT: b .LBB23_4
;
; VFP2-LABEL: ustest_f32i64:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __fixsfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f32i64:
@@ -1484,22 +1434,14 @@ define i64 @ustest_f32i64(float %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixsfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi float %x to i128
@@ -1713,61 +1655,34 @@ entry:
define i64 @ustest_f16i64(half %x) {
; SOFT-LABEL: ustest_f16i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB26_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB26_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB26_10
+; SOFT-NEXT: bge .LBB26_6
; SOFT-NEXT: .LBB26_2: @ %entry
-; SOFT-NEXT: bge .LBB26_11
+; SOFT-NEXT: blt .LBB26_4
; SOFT-NEXT: .LBB26_3: @ %entry
-; SOFT-NEXT: blt .LBB26_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB26_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB26_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB26_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB26_13
-; SOFT-NEXT: .LBB26_7: @ %entry
-; SOFT-NEXT: beq .LBB26_14
-; SOFT-NEXT: .LBB26_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB26_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB26_2
-; SOFT-NEXT: .LBB26_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB26_3
-; SOFT-NEXT: .LBB26_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB26_4
-; SOFT-NEXT: b .LBB26_5
-; SOFT-NEXT: .LBB26_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB26_7
-; SOFT-NEXT: .LBB26_13: @ %entry
+; SOFT-NEXT: .LBB26_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB26_8
-; SOFT-NEXT: .LBB26_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB26_3
+; SOFT-NEXT: b .LBB26_4
;
; VFP2-LABEL: ustest_f16i64:
; VFP2: @ %bb.0: @ %entry
@@ -1777,24 +1692,16 @@ define i64 @ustest_f16i64(half %x) {
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: bl __fixsfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i64:
@@ -1804,22 +1711,14 @@ define i64 @ustest_f16i64(half %x) {
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi half %x to i128
diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll
index cc3de9d6d3407..b9c083e498c0c 100644
--- a/llvm/test/CodeGen/ARM/usat-with-shift.ll
+++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,ARMV6
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s --check-prefixes=CHECK,THUMB
define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){
-; CHECK-LABEL: usat_lsl
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #7, r0, lsl #2
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: usat_lsl:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: usat r0, #7, r0, lsl #2
+; CHECK-NEXT: bx lr
entry:
%shl = shl i32 %num, 2
%0 = tail call i32 @llvm.arm.usat(i32 %shl, i32 7)
@@ -14,10 +14,10 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_asr(i32 %num){
-; CHECK-LABEL: usat_asr
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #7, r0, asr #2
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: usat_asr:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: usat r0, #7, r0, asr #2
+; CHECK-NEXT: bx lr
entry:
%shr = ashr i32 %num, 2
%0 = tail call i32 @llvm.arm.usat(i32 %shr, i32 7)
@@ -25,10 +25,24 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_lsl2(i32 %num){
-; CHECK-LABEL: usat_lsl2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #15, r0, lsl #15
-; CHECK-NEXT: bx lr
+; ARMV6-LABEL: usat_lsl2:
+; ARMV6: @ %bb.0: @ %entry
+; ARMV6-NEXT: lsl r0, r0, #15
+; ARMV6-NEXT: bic r1, r0, r0, asr #31
+; ARMV6-NEXT: mov r0, #255
+; ARMV6-NEXT: orr r0, r0, #32512
+; ARMV6-NEXT: cmp r1, r0
+; ARMV6-NEXT: movlt r0, r1
+; ARMV6-NEXT: bx lr
+;
+; THUMB-LABEL: usat_lsl2:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: lsls r0, r0, #15
+; THUMB-NEXT: movw r1, #32767
+; THUMB-NEXT: bic.w r0, r0, r0, asr #31
+; THUMB-NEXT: cmp r0, r1
+; THUMB-NEXT: csel r0, r0, r1, lt
+; THUMB-NEXT: bx lr
entry:
%shl = shl nsw i32 %num, 15
%0 = icmp sgt i32 %shl, 0
@@ -39,10 +53,24 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_asr2(i32 %num){
-; CHECK-LABEL: usat_asr2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #15, r0, asr #15
-; CHECK-NEXT: bx lr
+; ARMV6-LABEL: usat_asr2:
+; ARMV6: @ %bb.0: @ %entry
+; ARMV6-NEXT: asr r1, r0, #15
+; ARMV6-NEXT: bic r1, r1, r0, asr #31
+; ARMV6-NEXT: mov r0, #255
+; ARMV6-NEXT: orr r0, r0, #32512
+; ARMV6-NEXT: cmp r1, r0
+; ARMV6-NEXT: movlt r0, r1
+; ARMV6-NEXT: bx lr
+;
+; THUMB-LABEL: usat_asr2:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: asrs r1, r0, #15
+; THUMB-NEXT: bic.w r0, r1, r0, asr #31
+; THUMB-NEXT: movw r1, #32767
+; THUMB-NEXT: cmp r0, r1
+; THUMB-NEXT: csel r0, r0, r1, lt
+; THUMB-NEXT: bx lr
entry:
%shr = ashr i32 %num, 15
%0 = icmp sgt i32 %shr, 0
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index d01aa1520b326..2e1d0283ebde2 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -32,12 +32,23 @@ define i32 @unsigned_sat_base_32bit(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_base_32bit:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI0_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI0_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i32 %x, 8388607
@@ -57,11 +68,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V4T-NEXT: orr r2, r2, #1792
; V4T-NEXT: asr r1, r1, #16
; V4T-NEXT: cmp r1, r2
-; V4T-NEXT: movge r0, r2
-; V4T-NEXT: lsl r1, r0, #16
-; V4T-NEXT: asr r1, r1, #16
-; V4T-NEXT: cmp r1, #0
-; V4T-NEXT: movle r0, #0
+; V4T-NEXT: movlt r2, r0
+; V4T-NEXT: lsl r0, r2, #16
+; V4T-NEXT: bic r0, r2, r0, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: unsigned_sat_base_16bit:
@@ -70,10 +79,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V6-NEXT: sxth r1, r0
; V6-NEXT: orr r2, r2, #1792
; V6-NEXT: cmp r1, r2
-; V6-NEXT: movge r0, r2
-; V6-NEXT: sxth r1, r0
-; V6-NEXT: cmp r1, #0
-; V6-NEXT: movle r0, #0
+; V6-NEXT: movlt r2, r0
+; V6-NEXT: sxth r0, r2
+; V6-NEXT: bic r0, r2, r0, asr #15
; V6-NEXT: bx lr
;
; V6T2-LABEL: unsigned_sat_base_16bit:
@@ -81,10 +89,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V6T2-NEXT: sxth r1, r0
; V6T2-NEXT: movw r2, #2047
; V6T2-NEXT: cmp r1, r2
-; V6T2-NEXT: movge r0, r2
-; V6T2-NEXT: sxth r1, r0
-; V6T2-NEXT: cmp r1, #0
-; V6T2-NEXT: movle r0, #0
+; V6T2-NEXT: movlt r2, r0
+; V6T2-NEXT: sxth r0, r2
+; V6T2-NEXT: bic r0, r2, r0, asr #15
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i16 %x, 2047
@@ -104,9 +111,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V4T-NEXT: cmp r1, #31
; V4T-NEXT: movge r0, #31
; V4T-NEXT: lsl r1, r0, #24
-; V4T-NEXT: asr r1, r1, #24
-; V4T-NEXT: cmp r1, #0
-; V4T-NEXT: movle r0, #0
+; V4T-NEXT: bic r0, r0, r1, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: unsigned_sat_base_8bit:
@@ -115,8 +120,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V6-NEXT: cmp r1, #31
; V6-NEXT: movge r0, #31
; V6-NEXT: sxtb r1, r0
-; V6-NEXT: cmp r1, #0
-; V6-NEXT: movle r0, #0
+; V6-NEXT: bic r0, r0, r1, asr #7
; V6-NEXT: bx lr
;
; V6T2-LABEL: unsigned_sat_base_8bit:
@@ -125,8 +129,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V6T2-NEXT: cmp r1, #31
; V6T2-NEXT: movge r0, #31
; V6T2-NEXT: sxtb r1, r0
-; V6T2-NEXT: cmp r1, #0
-; V6T2-NEXT: movle r0, #0
+; V6T2-NEXT: bic r0, r0, r1, asr #7
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i8 %x, 31
@@ -157,12 +160,23 @@ define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_lower_upper_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI3_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI3_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%cmpUp = icmp slt i32 %x, 8388607
@@ -188,12 +202,23 @@ define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_lower_upper_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI4_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI4_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i32 %x, 8388607
@@ -219,12 +244,23 @@ define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI5_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI5_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = icmp sgt i32 %x, 0
@@ -250,12 +286,23 @@ define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI6_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI6_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = icmp sgt i32 %x, 0
@@ -281,12 +328,23 @@ define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_3:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI7_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI7_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%cmpLow = icmp sgt i32 %x, 0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
index 726237eb27f2d..024de2b36667b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
@@ -282,12 +282,12 @@ define arm_aapcs_vfpcc <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64>
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov.i32 q2, #0x0
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csetm r1, gt
+; CHECK-NEXT: mvns r1, r1
+; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: bfi r0, r1, #0, #8
; CHECK-NEXT: vmov r1, s3
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csetm r1, gt
+; CHECK-NEXT: mvns r1, r1
+; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: bfi r0, r1, #8, #8
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q1, q2
More information about the llvm-commits
mailing list