[llvm] [ARM] Override hasAndNotCompare (PR #145441)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 28 08:04:42 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/145441
>From 8c4d687fdb6b459b3b4405bbf4da47ceb3683410 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 23 Jun 2025 17:19:15 -0400
Subject: [PATCH 1/2] [ARM] Override hasAndNotCompare
The BICS (bit clear, setting flags) instruction is available on ARM and Thumb-2, so an and-with-complement followed by a comparison against zero can be selected as a single flag-setting instruction; overriding hasAndNotCompare lets DAGCombine form the (and x, (not y)) pattern for scalar integers.
---
llvm/lib/Target/ARM/ARMISelLowering.h | 5 +
llvm/test/CodeGen/ARM/fpclamptosat.ll | 357 +++++++-----------
llvm/test/CodeGen/ARM/usat-with-shift.ll | 24 +-
llvm/test/CodeGen/ARM/usat.ll | 122 ++++--
.../CodeGen/Thumb2/mve-vselect-constants.ll | 8 +-
5 files changed, 235 insertions(+), 281 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 9c330e60a7d54..604910e04d4cc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -609,6 +609,11 @@ class VectorType;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+ bool hasAndNotCompare(SDValue V) const override {
+ // We can use bics for any scalar.
+ return V.getValueType().isScalarInteger();
+ }
+
bool
isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 478b98dfac80f..8ab56b228d2a7 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -613,14 +613,21 @@ define i16 @ustest_f64i16(double %x) {
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: vmov r0, r1, d0
; VFP2-NEXT: bl __aeabi_d2iz
-; VFP2-NEXT: usat r0, #16, r0
+; VFP2-NEXT: movw r1, #65535
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i16:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f64 s0, d0
+; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: usat r0, #16, r0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: bx lr
entry:
%conv = fptosi double %x to i32
@@ -738,12 +745,26 @@ define i16 @ustest_f32i16(float %x) {
; SOFT-NEXT: .LCPI14_0:
; SOFT-NEXT: .long 65535 @ 0xffff
;
-; VFP-LABEL: ustest_f32i16:
-; VFP: @ %bb.0: @ %entry
-; VFP-NEXT: vcvt.s32.f32 s0, s0
-; VFP-NEXT: vmov r0, s0
-; VFP-NEXT: usat r0, #16, r0
-; VFP-NEXT: bx lr
+; VFP2-LABEL: ustest_f32i16:
+; VFP2: @ %bb.0: @ %entry
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: movw r1, #65535
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
+; VFP2-NEXT: bx lr
+;
+; FULL-LABEL: ustest_f32i16:
+; FULL: @ %bb.0: @ %entry
+; FULL-NEXT: vcvt.s32.f32 s0, s0
+; FULL-NEXT: movw r1, #65535
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
+; FULL-NEXT: bx lr
entry:
%conv = fptosi float %x to i32
%0 = icmp slt i32 %conv, 65535
@@ -890,16 +911,23 @@ define i16 @ustest_f16i16(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: movw r1, #65535
; VFP2-NEXT: vcvt.s32.f32 s0, s0
; VFP2-NEXT: vmov r0, s0
-; VFP2-NEXT: usat r0, #16, r0
+; VFP2-NEXT: cmp r0, r1
+; VFP2-NEXT: it lt
+; VFP2-NEXT: movlt r1, r0
+; VFP2-NEXT: bic.w r0, r1, r1, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i16:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.s32.f16 s0, s0
+; FULL-NEXT: movw r1, #65535
; FULL-NEXT: vmov r0, s0
-; FULL-NEXT: usat r0, #16, r0
+; FULL-NEXT: cmp r0, r1
+; FULL-NEXT: csel r0, r0, r1, lt
+; FULL-NEXT: bic.w r0, r0, r0, asr #31
; FULL-NEXT: bx lr
entry:
%conv = fptosi half %x to i32
@@ -1101,83 +1129,48 @@ entry:
define i64 @ustest_f64i64(double %x) {
; SOFT-LABEL: ustest_f64i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __fixdfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB20_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB20_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB20_10
+; SOFT-NEXT: bge .LBB20_6
; SOFT-NEXT: .LBB20_2: @ %entry
-; SOFT-NEXT: bge .LBB20_11
+; SOFT-NEXT: blt .LBB20_4
; SOFT-NEXT: .LBB20_3: @ %entry
-; SOFT-NEXT: blt .LBB20_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB20_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB20_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB20_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB20_13
-; SOFT-NEXT: .LBB20_7: @ %entry
-; SOFT-NEXT: beq .LBB20_14
-; SOFT-NEXT: .LBB20_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB20_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB20_2
-; SOFT-NEXT: .LBB20_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB20_3
-; SOFT-NEXT: .LBB20_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB20_4
-; SOFT-NEXT: b .LBB20_5
-; SOFT-NEXT: .LBB20_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB20_7
-; SOFT-NEXT: .LBB20_13: @ %entry
+; SOFT-NEXT: .LBB20_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB20_8
-; SOFT-NEXT: .LBB20_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB20_3
+; SOFT-NEXT: b .LBB20_4
;
; VFP2-LABEL: ustest_f64i64:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __fixdfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f64i64:
@@ -1185,22 +1178,14 @@ define i64 @ustest_f64i64(double %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixdfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi double %x to i128
@@ -1400,83 +1385,48 @@ entry:
define i64 @ustest_f32i64(float %x) {
; SOFT-LABEL: ustest_f32i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB23_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB23_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB23_10
+; SOFT-NEXT: bge .LBB23_6
; SOFT-NEXT: .LBB23_2: @ %entry
-; SOFT-NEXT: bge .LBB23_11
+; SOFT-NEXT: blt .LBB23_4
; SOFT-NEXT: .LBB23_3: @ %entry
-; SOFT-NEXT: blt .LBB23_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB23_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB23_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB23_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB23_13
-; SOFT-NEXT: .LBB23_7: @ %entry
-; SOFT-NEXT: beq .LBB23_14
-; SOFT-NEXT: .LBB23_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB23_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB23_2
-; SOFT-NEXT: .LBB23_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB23_3
-; SOFT-NEXT: .LBB23_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB23_4
-; SOFT-NEXT: b .LBB23_5
-; SOFT-NEXT: .LBB23_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB23_7
-; SOFT-NEXT: .LBB23_13: @ %entry
+; SOFT-NEXT: .LBB23_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB23_8
-; SOFT-NEXT: .LBB23_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB23_3
+; SOFT-NEXT: b .LBB23_4
;
; VFP2-LABEL: ustest_f32i64:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __fixsfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f32i64:
@@ -1484,22 +1434,14 @@ define i64 @ustest_f32i64(float %x) {
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: bl __fixsfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi float %x to i128
@@ -1713,61 +1655,34 @@ entry:
define i64 @ustest_f16i64(half %x) {
; SOFT-LABEL: ustest_f16i64:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: .save {r4, r5, r6, lr}
-; SOFT-NEXT: push {r4, r5, r6, lr}
+; SOFT-NEXT: .save {r4, lr}
+; SOFT-NEXT: push {r4, lr}
; SOFT-NEXT: uxth r0, r0
; SOFT-NEXT: bl __aeabi_h2f
; SOFT-NEXT: bl __fixsfti
-; SOFT-NEXT: movs r4, #1
-; SOFT-NEXT: movs r5, #0
-; SOFT-NEXT: subs r6, r2, #1
-; SOFT-NEXT: mov r6, r3
-; SOFT-NEXT: sbcs r6, r5
-; SOFT-NEXT: bge .LBB26_9
+; SOFT-NEXT: movs r4, #0
+; SOFT-NEXT: subs r2, r2, #1
+; SOFT-NEXT: mov r2, r3
+; SOFT-NEXT: sbcs r2, r4
+; SOFT-NEXT: bge .LBB26_5
; SOFT-NEXT: @ %bb.1: @ %entry
-; SOFT-NEXT: bge .LBB26_10
+; SOFT-NEXT: bge .LBB26_6
; SOFT-NEXT: .LBB26_2: @ %entry
-; SOFT-NEXT: bge .LBB26_11
+; SOFT-NEXT: blt .LBB26_4
; SOFT-NEXT: .LBB26_3: @ %entry
-; SOFT-NEXT: blt .LBB26_5
+; SOFT-NEXT: mov r3, r4
; SOFT-NEXT: .LBB26_4: @ %entry
-; SOFT-NEXT: mov r0, r5
+; SOFT-NEXT: asrs r2, r3, #31
+; SOFT-NEXT: bics r0, r2
+; SOFT-NEXT: bics r1, r2
+; SOFT-NEXT: pop {r4, pc}
; SOFT-NEXT: .LBB26_5: @ %entry
-; SOFT-NEXT: rsbs r6, r0, #0
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r1
-; SOFT-NEXT: mov r6, r5
-; SOFT-NEXT: sbcs r6, r2
-; SOFT-NEXT: mov r2, r5
-; SOFT-NEXT: sbcs r2, r3
-; SOFT-NEXT: bge .LBB26_12
-; SOFT-NEXT: @ %bb.6: @ %entry
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: beq .LBB26_13
-; SOFT-NEXT: .LBB26_7: @ %entry
-; SOFT-NEXT: beq .LBB26_14
-; SOFT-NEXT: .LBB26_8: @ %entry
-; SOFT-NEXT: pop {r4, r5, r6, pc}
-; SOFT-NEXT: .LBB26_9: @ %entry
-; SOFT-NEXT: mov r3, r5
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: blt .LBB26_2
-; SOFT-NEXT: .LBB26_10: @ %entry
-; SOFT-NEXT: mov r2, r4
-; SOFT-NEXT: blt .LBB26_3
-; SOFT-NEXT: .LBB26_11: @ %entry
-; SOFT-NEXT: mov r1, r5
-; SOFT-NEXT: bge .LBB26_4
-; SOFT-NEXT: b .LBB26_5
-; SOFT-NEXT: .LBB26_12: @ %entry
-; SOFT-NEXT: mov r4, r5
-; SOFT-NEXT: cmp r4, #0
-; SOFT-NEXT: bne .LBB26_7
-; SOFT-NEXT: .LBB26_13: @ %entry
+; SOFT-NEXT: .LBB26_6: @ %entry
; SOFT-NEXT: mov r0, r4
-; SOFT-NEXT: bne .LBB26_8
-; SOFT-NEXT: .LBB26_14: @ %entry
-; SOFT-NEXT: mov r1, r4
-; SOFT-NEXT: pop {r4, r5, r6, pc}
+; SOFT-NEXT: bge .LBB26_3
+; SOFT-NEXT: b .LBB26_4
;
; VFP2-LABEL: ustest_f16i64:
; VFP2: @ %bb.0: @ %entry
@@ -1777,24 +1692,16 @@ define i64 @ustest_f16i64(half %x) {
; VFP2-NEXT: bl __aeabi_h2f
; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: bl __fixsfti
-; VFP2-NEXT: subs.w lr, r2, #1
+; VFP2-NEXT: subs r2, #1
; VFP2-NEXT: mov.w r12, #0
-; VFP2-NEXT: sbcs lr, r3, #0
-; VFP2-NEXT: itttt ge
+; VFP2-NEXT: sbcs r2, r3, #0
+; VFP2-NEXT: itt ge
; VFP2-NEXT: movge r3, r12
-; VFP2-NEXT: movge r2, #1
-; VFP2-NEXT: movge r1, r12
; VFP2-NEXT: movge r0, r12
-; VFP2-NEXT: rsbs.w lr, r0, #0
-; VFP2-NEXT: sbcs.w lr, r12, r1
-; VFP2-NEXT: sbcs.w r2, r12, r2
-; VFP2-NEXT: sbcs.w r2, r12, r3
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r12, #1
-; VFP2-NEXT: cmp.w r12, #0
-; VFP2-NEXT: itt eq
-; VFP2-NEXT: moveq r0, r12
-; VFP2-NEXT: moveq r1, r12
+; VFP2-NEXT: it ge
+; VFP2-NEXT: movge r1, r12
+; VFP2-NEXT: bic.w r0, r0, r3, asr #31
+; VFP2-NEXT: bic.w r1, r1, r3, asr #31
; VFP2-NEXT: pop {r7, pc}
;
; FULL-LABEL: ustest_f16i64:
@@ -1804,22 +1711,14 @@ define i64 @ustest_f16i64(half %x) {
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
-; FULL-NEXT: subs.w lr, r2, #1
+; FULL-NEXT: subs r2, #1
; FULL-NEXT: mov.w r12, #0
-; FULL-NEXT: sbcs lr, r3, #0
-; FULL-NEXT: it ge
-; FULL-NEXT: movge r2, #1
+; FULL-NEXT: sbcs r2, r3, #0
+; FULL-NEXT: csel r2, r3, r12, lt
; FULL-NEXT: csel r0, r0, r12, lt
-; FULL-NEXT: csel lr, r3, r12, lt
; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: rsbs r3, r0, #0
-; FULL-NEXT: sbcs.w r3, r12, r1
-; FULL-NEXT: sbcs.w r2, r12, r2
-; FULL-NEXT: sbcs.w r2, r12, lr
-; FULL-NEXT: cset r2, lt
-; FULL-NEXT: cmp r2, #0
-; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r2, ne
+; FULL-NEXT: bic.w r0, r0, r2, asr #31
+; FULL-NEXT: bic.w r1, r1, r2, asr #31
; FULL-NEXT: pop {r7, pc}
entry:
%conv = fptosi half %x to i128
diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll
index cc3de9d6d3407..0eca4c4a76c6c 100644
--- a/llvm/test/CodeGen/ARM/usat-with-shift.ll
+++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s
define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){
-; CHECK-LABEL: usat_lsl
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #7, r0, lsl #2
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: usat_lsl:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: usat r0, #7, r0, lsl #2
+; CHECK-NEXT: bx lr
entry:
%shl = shl i32 %num, 2
%0 = tail call i32 @llvm.arm.usat(i32 %shl, i32 7)
@@ -14,10 +14,10 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_asr(i32 %num){
-; CHECK-LABEL: usat_asr
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #7, r0, asr #2
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: usat_asr:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: usat r0, #7, r0, asr #2
+; CHECK-NEXT: bx lr
entry:
%shr = ashr i32 %num, 2
%0 = tail call i32 @llvm.arm.usat(i32 %shr, i32 7)
@@ -25,10 +25,6 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_lsl2(i32 %num){
-; CHECK-LABEL: usat_lsl2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #15, r0, lsl #15
-; CHECK-NEXT: bx lr
entry:
%shl = shl nsw i32 %num, 15
%0 = icmp sgt i32 %shl, 0
@@ -39,10 +35,6 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_asr2(i32 %num){
-; CHECK-LABEL: usat_asr2:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: usat r0, #15, r0, asr #15
-; CHECK-NEXT: bx lr
entry:
%shr = ashr i32 %num, 15
%0 = icmp sgt i32 %shr, 0
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index d01aa1520b326..2e1d0283ebde2 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -32,12 +32,23 @@ define i32 @unsigned_sat_base_32bit(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_base_32bit:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI0_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI0_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i32 %x, 8388607
@@ -57,11 +68,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V4T-NEXT: orr r2, r2, #1792
; V4T-NEXT: asr r1, r1, #16
; V4T-NEXT: cmp r1, r2
-; V4T-NEXT: movge r0, r2
-; V4T-NEXT: lsl r1, r0, #16
-; V4T-NEXT: asr r1, r1, #16
-; V4T-NEXT: cmp r1, #0
-; V4T-NEXT: movle r0, #0
+; V4T-NEXT: movlt r2, r0
+; V4T-NEXT: lsl r0, r2, #16
+; V4T-NEXT: bic r0, r2, r0, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: unsigned_sat_base_16bit:
@@ -70,10 +79,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V6-NEXT: sxth r1, r0
; V6-NEXT: orr r2, r2, #1792
; V6-NEXT: cmp r1, r2
-; V6-NEXT: movge r0, r2
-; V6-NEXT: sxth r1, r0
-; V6-NEXT: cmp r1, #0
-; V6-NEXT: movle r0, #0
+; V6-NEXT: movlt r2, r0
+; V6-NEXT: sxth r0, r2
+; V6-NEXT: bic r0, r2, r0, asr #15
; V6-NEXT: bx lr
;
; V6T2-LABEL: unsigned_sat_base_16bit:
@@ -81,10 +89,9 @@ define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
; V6T2-NEXT: sxth r1, r0
; V6T2-NEXT: movw r2, #2047
; V6T2-NEXT: cmp r1, r2
-; V6T2-NEXT: movge r0, r2
-; V6T2-NEXT: sxth r1, r0
-; V6T2-NEXT: cmp r1, #0
-; V6T2-NEXT: movle r0, #0
+; V6T2-NEXT: movlt r2, r0
+; V6T2-NEXT: sxth r0, r2
+; V6T2-NEXT: bic r0, r2, r0, asr #15
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i16 %x, 2047
@@ -104,9 +111,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V4T-NEXT: cmp r1, #31
; V4T-NEXT: movge r0, #31
; V4T-NEXT: lsl r1, r0, #24
-; V4T-NEXT: asr r1, r1, #24
-; V4T-NEXT: cmp r1, #0
-; V4T-NEXT: movle r0, #0
+; V4T-NEXT: bic r0, r0, r1, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: unsigned_sat_base_8bit:
@@ -115,8 +120,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V6-NEXT: cmp r1, #31
; V6-NEXT: movge r0, #31
; V6-NEXT: sxtb r1, r0
-; V6-NEXT: cmp r1, #0
-; V6-NEXT: movle r0, #0
+; V6-NEXT: bic r0, r0, r1, asr #7
; V6-NEXT: bx lr
;
; V6T2-LABEL: unsigned_sat_base_8bit:
@@ -125,8 +129,7 @@ define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
; V6T2-NEXT: cmp r1, #31
; V6T2-NEXT: movge r0, #31
; V6T2-NEXT: sxtb r1, r0
-; V6T2-NEXT: cmp r1, #0
-; V6T2-NEXT: movle r0, #0
+; V6T2-NEXT: bic r0, r0, r1, asr #7
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i8 %x, 31
@@ -157,12 +160,23 @@ define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_lower_upper_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI3_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI3_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%cmpUp = icmp slt i32 %x, 8388607
@@ -188,12 +202,23 @@ define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_lower_upper_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: ldr r1, .LCPI4_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI4_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = icmp slt i32 %x, 8388607
@@ -219,12 +244,23 @@ define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_1:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI5_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI5_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = icmp sgt i32 %x, 0
@@ -250,12 +286,23 @@ define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_2:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI6_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI6_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = icmp sgt i32 %x, 0
@@ -281,12 +328,23 @@ define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 {
;
; V6-LABEL: unsigned_sat_upper_lower_3:
; V6: @ %bb.0: @ %entry
-; V6-NEXT: usat r0, #23, r0
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI7_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI7_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: unsigned_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
-; V6T2-NEXT: usat r0, #23, r0
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%cmpLow = icmp sgt i32 %x, 0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
index 726237eb27f2d..024de2b36667b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll
@@ -282,12 +282,12 @@ define arm_aapcs_vfpcc <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64>
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov.i32 q2, #0x0
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csetm r1, gt
+; CHECK-NEXT: mvns r1, r1
+; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: bfi r0, r1, #0, #8
; CHECK-NEXT: vmov r1, s3
-; CHECK-NEXT: cmp.w r1, #-1
-; CHECK-NEXT: csetm r1, gt
+; CHECK-NEXT: mvns r1, r1
+; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: bfi r0, r1, #8, #8
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q1, q2
>From 04c9237afc65ac2f732e84cc48957dd3b286bc80 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 28 Jun 2025 11:04:33 -0400
Subject: [PATCH 2/2] Fix the tests
---
llvm/test/CodeGen/ARM/usat-with-shift.ll | 40 ++++++++++++++++++++++--
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll
index 0eca4c4a76c6c..b9c083e498c0c 100644
--- a/llvm/test/CodeGen/ARM/usat-with-shift.ll
+++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,ARMV6
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s --check-prefixes=CHECK,THUMB
define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){
; CHECK-LABEL: usat_lsl:
@@ -25,6 +25,24 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_lsl2(i32 %num){
+; ARMV6-LABEL: usat_lsl2:
+; ARMV6: @ %bb.0: @ %entry
+; ARMV6-NEXT: lsl r0, r0, #15
+; ARMV6-NEXT: bic r1, r0, r0, asr #31
+; ARMV6-NEXT: mov r0, #255
+; ARMV6-NEXT: orr r0, r0, #32512
+; ARMV6-NEXT: cmp r1, r0
+; ARMV6-NEXT: movlt r0, r1
+; ARMV6-NEXT: bx lr
+;
+; THUMB-LABEL: usat_lsl2:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: lsls r0, r0, #15
+; THUMB-NEXT: movw r1, #32767
+; THUMB-NEXT: bic.w r0, r0, r0, asr #31
+; THUMB-NEXT: cmp r0, r1
+; THUMB-NEXT: csel r0, r0, r1, lt
+; THUMB-NEXT: bx lr
entry:
%shl = shl nsw i32 %num, 15
%0 = icmp sgt i32 %shl, 0
@@ -35,6 +53,24 @@ entry:
}
define arm_aapcs_vfpcc i32 @usat_asr2(i32 %num){
+; ARMV6-LABEL: usat_asr2:
+; ARMV6: @ %bb.0: @ %entry
+; ARMV6-NEXT: asr r1, r0, #15
+; ARMV6-NEXT: bic r1, r1, r0, asr #31
+; ARMV6-NEXT: mov r0, #255
+; ARMV6-NEXT: orr r0, r0, #32512
+; ARMV6-NEXT: cmp r1, r0
+; ARMV6-NEXT: movlt r0, r1
+; ARMV6-NEXT: bx lr
+;
+; THUMB-LABEL: usat_asr2:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: asrs r1, r0, #15
+; THUMB-NEXT: bic.w r0, r1, r0, asr #31
+; THUMB-NEXT: movw r1, #32767
+; THUMB-NEXT: cmp r0, r1
+; THUMB-NEXT: csel r0, r0, r1, lt
+; THUMB-NEXT: bx lr
entry:
%shr = ashr i32 %num, 15
%0 = icmp sgt i32 %shr, 0
More information about the llvm-commits
mailing list