[llvm] [ARM] Port AArch64's CSel handling patterns to ARM (PR #152945)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 10 15:42:15 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-arm
Author: AZero13 (AZero13)
Changes:
---
Patch is 332.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152945.diff
14 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMInstrThumb2.td (+9)
- (modified) llvm/test/CodeGen/ARM/fpclamptosat.ll (+66-66)
- (modified) llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll (+306-165)
- (modified) llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll (+244-137)
- (modified) llvm/test/CodeGen/Thumb/scmp.ll (+4-8)
- (modified) llvm/test/CodeGen/Thumb/ucmp.ll (+4-8)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll (+16-16)
- (modified) llvm/test/CodeGen/Thumb2/mve-blockplacement.ll (+12-12)
- (modified) llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll (+85-81)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll (+1504-1455)
- (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+1158-1254)
- (modified) llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll (+1-1)
- (modified) llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll (+2-4)
- (modified) llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll (+6-10)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 8f56fb0938dd0..cd26e19378170 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -5765,6 +5765,15 @@ let Predicates = [HasV8_1MMainline] in {
def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR),
(t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 1), imm:$cc, CPSR),
+ (t2CSINC GPRwithZR:$tval, ZR, imm:$cc)>;
+ def : T2Pat<(ARMcmov (i32 1), GPRwithZR:$fval, imm:$cc, CPSR),
+ (t2CSINC GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+ def : T2Pat<(ARMcmov GPRwithZR:$tval, (i32 -1), imm:$cc, CPSR),
+ (t2CSINV GPRwithZR:$tval, ZR, imm:$cc)>;
+ def : T2Pat<(ARMcmov (i32 -1), GPRwithZR:$fval, imm:$cc, CPSR),
+ (t2CSINV GPRwithZR:$fval, ZR, (inv_cond_XFORM imm:$cc))>;
+
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR),
(Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>;
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..87a1221088e09 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -1039,8 +1039,8 @@ define i64 @stest_f64i64(double %x) {
;
; FULL-LABEL: stest_f64i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1049,20 +1049,20 @@ define i64 @stest_f64i64(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1295,8 +1295,8 @@ define i64 @stest_f32i64(float %x) {
;
; FULL-LABEL: stest_f32i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -1305,20 +1305,20 @@ define i64 @stest_f32i64(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -1556,8 +1556,8 @@ define i64 @stest_f16i64(half %x) {
;
; FULL-LABEL: stest_f16i64:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -1568,20 +1568,20 @@ define i64 @stest_f16i64(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: it ge
; FULL-NEXT: movge r0, #0
-; FULL-NEXT: csel r1, r1, r12, lt
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, lt
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%0 = icmp slt i128 %conv, 9223372036854775807
@@ -2708,8 +2708,8 @@ define i64 @stest_f64i64_mm(double %x) {
;
; FULL-LABEL: stest_f64i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixdfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -2718,21 +2718,21 @@ define i64 @stest_f64i64_mm(double %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi double %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3021,8 +3021,8 @@ define i64 @stest_f32i64_mm(float %x) {
;
; FULL-LABEL: stest_f32i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: bl __fixsfti
; FULL-NEXT: subs.w lr, r0, #-1
; FULL-NEXT: mvn r12, #-2147483648
@@ -3031,21 +3031,21 @@ define i64 @stest_f32i64_mm(float %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi float %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
@@ -3339,8 +3339,8 @@ define i64 @stest_f16i64_mm(half %x) {
;
; FULL-LABEL: stest_f16i64_mm:
; FULL: @ %bb.0: @ %entry
-; FULL-NEXT: .save {r4, r5, r7, lr}
-; FULL-NEXT: push {r4, r5, r7, lr}
+; FULL-NEXT: .save {r4, lr}
+; FULL-NEXT: push {r4, lr}
; FULL-NEXT: vmov.f16 r0, s0
; FULL-NEXT: vmov s0, r0
; FULL-NEXT: bl __fixhfti
@@ -3351,21 +3351,21 @@ define i64 @stest_f16i64_mm(half %x) {
; FULL-NEXT: sbcs lr, r3, #0
; FULL-NEXT: cset lr, lt
; FULL-NEXT: cmp.w lr, #0
-; FULL-NEXT: csel r5, r3, lr, ne
-; FULL-NEXT: mov.w r3, #-1
-; FULL-NEXT: csel r0, r0, r3, ne
+; FULL-NEXT: csinv r0, r0, zr, eq
; FULL-NEXT: csel r1, r1, r12, ne
+; FULL-NEXT: csel r3, r3, lr, ne
; FULL-NEXT: csel r2, r2, lr, ne
; FULL-NEXT: rsbs r4, r0, #0
-; FULL-NEXT: mov.w r12, #-2147483648
-; FULL-NEXT: sbcs.w r4, r12, r1
-; FULL-NEXT: sbcs.w r2, r3, r2
-; FULL-NEXT: sbcs.w r2, r3, r5
+; FULL-NEXT: mov.w lr, #-2147483648
+; FULL-NEXT: sbcs.w r4, lr, r1
+; FULL-NEXT: mov.w r12, #-1
+; FULL-NEXT: sbcs.w r2, r12, r2
+; FULL-NEXT: sbcs.w r2, r12, r3
; FULL-NEXT: cset r2, lt
; FULL-NEXT: cmp r2, #0
; FULL-NEXT: csel r0, r0, r2, ne
-; FULL-NEXT: csel r1, r1, r12, ne
-; FULL-NEXT: pop {r4, r5, r7, pc}
+; FULL-NEXT: csel r1, r1, lr, ne
+; FULL-NEXT: pop {r4, pc}
entry:
%conv = fptosi half %x to i128
%spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..a42a2a8083f6f 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -633,40 +633,74 @@ define i50 @test_signed_i50_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI6_2:
; SOFT-NEXT: .long 131071 @ 0x1ffff
;
-; VFP-LABEL: test_signed_i50_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI6_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI6_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: ittt lt
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movtlt r1, #65534
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: it gt
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: itt gt
-; VFP-NEXT: movwgt r1, #65535
-; VFP-NEXT: movtgt r1, #1
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI6_0:
-; VFP-NEXT: .long 0xd8000000 @ float -5.62949953E+14
-; VFP-NEXT: .LCPI6_1:
-; VFP-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+; VFP2-LABEL: test_signed_i50_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI6_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI6_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: ittt lt
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movtlt r1, #65534
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: it gt
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: movwgt r1, #65535
+; VFP2-NEXT: movtgt r1, #1
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI6_0:
+; VFP2-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; VFP2-NEXT: .LCPI6_1:
+; VFP2-NEXT: .long 0x57ffffff @ float 5.6294992E+14
+;
+; FP16-LABEL: test_signed_i50_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI6_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI6_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: ittt lt
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movtlt r1, #65534
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: itt gt
+; FP16-NEXT: movwgt r1, #65535
+; FP16-NEXT: movtgt r1, #1
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI6_0:
+; FP16-NEXT: .long 0xd8000000 @ float -5.62949953E+14
+; FP16-NEXT: .LCPI6_1:
+; FP16-NEXT: .long 0x57ffffff @ float 5.6294992E+14
%x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
ret i50 %x
}
@@ -735,37 +769,69 @@ define i64 @test_signed_i64_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI7_1:
; SOFT-NEXT: .long 2147483647 @ 0x7fffffff
;
-; VFP-LABEL: test_signed_i64_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __aeabi_f2lz
-; VFP-NEXT: vldr s0, .LCPI7_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI7_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt.w r1, #-2147483648
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt gt
-; VFP-NEXT: mvngt r1, #-2147483648
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI7_0:
-; VFP-NEXT: .long 0xdf000000 @ float -9.22337203E+18
-; VFP-NEXT: .LCPI7_1:
-; VFP-NEXT: .long 0x5effffff @ float 9.22337149E+18
+; VFP2-LABEL: test_signed_i64_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __aeabi_f2lz
+; VFP2-NEXT: vldr s0, .LCPI7_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI7_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt.w r1, #-2147483648
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt gt
+; VFP2-NEXT: mvngt r1, #-2147483648
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI7_0:
+; VFP2-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; VFP2-NEXT: .LCPI7_1:
+; VFP2-NEXT: .long 0x5effffff @ float 9.22337149E+18
+;
+; FP16-LABEL: test_signed_i64_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __aeabi_f2lz
+; FP16-NEXT: vldr s0, .LCPI7_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI7_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt.w r1, #-2147483648
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: mvngt r1, #-2147483648
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: pop {r4, pc}
+; FP16-NEXT: .p2align 2
+; FP16-NEXT: @ %bb.1:
+; FP16-NEXT: .LCPI7_0:
+; FP16-NEXT: .long 0xdf000000 @ float -9.22337203E+18
+; FP16-NEXT: .LCPI7_1:
+; FP16-NEXT: .long 0x5effffff @ float 9.22337149E+18
%x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
ret i64 %x
}
@@ -880,43 +946,81 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; SOFT-NEXT: .LCPI8_0:
; SOFT-NEXT: .long 1895825407 @ 0x70ffffff
;
-; VFP-LABEL: test_signed_i100_f32:
-; VFP: @ %bb.0:
-; VFP-NEXT: .save {r4, lr}
-; VFP-NEXT: push {r4, lr}
-; VFP-NEXT: mov r4, r0
-; VFP-NEXT: bl __fixsfti
-; VFP-NEXT: vldr s0, .LCPI8_0
-; VFP-NEXT: vmov s2, r4
-; VFP-NEXT: vldr s4, .LCPI8_1
-; VFP-NEXT: vcmp.f32 s2, s0
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt lt
-; VFP-NEXT: movlt r0, #0
-; VFP-NEXT: movlt r1, #0
-; VFP-NEXT: movlt r2, #0
-; VFP-NEXT: mvnlt r3, #7
-; VFP-NEXT: vcmp.f32 s2, s4
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt gt
-; VFP-NEXT: movgt r3, #7
-; VFP-NEXT: movgt.w r2, #-1
-; VFP-NEXT: movgt.w r1, #-1
-; VFP-NEXT: movgt.w r0, #-1
-; VFP-NEXT: vcmp.f32 s2, s2
-; VFP-NEXT: vmrs APSR_nzcv, fpscr
-; VFP-NEXT: itttt vs
-; VFP-NEXT: movvs r0, #0
-; VFP-NEXT: movvs r1, #0
-; VFP-NEXT: movvs r2, #0
-; VFP-NEXT: movvs r3, #0
-; VFP-NEXT: pop {r4, pc}
-; VFP-NEXT: .p2align 2
-; VFP-NEXT: @ %bb.1:
-; VFP-NEXT: .LCPI8_0:
-; VFP-NEXT: .long 0xf1000000 @ float -6.338253E+29
-; VFP-NEXT: .LCPI8_1:
-; VFP-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+; VFP2-LABEL: test_signed_i100_f32:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r4, lr}
+; VFP2-NEXT: push {r4, lr}
+; VFP2-NEXT: mov r4, r0
+; VFP2-NEXT: bl __fixsfti
+; VFP2-NEXT: vldr s0, .LCPI8_0
+; VFP2-NEXT: vmov s2, r4
+; VFP2-NEXT: vldr s4, .LCPI8_1
+; VFP2-NEXT: vcmp.f32 s2, s0
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt lt
+; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: movlt r1, #0
+; VFP2-NEXT: movlt r2, #0
+; VFP2-NEXT: mvnlt r3, #7
+; VFP2-NEXT: vcmp.f32 s2, s4
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt gt
+; VFP2-NEXT: movgt r3, #7
+; VFP2-NEXT: movgt.w r2, #-1
+; VFP2-NEXT: movgt.w r1, #-1
+; VFP2-NEXT: movgt.w r0, #-1
+; VFP2-NEXT: vcmp.f32 s2, s2
+; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: itttt vs
+; VFP2-NEXT: movvs r0, #0
+; VFP2-NEXT: movvs r1, #0
+; VFP2-NEXT: movvs r2, #0
+; VFP2-NEXT: movvs r3, #0
+; VFP2-NEXT: pop {r4, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI8_0:
+; VFP2-NEXT: .long 0xf1000000 @ float -6.338253E+29
+; VFP2-NEXT: .LCPI8_1:
+; VFP2-NEXT: .long 0x70ffffff @ float 6.33825262E+29
+;
+; FP16-LABEL: test_signed_i100_f32:
+; FP16: @ %bb.0:
+; FP16-NEXT: .save {r4, lr}
+; FP16-NEXT: push {r4, lr}
+; FP16-NEXT: mov r4, r0
+; FP16-NEXT: bl __fixsfti
+; FP16-NEXT: vldr s0, .LCPI8_0
+; FP16-NEXT: vmov s2, r4
+; FP16-NEXT: vldr s4, .LCPI8_1
+; FP16-NEXT: vcmp.f32 s2, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcmp.f32 s2, s4
+; FP16-NEXT: itttt lt
+; FP16-NEXT: movlt r0, #0
+; FP16-NEXT: movlt r1, #0
+; FP16-NEXT: movlt r2, #0
+; FP16-NEXT: mvnlt r3, #7
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it gt
+; FP16-NEXT: movgt r3, #7
+; FP16-NEXT: vcmp.f32 s2, s2
+; FP16-NEXT: csinv r2, r2, zr, gt
+; FP16-NEXT: csinv r1, r1, zr, gt
+; FP16-NEXT: csinv r0, r0, zr, gt
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itttt vs
+; FP16-NEXT: movvs r0, #0
+; FP...
[truncated]
``````````
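For context, the new `T2Pat` entries in ARMInstrThumb2.td map an `ARMcmov` whose constant operand is 1 or -1 onto `t2CSINC`/`t2CSINV` against `ZR`, alongside the existing all-constant patterns; this is what turns the `mov.w rN, #-1` + `csel` pairs in the tests above into single `csinv rD, rS, zr, <cc>` instructions. Below is a minimal sketch of the kind of IR this targets. It is illustrative only and not taken from the patch; the function names and the exact condition codes the compiler picks are assumptions.

```llvm
; Illustrative sketch, not part of the patch. On an Armv8.1-M Mainline target
; (roughly -mtriple=thumbv8.1m.main-none-eabi), selects against the constants
; 1 and -1 should now be able to use csinc/csinv with zr instead of first
; materialising the constant and then using csel.

define i32 @sel_reg_or_one(i32 %a, i32 %b, i32 %x) {
  %cmp = icmp eq i32 %a, %b
  ; expected along the lines of: csinc r0, r2, zr, <cc>
  %sel = select i1 %cmp, i32 %x, i32 1
  ret i32 %sel
}

define i32 @sel_reg_or_allones(i32 %a, i32 %b, i32 %x) {
  %cmp = icmp eq i32 %a, %b
  ; expected along the lines of: csinv r0, r2, zr, <cc>
  ; (previously: mov.w rN, #-1 followed by csel, as in fpclamptosat.ll above)
  %sel = select i1 %cmp, i32 %x, i32 -1
  ret i32 %sel
}
```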
https://github.com/llvm/llvm-project/pull/152945