[llvm] [AArch64] Alter latency of FCSEL under Cortex-A510 (PR #80178)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 10:55:33 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

As per the Cortex-A510 Software Optimization Guide, the latency of an FCSEL instruction should be 3 cycles, not 4. Previously, FCSEL got its latency from WriteF.

---

Patch is 25.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80178.diff


6 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+2) 
- (modified) llvm/test/CodeGen/AArch64/select_fmf.ll (+8-8) 
- (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+4-4) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll (+70-70) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll (+70-70) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s (+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 1b66d6bb8fbd4..5e36b6f4d34a2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
 def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
 
+def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;
+
 // 4.15. Advanced SIMD integer instructions
 // ASIMD absolute diff
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 5479e5f3b88d2..92d8676ca04be 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #4, lt
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s1, s0, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s2, s0, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB0_2
@@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #0, ge
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s0, s1, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s0, s2, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 365fe03ab0b08..4f8a4f7aede3e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    fcsel s4, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    fcvtzs w11, s2
+; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    fcsel s4, s0, s4, mi
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    strb w11, [x9]
+; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s6, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s3, s0, s6, mi
 ; CHECK-NEXT:    fcmp s5, s1
 ; CHECK-NEXT:    strb w12, [x9, #1]
 ; CHECK-NEXT:    fcsel s6, s1, s5, gt
 ; CHECK-NEXT:    fcmp s5, #0.0
 ; CHECK-NEXT:    fcvtzs w13, s3
-; CHECK-NEXT:    fcsel s5, s0, s6, mi
+; CHECK-NEXT:    fcsel s2, s0, s6, mi
 ; CHECK-NEXT:    subs w10, w10, #1
 ; CHECK-NEXT:    strb w13, [x9, #2]
-; CHECK-NEXT:    fcvtzs w14, s5
+; CHECK-NEXT:    fcvtzs w14, s2
 ; CHECK-NEXT:    strb w14, [x9, #3]
 ; CHECK-NEXT:    add x9, x9, #4
 ; CHECK-NEXT:    b.ne .LBB3_6
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4f1e3fdc34fcd..16b34cce93293 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #-8388608 // =0xff800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    mov w8, #57344 // =0xe000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-NEXT:    movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index a2bfc3c438da3..497109dfeaf09 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, lt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #2139095040 // =0x7f800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/80178


More information about the llvm-commits mailing list