[llvm] [AArch64] Alter latency of FCSEL under Cortex-A510 (PR #80178)

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 10:55:00 PST 2024


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/80178

As per the Cortex-A510 software optimization guide, the latency of a fcsel should be 3 not 4. It would previously get the latency from WriteF.

>From 4353f2c97f47df12911c87581bf01f58cca9c0f2 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 31 Jan 2024 18:53:32 +0000
Subject: [PATCH] [AArch64] Alter latency of FCSEL under Cortex-A510

As per the Cortex-A510 software optimization guide, the latency of a fcsel
should be 3 not 4. It would previously get the latency from WriteF.
---
 llvm/lib/Target/AArch64/AArch64SchedA510.td   |   2 +
 llvm/test/CodeGen/AArch64/select_fmf.ll       |  16 +-
 llvm/test/CodeGen/AArch64/tbl-loops.ll        |   8 +-
 .../AArch64/vecreduce-fmax-legalization.ll    | 140 +++++++++---------
 .../AArch64/vecreduce-fmin-legalization.ll    | 140 +++++++++---------
 .../AArch64/Cortex/A510-basic-instructions.s  |   4 +-
 6 files changed, 156 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 1b66d6bb8fbd4..5e36b6f4d34a2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
 def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
 
+def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;
+
 // 4.15. Advanced SIMD integer instructions
 // ASIMD absolute diff
 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 5479e5f3b88d2..92d8676ca04be 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #4, lt
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s1, s0, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s2, s0, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB0_2
@@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnm s4, s1, s2
 ; CHECK-NEXT:    fcmp s1, s2
-; CHECK-NEXT:    fmaxnm s1, s0, s3
+; CHECK-NEXT:    fmaxnm s2, s0, s3
+; CHECK-NEXT:    fmov s1, #0.50000000
 ; CHECK-NEXT:    fccmp s4, s0, #0, ge
-; CHECK-NEXT:    fmov s4, #0.50000000
-; CHECK-NEXT:    fcsel s2, s0, s1, gt
-; CHECK-NEXT:    fadd s1, s0, s4
+; CHECK-NEXT:    fadd s1, s0, s1
+; CHECK-NEXT:    fcsel s2, s0, s2, gt
 ; CHECK-NEXT:    fadd s4, s1, s2
 ; CHECK-NEXT:    fcmp s4, s1
 ; CHECK-NEXT:    b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 365fe03ab0b08..4f8a4f7aede3e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    fcsel s4, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    fcvtzs w11, s2
+; CHECK-NEXT:    ldp s3, s5, [x8, #8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    fcsel s4, s0, s4, mi
 ; CHECK-NEXT:    fcmp s3, s1
 ; CHECK-NEXT:    strb w11, [x9]
+; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s6, s1, s3, gt
 ; CHECK-NEXT:    fcmp s3, #0.0
-; CHECK-NEXT:    fcvtzs w12, s4
 ; CHECK-NEXT:    fcsel s3, s0, s6, mi
 ; CHECK-NEXT:    fcmp s5, s1
 ; CHECK-NEXT:    strb w12, [x9, #1]
 ; CHECK-NEXT:    fcsel s6, s1, s5, gt
 ; CHECK-NEXT:    fcmp s5, #0.0
 ; CHECK-NEXT:    fcvtzs w13, s3
-; CHECK-NEXT:    fcsel s5, s0, s6, mi
+; CHECK-NEXT:    fcsel s2, s0, s6, mi
 ; CHECK-NEXT:    subs w10, w10, #1
 ; CHECK-NEXT:    strb w13, [x9, #2]
-; CHECK-NEXT:    fcvtzs w14, s5
+; CHECK-NEXT:    fcvtzs w14, s2
 ; CHECK-NEXT:    strb w14, [x9, #3]
 ; CHECK-NEXT:    add x9, x9, #4
 ; CHECK-NEXT:    b.ne .LBB3_6
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4f1e3fdc34fcd..16b34cce93293 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, gt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fmaxnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #-8388608 // =0xff800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    mov w8, #57344 // =0xe000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-NEXT:    movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, gt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index a2bfc3c438da3..497109dfeaf09 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[3]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[4]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    mov h4, v1.h[5]
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
 ; CHECK-NOFP-SD-NEXT:    mov h5, v0.h[6]
 ; CHECK-NOFP-SD-NEXT:    mov h1, v1.h[7]
 ; CHECK-NOFP-SD-NEXT:    mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
+; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
 ; CHECK-NOFP-SD-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-SD-NEXT:    fcvt s5, h5
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcmp s5, s4
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s3, s5, s4, lt
 ; CHECK-NOFP-SD-NEXT:    fcmp s0, s1
+; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcsel s0, s0, s1, lt
 ; CHECK-NOFP-SD-NEXT:    fcvt h2, s2
-; CHECK-NOFP-SD-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h0, s0
-; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s3, h3
+; CHECK-NOFP-SD-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-SD-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-SD-NEXT:    fminnm s2, s2, s3
 ; CHECK-NOFP-SD-NEXT:    fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    ldr h2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NOFP-NEXT:    mov w8, #2139095040 // =0x7f800000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, lt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcvt s17, h17
 ; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcvt s4, h4
 ; CHECK-NOFP-NEXT:    fcmp s1, s16
 ; CHECK-NOFP-NEXT:    fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT:    fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    fcvt s16, h16
 ; CHECK-NOFP-NEXT:    fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcmp s2, s16
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ; CHECK-NOFP-NEXT:    mov w8, #57344 // =0xe000
 ; CHECK-NOFP-NEXT:    fcvt s2, h2
 ; CHECK-NOFP-NEXT:    movk w8, #18303, lsl #16
-; CHECK-NOFP-NEXT:    fmov s16, w8
-; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    fcmp s3, s2
-; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fcvt s0, h0
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h4
+; CHECK-NOFP-NEXT:    fmov s1, w8
+; CHECK-NOFP-NEXT:    fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT:    fcmp s4, s2
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h5
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h5
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h6
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h6
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT:    fcvt s3, h7
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT:    fcvt s4, h7
+; CHECK-NOFP-NEXT:    fcvt h3, s3
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
-; CHECK-NOFP-NEXT:    fcmp s3, s2
+; CHECK-NOFP-NEXT:    fcmp s4, s2
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fcsel s1, s4, s1, lt
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt h1, s1
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
 ; CHECK-NOFP-NEXT:    fcvt s1, h1
-; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
-; CHECK-NOFP-NEXT:    fcsel s1, s3, s16, lt
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
-; CHECK-NOFP-NEXT:    fcvt h1, s1
 ; CHECK-NOFP-NEXT:    fcvt s0, h0
-; CHECK-NOFP-NEXT:    fcvt s1, h1
 ; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT:    fcvt h0, s0
 ; CHECK-NOFP-NEXT:    ret
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
index 1edb0b8ad21e0..8a5df91ad7973 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
@@ -1886,8 +1886,8 @@ drps
 # CHECK-NEXT:  1      3     0.50                        fccmpe	d9, d31, #0, le
 # CHECK-NEXT:  1      3     0.50                        fccmpe	d3, d0, #15, gt
 # CHECK-NEXT:  1      3     0.50                        fccmpe	d31, d5, #7, ne
-# CHECK-NEXT:  1      4     0.50                        fcsel	s3, s20, s9, pl
-# CHECK-NEXT:  1      4     0.50                        fcsel	d9, d10, d11, mi
+# CHECK-NEXT:  1      3     0.50                        fcsel	s3, s20, s9, pl
+# CHECK-NEXT:  1      3     0.50                        fcsel	d9, d10, d11, mi
 # CHECK-NEXT:  1      4     0.50                        fmov	s0, s1
 # CHECK-NEXT:  1      4     0.50                        fabs	s2, s3
 # CHECK-NEXT:  1      4     0.50                        fneg	s4, s5



More information about the llvm-commits mailing list