[llvm] [AArch64] Alter latency of FCSEL under Cortex-A510 (PR #80178)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 31 10:55:00 PST 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/80178
As per the Cortex-A510 software optimization guide, the latency of a fcsel should be 3 not 4. It would previously get the latency from WriteF.
>From 4353f2c97f47df12911c87581bf01f58cca9c0f2 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 31 Jan 2024 18:53:32 +0000
Subject: [PATCH] [AArch64] Alter latency of FCSEL under Cortex-A510
As per the Cortex-A510 software optimization guide, the latency of a fcsel
should be 3 not 4. It would previously get the latency from WriteF.
---
llvm/lib/Target/AArch64/AArch64SchedA510.td | 2 +
llvm/test/CodeGen/AArch64/select_fmf.ll | 16 +-
llvm/test/CodeGen/AArch64/tbl-loops.ll | 8 +-
.../AArch64/vecreduce-fmax-legalization.ll | 140 +++++++++---------
.../AArch64/vecreduce-fmin-legalization.ll | 140 +++++++++---------
.../AArch64/Cortex/A510-basic-instructions.s | 4 +-
6 files changed, 156 insertions(+), 154 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 1b66d6bb8fbd4..5e36b6f4d34a2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;
+
// 4.15. Advanced SIMD integer instructions
// ASIMD absolute diff
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 5479e5f3b88d2..92d8676ca04be 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fmaxnm s2, s0, s3
+; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #4, lt
-; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fcsel s2, s1, s0, gt
-; CHECK-NEXT: fadd s1, s0, s4
+; CHECK-NEXT: fadd s1, s0, s1
+; CHECK-NEXT: fcsel s2, s2, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
@@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fmaxnm s2, s0, s3
+; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #0, ge
-; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fcsel s2, s0, s1, gt
-; CHECK-NEXT: fadd s1, s0, s4
+; CHECK-NEXT: fadd s1, s0, s1
+; CHECK-NEXT: fcsel s2, s0, s2, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 365fe03ab0b08..4f8a4f7aede3e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: fcsel s4, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
-; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: fcvtzs w11, s2
+; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: fcsel s4, s0, s4, mi
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: strb w11, [x9]
+; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s6, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
-; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s3, s0, s6, mi
; CHECK-NEXT: fcmp s5, s1
; CHECK-NEXT: strb w12, [x9, #1]
; CHECK-NEXT: fcsel s6, s1, s5, gt
; CHECK-NEXT: fcmp s5, #0.0
; CHECK-NEXT: fcvtzs w13, s3
-; CHECK-NEXT: fcsel s5, s0, s6, mi
+; CHECK-NEXT: fcsel s2, s0, s6, mi
; CHECK-NEXT: subs w10, w10, #1
; CHECK-NEXT: strb w13, [x9, #2]
-; CHECK-NEXT: fcvtzs w14, s5
+; CHECK-NEXT: fcvtzs w14, s2
; CHECK-NEXT: strb w14, [x9, #3]
; CHECK-NEXT: add x9, x9, #4
; CHECK-NEXT: b.ne .LBB3_6
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4f1e3fdc34fcd..16b34cce93293 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index a2bfc3c438da3..497109dfeaf09 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, lt
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
; CHECK-NOFP-NEXT: mov w8, #2139095040 // =0x7f800000
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, lt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: movk w8, #18303, lsl #16
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, lt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
index 1edb0b8ad21e0..8a5df91ad7973 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s
@@ -1886,8 +1886,8 @@ drps
# CHECK-NEXT: 1 3 0.50 fccmpe d9, d31, #0, le
# CHECK-NEXT: 1 3 0.50 fccmpe d3, d0, #15, gt
# CHECK-NEXT: 1 3 0.50 fccmpe d31, d5, #7, ne
-# CHECK-NEXT: 1 4 0.50 fcsel s3, s20, s9, pl
-# CHECK-NEXT: 1 4 0.50 fcsel d9, d10, d11, mi
+# CHECK-NEXT: 1 3 0.50 fcsel s3, s20, s9, pl
+# CHECK-NEXT: 1 3 0.50 fcsel d9, d10, d11, mi
# CHECK-NEXT: 1 4 0.50 fmov s0, s1
# CHECK-NEXT: 1 4 0.50 fabs s2, s3
# CHECK-NEXT: 1 4 0.50 fneg s4, s5
More information about the llvm-commits
mailing list