[llvm] [AArch64] Alter latency of FCSEL under Cortex-A510 (PR #80178)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 31 10:55:33 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
As per the Cortex-A510 Software Optimization Guide, the latency of an FCSEL should be 3, not 4. Previously, FCSEL picked up its latency from WriteF.
---
Patch is 25.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80178.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedA510.td (+2)
- (modified) llvm/test/CodeGen/AArch64/select_fmf.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll (+70-70)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll (+70-70)
- (modified) llvm/test/tools/llvm-mca/AArch64/Cortex/A510-basic-instructions.s (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 1b66d6bb8fbd4..5e36b6f4d34a2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -394,6 +394,8 @@ def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[CortexA510WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>;
+
// 4.15. Advanced SIMD integer instructions
// ASIMD absolute diff
def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 5479e5f3b88d2..92d8676ca04be 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -9,11 +9,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fmaxnm s2, s0, s3
+; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #4, lt
-; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fcsel s2, s1, s0, gt
-; CHECK-NEXT: fadd s1, s0, s4
+; CHECK-NEXT: fadd s1, s0, s1
+; CHECK-NEXT: fcsel s2, s2, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
@@ -67,11 +67,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fmaxnm s2, s0, s3
+; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fccmp s4, s0, #0, ge
-; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fcsel s2, s0, s1, gt
-; CHECK-NEXT: fadd s1, s0, s4
+; CHECK-NEXT: fadd s1, s0, s1
+; CHECK-NEXT: fcsel s2, s0, s2, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index 365fe03ab0b08..4f8a4f7aede3e 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -562,25 +562,25 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: fcsel s4, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
-; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: fcvtzs w11, s2
+; CHECK-NEXT: ldp s3, s5, [x8, #8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: fcsel s4, s0, s4, mi
; CHECK-NEXT: fcmp s3, s1
; CHECK-NEXT: strb w11, [x9]
+; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s6, s1, s3, gt
; CHECK-NEXT: fcmp s3, #0.0
-; CHECK-NEXT: fcvtzs w12, s4
; CHECK-NEXT: fcsel s3, s0, s6, mi
; CHECK-NEXT: fcmp s5, s1
; CHECK-NEXT: strb w12, [x9, #1]
; CHECK-NEXT: fcsel s6, s1, s5, gt
; CHECK-NEXT: fcmp s5, #0.0
; CHECK-NEXT: fcvtzs w13, s3
-; CHECK-NEXT: fcsel s5, s0, s6, mi
+; CHECK-NEXT: fcsel s2, s0, s6, mi
; CHECK-NEXT: subs w10, w10, #1
; CHECK-NEXT: strb w13, [x9, #2]
-; CHECK-NEXT: fcvtzs w14, s5
+; CHECK-NEXT: fcvtzs w14, s2
; CHECK-NEXT: strb w14, [x9, #3]
; CHECK-NEXT: add x9, x9, #4
; CHECK-NEXT: b.ne .LBB3_6
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 4f1e3fdc34fcd..16b34cce93293 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
@@ -527,6 +527,7 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -534,8 +535,8 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -544,50 +545,49 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
; CHECK-NOFP-NEXT: fcvt s2, h2
; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
; CHECK-NOFP-NEXT: fcvt s0, h0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index a2bfc3c438da3..497109dfeaf09 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -242,34 +242,34 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
@@ -277,24 +277,24 @@ define half @test_v16f16(<16 x half> %a) nounwind {
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
-; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
+; CHECK-NOFP-SD-NEXT: fcvt h2, s2
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, lt
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
+; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, lt
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
-; CHECK-NOFP-SD-NEXT: fcvt h3, s3
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
-; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
+; CHECK-NOFP-SD-NEXT: fcvt s2, h2
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
; CHECK-NOFP-SD-NEXT: fminnm s2, s2, s3
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
@@ -420,6 +420,7 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcvt s17, h17
; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fcvt s4, h4
; CHECK-NOFP-NEXT: fcmp s1, s16
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
; CHECK-NOFP-NEXT: fcmp s0, s17
@@ -427,8 +428,8 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: fcvt s16, h16
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcmp s2, s16
; CHECK-NOFP-NEXT: fcvt s1, h1
; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
@@ -436,50 +437,49 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
; CHECK-NOFP-NEXT: mov w8, #2139095040 // =0x7f800000
; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
-; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcvt h0, s0
; CHECK-NOFP-NEXT: fcmp s3, s2
-; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fcvt s1, h1
+; CHECK-NOFP-NEXT: fcvt s0, h0
; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h4
+; CHECK-NOFP-NEXT: fmov s1, w8
+; CHECK-NOFP-NEXT: fcsel s3, s3, s1, lt
+; CHECK-NOFP-NEXT: fcmp s4, s2
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h5
+; CHECK-NOFP-NEXT: fcvt s3, h3
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h5
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h6
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h6
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+; CHECK-NOFP-NEXT: fcvt s3, h3
; CHECK-NOFP-NEXT: fcvt s0, h0
-; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fminnm s0, s0, s1
-; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
-; CHECK-NOFP-NEXT: fcvt s3, h7
+; CHECK-NOFP-NEXT: fminnm s0, s0, s3
+; CHECK-NOFP-NEXT: fcsel s3, s4, s1, lt
+; CHECK-NOFP-NEXT: fcvt s4, h7
+; CHECK-NOFP-NEXT: fcvt h3, s3
; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcmp s3, s2
+; CHECK-NOFP-NEXT: fcmp s4, s2
+...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80178
More information about the llvm-commits mailing list