[llvm] [AArch64][GlobalISel] Legalize more G_VECREDUCE_FMIN/FMAX operations. (PR #159082)
Ryan Cowan via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 05:50:16 PDT 2025
https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/159082
>From 4ff545403465b50c2672bcc8727d3f391d52fe65 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 15 Sep 2025 15:21:28 +0000
Subject: [PATCH] [AArch64][GlobalISel] Legalize more G_VECREDUCE_FMIN/FMAX
operations.
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 +
.../vecreduce-fmax-legalization-nan.ll | 106 ++++++++++---
.../AArch64/vecreduce-fmax-legalization.ll | 142 +++++++++++++-----
.../CodeGen/AArch64/vecreduce-fmaximum.ll | 80 +++++++---
.../AArch64/vecreduce-fmin-legalization.ll | 142 +++++++++++++-----
.../CodeGen/AArch64/vecreduce-fminimum.ll | 80 +++++++---
6 files changed, 422 insertions(+), 129 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ff09b375c3108..ea2196a584127 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1348,6 +1348,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
+ .scalarize(1)
.lower();
getActionDefinitionsBuilder(G_VECREDUCE_MUL)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 1d295a30a994b..2368b0288ccb7 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -6,15 +6,9 @@
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
;
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
@@ -557,33 +551,99 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
}
define float @test_v3f32(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32_ninf:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32_ninf:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32_ninf:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
-; CHECK-LABEL: test_v2f128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: b fmaxl
+; CHECK-NOFP-SD-LABEL: test_v2f128:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: b fmaxl
+;
+; CHECK-FP-SD-LABEL: test_v2f128:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: b fmaxl
+;
+; CHECK-NOFP-GI-LABEL: test_v2f128:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP-GI-NEXT: bl fmaxl
+; CHECK-NOFP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v2f128:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP-GI-NEXT: bl fmaxl
+; CHECK-FP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP-GI-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index ee2af110c84cd..a2f4ccd369fb4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -6,15 +6,9 @@
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
;
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
@@ -557,45 +551,123 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
}
define float @test_v3f32(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-8388608 // =0xff800000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-8388608 // =0xff800000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32_ninf:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32_ninf:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32_ninf:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
-; CHECK-LABEL: test_v2f128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __gttf2
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.le .LBB18_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB18_2:
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v2f128:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: sub sp, sp, #48
+; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NOFP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NOFP-SD-NEXT: bl __gttf2
+; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: cmp w0, #0
+; CHECK-NOFP-SD-NEXT: b.le .LBB18_2
+; CHECK-NOFP-SD-NEXT: // %bb.1:
+; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: .LBB18_2:
+; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: add sp, sp, #48
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v2f128:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: sub sp, sp, #48
+; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-FP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-FP-SD-NEXT: bl __gttf2
+; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP-SD-NEXT: cmp w0, #0
+; CHECK-FP-SD-NEXT: b.le .LBB18_2
+; CHECK-FP-SD-NEXT: // %bb.1:
+; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-FP-SD-NEXT: .LBB18_2:
+; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-FP-SD-NEXT: add sp, sp, #48
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v2f128:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP-GI-NEXT: bl fmaxl
+; CHECK-NOFP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v2f128:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP-GI-NEXT: bl fmaxl
+; CHECK-FP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP-GI-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index be61f9b521795..1d5b70796bdb1 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -5,12 +5,8 @@
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
;
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
declare half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
@@ -440,26 +436,74 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; Neutral element is negative infinity which is chosen for padding the widened
; vector.
define float @test_v3f32(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-8388608 // =0xff800000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-8388608 // =0xff800000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmax s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmax s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmax s0, s0, s1
+; CHECK-FP-GI-NEXT: fmax s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
ret float %b
}
; Neutral element chosen for padding the widened vector is not negative infinity.
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32_ninf:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fmaxv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32_ninf:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmax s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmax s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32_ninf:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmax s0, s0, s1
+; CHECK-FP-GI-NEXT: fmax s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call ninf float @llvm.vector.reduce.fmaximum.v3f32(<3 x float> %a)
ret float %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 79a8fc35e833d..c5109c8e63497 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -6,15 +6,9 @@
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
;
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
@@ -557,45 +551,123 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
}
define float @test_v3f32(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fminnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fminnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fminnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fminnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fminnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fminnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
ret float %b
}
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32_ninf:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fminnmv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fminnmv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32_ninf:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fminnmv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fminnm s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fminnm s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32_ninf:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fminnm s0, s0, s1
+; CHECK-FP-GI-NEXT: fminnm s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call nnan ninf float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
ret float %b
}
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
-; CHECK-LABEL: test_v2f128:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.pl .LBB18_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: .LBB18_2:
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v2f128:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: sub sp, sp, #48
+; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NOFP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NOFP-SD-NEXT: bl __lttf2
+; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: cmp w0, #0
+; CHECK-NOFP-SD-NEXT: b.pl .LBB18_2
+; CHECK-NOFP-SD-NEXT: // %bb.1:
+; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: .LBB18_2:
+; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NOFP-SD-NEXT: add sp, sp, #48
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v2f128:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: sub sp, sp, #48
+; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-FP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-FP-SD-NEXT: bl __lttf2
+; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-FP-SD-NEXT: cmp w0, #0
+; CHECK-FP-SD-NEXT: b.pl .LBB18_2
+; CHECK-FP-SD-NEXT: // %bb.1:
+; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-FP-SD-NEXT: .LBB18_2:
+; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-FP-SD-NEXT: add sp, sp, #48
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v2f128:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NOFP-GI-NEXT: bl fminl
+; CHECK-NOFP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v2f128:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-FP-GI-NEXT: bl fminl
+; CHECK-FP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-FP-GI-NEXT: ret
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
index e735f670ced0c..56ff68ed0eddc 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
@@ -5,12 +5,8 @@
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
;
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
-; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
declare half @llvm.vector.reduce.fminimum.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a)
@@ -440,26 +436,74 @@ define half @test_v11f16(<11 x half> %a) nounwind {
; Neutral element is positive infinity which is chosen for padding the widened
; vector.
define float @test_v3f32(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fminv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fminv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fminv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmin s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmin s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmin s0, s0, s1
+; CHECK-FP-GI-NEXT: fmin s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call float @llvm.vector.reduce.fminimum.v3f32(<3 x float> %a)
ret float %b
}
; Neutral element chosen for padding the widened vector is not positive infinity.
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
-; CHECK-LABEL: test_v3f32_ninf:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
-; CHECK-NEXT: fminv s0, v0.4s
-; CHECK-NEXT: ret
+; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-SD: // %bb.0:
+; CHECK-NOFP-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NOFP-SD-NEXT: fminv s0, v0.4s
+; CHECK-NOFP-SD-NEXT: ret
+;
+; CHECK-FP-SD-LABEL: test_v3f32_ninf:
+; CHECK-FP-SD: // %bb.0:
+; CHECK-FP-SD-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-FP-SD-NEXT: fminv s0, v0.4s
+; CHECK-FP-SD-NEXT: ret
+;
+; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
+; CHECK-NOFP-GI: // %bb.0:
+; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-NOFP-GI-NEXT: fmin s0, s0, s1
+; CHECK-NOFP-GI-NEXT: fmin s0, s0, s2
+; CHECK-NOFP-GI-NEXT: ret
+;
+; CHECK-FP-GI-LABEL: test_v3f32_ninf:
+; CHECK-FP-GI: // %bb.0:
+; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
+; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
+; CHECK-FP-GI-NEXT: fmin s0, s0, s1
+; CHECK-FP-GI-NEXT: fmin s0, s0, s2
+; CHECK-FP-GI-NEXT: ret
%b = call ninf float @llvm.vector.reduce.fminimum.v3f32(<3 x float> %a)
ret float %b
}
More information about the llvm-commits mailing list