[llvm] [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (PR #104766)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 05:05:43 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (SpencerAbson)
<details>
<summary>Changes</summary>
Patterns were previously added to allow the following reductions
- fminimum(abs(a), abs(b)) -> famin(a, b)
- fmaximum(abs(a), abs(b)) -> famax(a, b)
- https://github.com/llvm/llvm-project/pull/103027
It was suggested by @<!-- -->davemgreen that the following reductions are also possible
- fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
- fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
('nnan' documentation: https://llvm.org/docs/LangRef.html#fast-math-flags)
The 'no NaNs' flag allows optimisations to assume that neither argument is a NaN, and so the differing NaN propagation semantics of llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this reduction.
(llvm.maxnum/llvm.minnum: https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic)
- Changes to LLVM
- lib/Target/AArch64/AArch64InstrInfo.td
- add 'fminnum_nnan' and 'fmaxnum_nnan': patfrags on fminnum/fmaxnum that are predicated on the intrinsic call having the 'nnan' flag.
- add AArch64famin and AArch64famax patfrags, containing the new and existing reductions.
- test/CodeGen/AArch64/aarch64-neon-faminmax.ll
- add positive and negative tests for the new reduction, based on the presence of 'nnan' in the IR intrinsic call.
---
Patch is 21.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104766.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+25-12)
- (modified) llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll (+396-1)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a9324af5beb784..627a574db90058 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -973,6 +973,17 @@ def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r),
CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
+// Match "nnan" flagged calls to fminnum and fmaxnum. These are then semantically
+// equivalent to fminimum/fmaximum.
+def fmaxnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+ (fmaxnum node:$Rn, node:$Rm), [{
+ return N->getFlags().hasNoNaNs();
+ }]>;
+def fminnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+ (fminnum node:$Rn, node:$Rm), [{
+ return N->getFlags().hasNoNaNs();
+ }]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -10158,19 +10169,21 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]
+// fminimum(abs(a), abs(b)) -> famin(a, b)
+// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
+def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
+ [(fminimum (fabs node:$Rn), (fabs node:$Rm)),
+ (fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
+// fmaximum(abs(a), abs(b)) -> famax(a, b)
+// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
+def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
+ [(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
+ (fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
let Predicates = [HasNEON, HasFAMINMAX] in {
- defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
- defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-
- foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
- // Replace min(abs(a), abs(b)) with famin(a, b)
- def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
- (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
-
- // Replace max(abs(a), abs(b)) with famax(a, b)
- def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
- (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
- }
+ defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", AArch64famax>;
+ defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", AArch64famin>;
} // End let Predicates = [HasNEON, HasFAMINMAX]
let Uses = [FPMR, FPCR], Predicates = [HasFP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
index dd2deda9839a0d..6bad52499a869d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+faminmax -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FAMINMAX
@@ -7,6 +7,10 @@ target triple = "aarch64-unknown-linux-gnu"
; Replace min(abs(a), abs(b)) with famin(a, b)
; Replace max(abs(a), abs(b)) with famax(a, b)
+; When the fastmath flag 'nnan' (no nan) is enabled, we may also replace:
+; minnm(abs(a), abs(b)) with famin(a, b)
+; maxnm(abs(a), abs(b)) with famax(a, b)
+
define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-LABEL: test_max_v4f16:
; CHECK: // %bb.0:
@@ -25,6 +29,44 @@ define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %r
}
+define <4 x half> @test_maxnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call nnan <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_maxnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4h, v0.4h
+; CHECK-NEXT: fabs v1.4h, v1.4h
+; CHECK-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-LABEL: test_min_v4f16:
; CHECK: // %bb.0:
@@ -43,6 +85,44 @@ define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %r
}
+define <4 x half> @test_minnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call nnan <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_minnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4h, v0.4h
+; CHECK-NEXT: fabs v1.4h, v1.4h
+; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_max_v8f16:
; CHECK: // %bb.0:
@@ -61,6 +141,44 @@ define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %r
}
+define <8 x half> @test_maxnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call nnan <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_maxnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.8h, v0.8h
+; CHECK-NEXT: fabs v1.8h, v1.8h
+; CHECK-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_min_v8f16:
; CHECK: // %bb.0:
@@ -79,6 +197,44 @@ define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %r
}
+define <8 x half> @test_minnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call nnan <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_minnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.8h, v0.8h
+; CHECK-NEXT: fabs v1.8h, v1.8h
+; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: test_max_v2f32:
; CHECK: // %bb.0:
@@ -97,6 +253,44 @@ define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
ret <2 x float> %r
}
+define <2 x float> @test_maxnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_maxnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2s, v0.2s
+; CHECK-NEXT: fabs v1.2s, v1.2s
+; CHECK-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: test_min_v2f32:
; CHECK: // %bb.0:
@@ -115,6 +309,44 @@ define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
ret <2 x float> %r
}
+define <2 x float> @test_minnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_minnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2s, v0.2s
+; CHECK-NEXT: fabs v1.2s, v1.2s
+; CHECK-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_max_v4f32:
; CHECK: // %bb.0:
@@ -133,6 +365,44 @@ define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %r
}
+define <4 x float> @test_maxnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_maxnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4s, v0.4s
+; CHECK-NEXT: fabs v1.4s, v1.4s
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_min_v4f32:
; CHECK: // %bb.0:
@@ -151,6 +421,44 @@ define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %r
}
+define <4 x float> @test_minnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_minnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4s, v0.4s
+; CHECK-NEXT: fabs v1.4s, v1.4s
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_max_v2f64:
; CHECK: // %bb.0:
@@ -169,6 +477,44 @@ define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %r
}
+define <2 x double> @test_maxnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_maxnm_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2d, v0.2d
+; CHECK-NEXT: fabs v1.2d, v1.2d
+; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_min_v2f64:
; CHECK: // %bb.0:
@@ -187,6 +533,43 @@ define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %r
}
+define <2 x double> @test_minnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minnm_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_min_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_noflag_v2f64:
+; CHECK: // %...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/104766
More information about the llvm-commits
mailing list