[llvm] 5f3c0b2 - [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 06:01:40 PDT 2024
Author: SpencerAbson
Date: 2024-08-20T14:01:36+01:00
New Revision: 5f3c0b23759faa80b0be59531f7e6643f9c95d32
URL: https://github.com/llvm/llvm-project/commit/5f3c0b23759faa80b0be59531f7e6643f9c95d32
DIFF: https://github.com/llvm/llvm-project/commit/5f3c0b23759faa80b0be59531f7e6643f9c95d32.diff
LOG: [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766)
Patterns were previously added to allow the following reductions
- fminimum(abs(a), abs(b)) -> famin(a, b)
- fmaximum(abs(a), abs(b)) -> famax(a, b)
- https://github.com/llvm/llvm-project/pull/103027
It was suggested by @davemgreen that the following reductions are also
possible
- fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
- fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
('nnan' documentation:
https://llvm.org/docs/LangRef.html#fast-math-flags)
The 'no NaNs' flag allows optimisations to assume that neither argument
is a NaN, and so the differing NaN propagation semantics of
llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this
reduction.
(llvm.maxnum/llvm.minnum:
https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic)
- Changes to LLVM
- lib/Target/AArch64/AArch64InstrInfo.td
- add 'fminnum_nnan' and 'fmaxnum_nnan'; patfrags on fminnum/fmaxnum that
are predicated on the intrinsic call having the 'nnan' flag.
- add AArch64famin and AArch64famax patfrags, containing the new and
existing reductions.
- test/CodeGen/AArch64/aarch64-neon-faminmax.ll
- add positive and negative tests for the new reduction, based on the
presence of 'nnan' in the IR intrinsic call.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a5fae8029deab3..ec225a5b234a26 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -973,6 +973,17 @@ def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r),
CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
+// Match "nnan" flagged calls to fminnum and fmaxnum. These are then semantically
+// equivalent to fminimum/fmaximum.
+def fmaxnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+ (fmaxnum node:$Rn, node:$Rm), [{
+ return N->getFlags().hasNoNaNs();
+ }]>;
+def fminnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+ (fminnum node:$Rn, node:$Rm), [{
+ return N->getFlags().hasNoNaNs();
+ }]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -10158,19 +10169,21 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]
+// fminimum(abs(a), abs(b)) -> famin(a, b)
+// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
+def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
+ [(fminimum (fabs node:$Rn), (fabs node:$Rm)),
+ (fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
+// fmaximum(abs(a), abs(b)) -> famax(a, b)
+// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
+def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
+ [(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
+ (fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
let Predicates = [HasNEON, HasFAMINMAX] in {
- defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
- defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-
- foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
- // Replace min(abs(a), abs(b)) with famin(a, b)
- def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
- (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
-
- // Replace max(abs(a), abs(b)) with famax(a, b)
- def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
- (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
- }
+ defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", AArch64famax>;
+ defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", AArch64famin>;
} // End let Predicates = [HasNEON, HasFAMINMAX]
let Uses = [FPMR, FPCR], Predicates = [HasFP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
index dd2deda9839a0d..04270a502ec198 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -7,6 +7,10 @@ target triple = "aarch64-unknown-linux-gnu"
; Replace min(abs(a), abs(b)) with famin(a, b)
; Replace max(abs(a), abs(b)) with famax(a, b)
+; When the fastmath flag 'nnan' (no nan) is enabled, we may also replace:
+; minnm(abs(a), abs(b)) with famin(a, b)
+; maxnm(abs(a), abs(b)) with famax(a, b)
+
define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-LABEL: test_max_v4f16:
; CHECK: // %bb.0:
@@ -25,6 +29,44 @@ define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %r
}
+define <4 x half> @test_maxnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call nnan <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_maxnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4h, v0.4h
+; CHECK-NEXT: fabs v1.4h, v1.4h
+; CHECK-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-LABEL: test_min_v4f16:
; CHECK: // %bb.0:
@@ -43,6 +85,44 @@ define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %r
}
+define <4 x half> @test_minnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call nnan <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
+define <4 x half> @test_minnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4h, v0.4h
+; CHECK-NEXT: fabs v1.4h, v1.4h
+; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+ %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+ %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+ ret <4 x half> %r
+}
+
define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_max_v8f16:
; CHECK: // %bb.0:
@@ -61,6 +141,44 @@ define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %r
}
+define <8 x half> @test_maxnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call nnan <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_maxnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.8h, v0.8h
+; CHECK-NEXT: fabs v1.8h, v1.8h
+; CHECK-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_min_v8f16:
; CHECK: // %bb.0:
@@ -79,6 +197,44 @@ define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
ret <8 x half> %r
}
+define <8 x half> @test_minnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call nnan <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
+define <8 x half> @test_minnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.8h, v0.8h
+; CHECK-NEXT: fabs v1.8h, v1.8h
+; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+ %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+ %r = call <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+ ret <8 x half> %r
+}
+
define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: test_max_v2f32:
; CHECK: // %bb.0:
@@ -97,6 +253,44 @@ define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
ret <2 x float> %r
}
+define <2 x float> @test_maxnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_maxnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2s, v0.2s
+; CHECK-NEXT: fabs v1.2s, v1.2s
+; CHECK-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: test_min_v2f32:
; CHECK: // %bb.0:
@@ -115,6 +309,44 @@ define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
ret <2 x float> %r
}
+define <2 x float> @test_minnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_minnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2s, v0.2s
+; CHECK-NEXT: fabs v1.2s, v1.2s
+; CHECK-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+ %r = call <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+ ret <2 x float> %r
+}
+
define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_max_v4f32:
; CHECK: // %bb.0:
@@ -133,6 +365,44 @@ define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %r
}
+define <4 x float> @test_maxnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_maxnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4s, v0.4s
+; CHECK-NEXT: fabs v1.4s, v1.4s
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_min_v4f32:
; CHECK: // %bb.0:
@@ -151,6 +421,44 @@ define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %r
}
+define <4 x float> @test_minnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
+define <4 x float> @test_minnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.4s, v0.4s
+; CHECK-NEXT: fabs v1.4s, v1.4s
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+ %r = call <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+ ret <4 x float> %r
+}
+
define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_max_v2f64:
; CHECK: // %bb.0:
@@ -169,6 +477,44 @@ define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %r
}
+define <2 x double> @test_maxnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_maxnm_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2d, v0.2d
+; CHECK-NEXT: fabs v1.2d, v1.2d
+; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_min_v2f64:
; CHECK: // %bb.0:
@@ -187,6 +533,43 @@ define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %r
}
+define <2 x double> @test_minnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minnm_nnan_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_min_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_noflag_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs v0.2d, v0.2d
+; CHECK-NEXT: fabs v1.2d, v1.2d
+; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_noflag_v2f64:
+; CHECK-NO-FAMINMAX: // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT: fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT: fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT: ret
+ %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+ %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+ %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+ ret <2 x double> %r
+}
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
@@ -200,10 +583,22 @@ declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+
attributes #0 = { nounwind "target-features"="+fullfp16" }
More information about the llvm-commits
mailing list