[llvm] 5f3c0b2 - [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 06:01:40 PDT 2024


Author: SpencerAbson
Date: 2024-08-20T14:01:36+01:00
New Revision: 5f3c0b23759faa80b0be59531f7e6643f9c95d32

URL: https://github.com/llvm/llvm-project/commit/5f3c0b23759faa80b0be59531f7e6643f9c95d32
DIFF: https://github.com/llvm/llvm-project/commit/5f3c0b23759faa80b0be59531f7e6643f9c95d32.diff

LOG: [AArch64][NEON] Extend faminmax patterns with fminnm/fmaxnm (#104766)

Patterns were previously added to allow the following reductions
- fminimum(abs(a), abs(b)) -> famin(a, b)
- fmaximum(abs(a), abs(b)) -> famax(a, b)
- https://github.com/llvm/llvm-project/pull/103027
 
It was suggested by @davemgreen that the following reductions are also
possible
- fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
- fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)

('nnan' documentation:
https://llvm.org/docs/LangRef.html#fast-math-flags)

The 'no NaNs' flag allows optimisations to assume that neither argument
is a NaN, and so the differing NaN propagation semantics of
llvm.maxnum/llvm.minnum and FAMAX/FAMIN can be ignored in this
reduction.
(llvm.maxnum/llvm.minnum:
https://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic)

- Changes to LLVM
	- lib/Target/AArch64/AArch64InstrInfo.td
- add 'fminnum_nnan' and 'fmaxnum_nnan'; patfrags on fminnum/fmaxnum that
are predicated on the intrinsic call having the 'nnan' flag.
- add AArch64famin and AArch64famax patfrags, containing the new and
existing reductions.
	- test/CodeGen/AArch64/aarch64-neon-faminmax.ll
- add positive and negative tests for the new reduction, based on the
presence of 'nnan' in the IR intrinsic call.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a5fae8029deab3..ec225a5b234a26 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -973,6 +973,17 @@ def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r),
          CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
 }]>;
 
+// Match "nnan" flagged calls to fminnum and fmaxnum. They are then semantically
+// equivalent to fminimum/fmaximum.
+def fmaxnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+                           (fmaxnum node:$Rn, node:$Rm), [{
+  return N->getFlags().hasNoNaNs();
+  }]>;
+def fminnum_nnan : PatFrag<(ops node:$Rn, node:$Rm),
+                           (fminnum node:$Rn, node:$Rm), [{
+  return N->getFlags().hasNoNaNs();
+  }]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -10158,19 +10169,21 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
   defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
 } // End let Predicates = [HasFP8]
 
+// fminimum(abs(a), abs(b)) -> famin(a, b)
+// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
+def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
+                             [(fminimum (fabs node:$Rn), (fabs node:$Rm)),
+                              (fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
+// fmaximum(abs(a), abs(b)) -> famax(a, b)
+// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
+def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
+                             [(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
+                              (fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
+
 let Predicates = [HasNEON, HasFAMINMAX] in {
- defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
- defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
-
- foreach Ty = [v4f16, v8f16, v2f32, v4f32, v2f64] in {
-  // Replace min(abs(a), abs(b)) with famin(a, b)
-  def : Pat<(Ty (fminimum (fabs Ty:$Rn), (fabs Ty:$Rm))),
-            (!cast<Instruction>("FAMIN"#Ty) Ty:$Rn, Ty:$Rm)>;
-
-  // Replace max(abs(a), abs(b)) with famax(a, b)
-  def : Pat<(Ty (fmaximum (fabs Ty:$Rn), (fabs Ty:$Rm))),
-            (!cast<Instruction>("FAMAX"#Ty) Ty:$Rn, Ty:$Rm)>;
- }
+ defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", AArch64famax>;
+ defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", AArch64famin>;
 } // End let Predicates = [HasNEON, HasFAMINMAX]
 
 let Uses = [FPMR, FPCR], Predicates = [HasFP8FMA] in {

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
index dd2deda9839a0d..04270a502ec198 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-faminmax.ll
@@ -7,6 +7,10 @@ target triple = "aarch64-unknown-linux-gnu"
 ; Replace min(abs(a), abs(b)) with famin(a, b)
 ; Replace max(abs(a), abs(b)) with famax(a, b)
 
+; When the fastmath flag 'nnan' (no nan) is enabled, we may also replace:
+; minnm(abs(a), abs(b)) with famin(a, b)
+; maxnm(abs(a), abs(b)) with famax(a, b)
+
 define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: test_max_v4f16:
 ; CHECK:       // %bb.0:
@@ -25,6 +29,44 @@ define <4 x half> @test_max_v4f16(<4 x half> %a, <4 x half> %b) #0 {
   ret <4 x half> %r
 }
 
+define <4 x half> @test_maxnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+  %r = call nnan <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa,  <4 x half> %ab)
+  ret <4 x half> %r
+}
+
+define <4 x half> @test_maxnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+  %r = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+  ret <4 x half> %r
+}
+
 define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: test_min_v4f16:
 ; CHECK:       // %bb.0:
@@ -43,6 +85,44 @@ define <4 x half> @test_min_v4f16(<4 x half> %a, <4 x half> %b) #0 {
   ret <4 x half> %r
 }
 
+define <4 x half> @test_minnm_nnan_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+  %r = call nnan <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+  ret <4 x half> %r
+}
+
+define <4 x half> @test_minnm_noflag_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4h, v0.4h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.4h, v0.4h, v1.4h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  %ab = call <4 x half> @llvm.fabs.v4f16(<4 x half> %b)
+  %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %aa, <4 x half> %ab)
+  ret <4 x half> %r
+}
+
 define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-LABEL: test_max_v8f16:
 ; CHECK:       // %bb.0:
@@ -61,6 +141,44 @@ define <8 x half> @test_max_v8f16(<8 x half> %a, <8 x half> %b) #0 {
   ret <8 x half> %r
 }
 
+define <8 x half> @test_maxnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_nnan_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %r = call nnan <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+  ret <8 x half> %r
+}
+
+define <8 x half> @test_maxnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_maxnm_noflag_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %r = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+  ret <8 x half> %r
+}
+
 define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; CHECK-LABEL: test_min_v8f16:
 ; CHECK:       // %bb.0:
@@ -79,6 +197,44 @@ define <8 x half> @test_min_v8f16(<8 x half> %a, <8 x half> %b) #0 {
   ret <8 x half> %r
 }
 
+define <8 x half> @test_minnm_nnan_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_nnan_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v8f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %r = call nnan <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+  ret <8 x half> %r
+}
+
+define <8 x half> @test_minnm_noflag_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: test_minnm_noflag_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v8f16:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.8h, v0.8h
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.8h, v0.8h, v1.8h
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %ab = call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %r = call <8 x half> @llvm.minnum.v8f16(<8 x half> %aa, <8 x half> %ab)
+  ret <8 x half> %r
+}
+
 define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_max_v2f32:
 ; CHECK:       // %bb.0:
@@ -97,6 +253,44 @@ define <2 x float> @test_max_v2f32(<2 x float> %a, <2 x float> %b) {
   ret <2 x float> %r
 }
 
+define <2 x float> @test_maxnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+  %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+  %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+  ret <2 x float> %r
+}
+
+define <2 x float> @test_maxnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+  %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+  %r = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+  ret <2 x float> %r
+}
+
 define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_min_v2f32:
 ; CHECK:       // %bb.0:
@@ -115,6 +309,44 @@ define <2 x float> @test_min_v2f32(<2 x float> %a, <2 x float> %b) {
   ret <2 x float> %r
 }
 
+define <2 x float> @test_minnm_nnan_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v2f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+  %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+  %r = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+  ret <2 x float> %r
+}
+
+define <2 x float> @test_minnm_noflag_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NEXT:    fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v2f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2s, v0.2s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.2s, v0.2s, v1.2s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+  %ab = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b)
+  %r = call <2 x float> @llvm.minnum.v2f32(<2 x float> %aa, <2 x float> %ab)
+  ret <2 x float> %r
+}
+
 define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_max_v4f32:
 ; CHECK:       // %bb.0:
@@ -133,6 +365,44 @@ define <4 x float> @test_max_v4f32(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %r
 }
 
+define <4 x float> @test_maxnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %r = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+  ret <4 x float> %r
+}
+
+define <4 x float> @test_maxnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %r = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+  ret <4 x float> %r
+}
+
 define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_min_v4f32:
 ; CHECK:       // %bb.0:
@@ -151,6 +421,44 @@ define <4 x float> @test_min_v4f32(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %r
 }
 
+define <4 x float> @test_minnm_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_nnan_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v4f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %r = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+  ret <4 x float> %r
+}
+
+define <4 x float> @test_minnm_noflag_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnm_noflag_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_noflag_v4f32:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.4s, v0.4s
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %ab = call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %r = call <4 x float> @llvm.minnum.v4f32(<4 x float> %aa, <4 x float> %ab)
+  ret <4 x float> %r
+}
+
 define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_max_v2f64:
 ; CHECK:       // %bb.0:
@@ -169,6 +477,44 @@ define <2 x double> @test_max_v2f64(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %r
 }
 
+define <2 x double> @test_maxnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_nnan_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_nnan_v2f64:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+  %r = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+  ret <2 x double> %r
+}
+
+define <2 x double> @test_maxnm_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnm_noflag_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_maxnm_noflag_v2f64:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+  %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+  ret <2 x double> %r
+}
+
 define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_min_v2f64:
 ; CHECK:       // %bb.0:
@@ -187,6 +533,43 @@ define <2 x double> @test_min_v2f64(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %r
 }
 
+define <2 x double> @test_minnm_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minnm_nnan_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_minnm_nnan_v2f64:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+  %r = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+  ret <2 x double> %r
+}
+
+define <2 x double> @test_min_noflag_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_min_noflag_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NEXT:    fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+;
+; CHECK-NO-FAMINMAX-LABEL: test_min_noflag_v2f64:
+; CHECK-NO-FAMINMAX:       // %bb.0:
+; CHECK-NO-FAMINMAX-NEXT:    fabs v0.2d, v0.2d
+; CHECK-NO-FAMINMAX-NEXT:    fabs v1.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    fminnm v0.2d, v0.2d, v1.2d
+; CHECK-NO-FAMINMAX-NEXT:    ret
+  %aa = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
+  %ab = call <2 x double> @llvm.fabs.v2f64(<2 x double> %b)
+  %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> %aa, <2 x double> %ab)
+  ret <2 x double> %r
+}
 
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
@@ -200,10 +583,22 @@ declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
 declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
 
+declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+
 declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
 declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
 declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
 
+declare <4 x half> @llvm.maxnum.v4f16(<4 x half>,  <4 x half>)
+declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+
 attributes #0 = { nounwind "target-features"="+fullfp16" }


        


More information about the llvm-commits mailing list