[llvm] [SelectionDAG] Remove `NoNaNsFPMath` uses (PR #183448)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 18:01:37 PDT 2026
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/183448
From e39c8533fce0b1df1726aa488ccb8ed8b65a5b2b Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Thu, 26 Feb 2026 17:45:39 +0800
Subject: [PATCH] [SelectionDAG] Remove `NoNaNsFPMath` uses
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 +-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 +-
.../test/CodeGen/AArch64/sve-bf16-converts.ll | 111 +++++++----
.../CodeGen/AArch64/sve2-bf16-converts.ll | 180 +++++++++++-------
.../CodeGen/AMDGPU/combine_andor_with_cmps.ll | 10 +-
llvm/test/CodeGen/AMDGPU/fmax3.ll | 8 +-
llvm/test/CodeGen/AMDGPU/fmin3.ll | 8 +-
llvm/test/CodeGen/PowerPC/scalar_cmp.ll | 22 +--
.../CodeGen/RISCV/float-maximum-minimum.ll | 4 +-
.../CodeGen/RISCV/half-maximum-minimum.ll | 4 +-
.../X86/avx512fp16-fminimum-fmaximum.ll | 30 +--
llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 26 +--
.../CodeGen/X86/fminimumnum-fmaximumnum.ll | 26 +--
15 files changed, 261 insertions(+), 184 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f104e6724cdf9..51a19c557eb25 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5968,7 +5968,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
assert(!DemandedElts.isZero() && "No demanded elements");
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ if (Op->getFlags().hasNoNaNs())
return true;
if (Depth >= MaxRecursionDepth)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cd9de6c729649..78054d2cf6056 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4824,6 +4824,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
+ SDNodeFlags Flags = Op->getFlags();
if (VT.isScalableVector()) {
// Let common code split the operation.
@@ -4848,7 +4849,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
// Set the quiet bit.
- if (!DAG.isKnownNeverSNaN(SrcVal))
+ if (!DAG.isKnownNeverSNaN(SrcVal) && !Flags.hasNoNaNs())
NaN = DAG.getNode(ISD::OR, DL, I32, Narrow, ImmV(0x400000));
} else if (SrcVT == MVT::nxv2f64 &&
(Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ee43ab8d8172..932d6a5841aab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15397,7 +15397,8 @@ static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL, SDValue Op0,
- SDValue Op1) const {
+ SDValue Op1,
+ bool IsKnownNoNaNs) const {
ConstantFPSDNode *K1 = getSplatConstantFP(Op1);
if (!K1)
return SDValue();
@@ -15454,7 +15455,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
- if (!DAG.isKnownNeverSNaN(Var))
+ if (!IsKnownNoNaNs && !DAG.isKnownNeverSNaN(Var))
return SDValue();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
@@ -15572,7 +15573,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
(VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
- if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
+ if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1,
+ N->getFlags().hasNoNaNs()))
return Res;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 968e11b104abd..fc6f70968a92d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -218,7 +218,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1) const;
+ SDValue Op0, SDValue Op1,
+ bool IsKnownNoNaNs) const;
SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
SDValue Src, SDValue MinVal, SDValue MaxVal,
bool Signed) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
index 120ab7cc4552e..ae2bd6f18b951 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
-; RUN: llc -mattr=+sve --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NOBF16NNAN
-; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,BF16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
+; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,BF16
target triple = "aarch64-unknown-linux-gnu"
@@ -109,16 +108,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -128,6 +117,26 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
ret <vscale x 2 x bfloat> %res
}
+define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16_nnan(<vscale x 2 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 2 x float> %a to <vscale x 2 x bfloat>
+ ret <vscale x 2 x bfloat> %res
+}
+
define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %a) {
; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
; NOBF16: // %bb.0:
@@ -143,16 +152,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.s
@@ -162,6 +161,26 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
ret <vscale x 4 x bfloat> %res
}
+define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16_nnan(<vscale x 4 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 4 x float> %a to <vscale x 4 x bfloat>
+ ret <vscale x 4 x bfloat> %res
+}
+
define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %a) {
; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
; NOBF16: // %bb.0:
@@ -186,22 +205,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
; NOBF16-NEXT: uzp1 z0.h, z0.h, z1.h
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z1.h
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.s
@@ -212,3 +215,31 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
%res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %res
}
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16_nnan(<vscale x 8 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z3.s, z1.s, #16
+; NOBF16-NEXT: lsr z4.s, z0.s, #16
+; NOBF16-NEXT: and z3.s, z3.s, #0x1
+; NOBF16-NEXT: and z4.s, z4.s, #0x1
+; NOBF16-NEXT: add z1.s, z1.s, z2.s
+; NOBF16-NEXT: add z0.s, z0.s, z2.s
+; NOBF16-NEXT: add z1.s, z3.s, z1.s
+; NOBF16-NEXT: add z0.s, z4.s, z0.s
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z0.h, z0.h, z1.h
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.s
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z0.h, z0.h, z1.h
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 8 x float> %a to <vscale x 8 x bfloat>
+ ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
index ca0a2bf0a4915..c8e60861a859b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16
-; RUN: llc -mattr=+sve2 --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=NOBF16NNAN
-; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16
+; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16
target triple = "aarch64-unknown-linux-gnu"
@@ -22,18 +21,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -44,6 +31,29 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
ret <vscale x 2 x bfloat> %res
}
+define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16_nnan(<vscale x 2 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 2 x double> %a to <vscale x 2 x bfloat>
+ ret <vscale x 2 x bfloat> %res
+}
+
define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; NOBF16: // %bb.0:
@@ -70,25 +80,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -102,6 +93,39 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
ret <vscale x 4 x bfloat> %res
}
+define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16_nnan(<vscale x 4 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z3.s, z1.s, #16
+; NOBF16-NEXT: lsr z4.s, z0.s, #16
+; NOBF16-NEXT: add z1.s, z1.s, z2.s
+; NOBF16-NEXT: add z0.s, z0.s, z2.s
+; NOBF16-NEXT: and z3.s, z3.s, #0x1
+; NOBF16-NEXT: and z4.s, z4.s, #0x1
+; NOBF16-NEXT: add z1.s, z3.s, z1.s
+; NOBF16-NEXT: add z0.s, z4.s, z0.s
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 4 x double> %a to <vscale x 4 x bfloat>
+ ret <vscale x 4 x bfloat> %res
+}
+
define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; NOBF16: // %bb.0:
@@ -148,39 +172,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z4.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z3.s, p0/m, z3.d
-; NOBF16NNAN-NEXT: fcvtx z2.s, p0/m, z2.d
-; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z5.s, z3.s, #16
-; NOBF16NNAN-NEXT: lsr z6.s, z2.s, #16
-; NOBF16NNAN-NEXT: lsr z7.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z24.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z3.s, z3.s, z4.s
-; NOBF16NNAN-NEXT: add z2.s, z2.s, z4.s
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z4.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z4.s
-; NOBF16NNAN-NEXT: and z5.s, z5.s, #0x1
-; NOBF16NNAN-NEXT: and z6.s, z6.s, #0x1
-; NOBF16NNAN-NEXT: and z7.s, z7.s, #0x1
-; NOBF16NNAN-NEXT: and z24.s, z24.s, #0x1
-; NOBF16NNAN-NEXT: add z3.s, z5.s, z3.s
-; NOBF16NNAN-NEXT: add z2.s, z6.s, z2.s
-; NOBF16NNAN-NEXT: add z1.s, z7.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z24.s, z0.s
-; NOBF16NNAN-NEXT: lsr z3.s, z3.s, #16
-; NOBF16NNAN-NEXT: lsr z2.s, z2.s, #16
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z2.s, z2.s, z3.s
-; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z2.h
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -199,3 +190,56 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
%res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %res
}
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16_nnan(<vscale x 8 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z4.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z3.s, p0/m, z3.d
+; NOBF16-NEXT: fcvtx z2.s, p0/m, z2.d
+; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z5.s, z3.s, #16
+; NOBF16-NEXT: lsr z6.s, z2.s, #16
+; NOBF16-NEXT: lsr z7.s, z1.s, #16
+; NOBF16-NEXT: lsr z24.s, z0.s, #16
+; NOBF16-NEXT: add z3.s, z3.s, z4.s
+; NOBF16-NEXT: add z2.s, z2.s, z4.s
+; NOBF16-NEXT: add z1.s, z1.s, z4.s
+; NOBF16-NEXT: add z0.s, z0.s, z4.s
+; NOBF16-NEXT: and z5.s, z5.s, #0x1
+; NOBF16-NEXT: and z6.s, z6.s, #0x1
+; NOBF16-NEXT: and z7.s, z7.s, #0x1
+; NOBF16-NEXT: and z24.s, z24.s, #0x1
+; NOBF16-NEXT: add z3.s, z5.s, z3.s
+; NOBF16-NEXT: add z2.s, z6.s, z2.s
+; NOBF16-NEXT: add z1.s, z7.s, z1.s
+; NOBF16-NEXT: add z0.s, z24.s, z0.s
+; NOBF16-NEXT: lsr z3.s, z3.s, #16
+; NOBF16-NEXT: lsr z2.s, z2.s, #16
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z2.s, z2.s, z3.s
+; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z3.s, p0/m, z3.d
+; BF16-NEXT: fcvtx z2.s, p0/m, z2.d
+; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z3.h, p0/m, z3.s
+; BF16-NEXT: bfcvt z2.h, p0/m, z2.s
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z2.s, z2.s, z3.s
+; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT: uzp1 z0.h, z0.h, z2.h
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 8 x double> %a to <vscale x 8 x bfloat>
+ ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 42245e3d7013d..ecafe94d4cd55 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -2445,8 +2445,8 @@ define i1 @test122(double %arg1, double %arg2, double %arg3) #1 {
; GCN-NEXT: v_cmp_lt_f64_e32 vcc_lo, v[0:1], v[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GCN-NEXT: s_setpc_b64 s[30:31]
- %cmp1 = fcmp ult double %arg1, %arg3
- %cmp2 = fcmp ult double %arg2, %arg3
+ %cmp1 = fcmp nnan ult double %arg1, %arg3
+ %cmp2 = fcmp nnan ult double %arg2, %arg3
%or1 = or i1 %cmp1, %cmp2
ret i1 %or1
}
@@ -2463,8 +2463,8 @@ define i1 @test123(double %arg1, double %arg2, double %arg3) #1 {
; GCN-NEXT: s_setpc_b64 s[30:31]
%var1 = call double @llvm.canonicalize.f64(double %arg1)
%var2 = call double @llvm.canonicalize.f64(double %arg2)
- %cmp1 = fcmp ogt double %var1, %arg3
- %cmp2 = fcmp ogt double %var2, %arg3
+ %cmp1 = fcmp nnan ogt double %var1, %arg3
+ %cmp2 = fcmp nnan ogt double %var2, %arg3
%or1 = and i1 %cmp1, %cmp2
ret i1 %or1
}
@@ -2993,6 +2993,6 @@ declare half @llvm.canonicalize.f16(half)
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>)
attributes #0 = { nounwind "amdgpu-ieee"="false" }
-attributes #1 = { nounwind "no-nans-fp-math"="true" }
+attributes #1 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index 4368f2a5de3b1..c2b44cb251ffb 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -1064,9 +1064,9 @@ define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX1250-NEXT: v_pk_max3_num_f16 v0, v2, v0, v3
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
- %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
- %max1 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %c, <2 x half> %max)
- %res = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %max1, <2 x half> %d)
+ %max = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
+ %max1 = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %c, <2 x half> %max)
+ %res = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %max1, <2 x half> %d)
ret <2 x half> %res
}
@@ -1077,4 +1077,4 @@ declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "no-nans-fp-math"="true" }
+attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 142bdd42b2c00..382c98218a11c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -1064,9 +1064,9 @@ define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; GFX1250-NEXT: v_pk_min3_num_f16 v0, v2, v0, v3
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
entry:
- %min = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
- %min1 = call <2 x half> @llvm.minnum.v2f16(<2 x half> %c, <2 x half> %min)
- %res = call <2 x half> @llvm.minnum.v2f16(<2 x half> %min1, <2 x half> %d)
+ %min = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
+ %min1 = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %c, <2 x half> %min)
+ %res = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %min1, <2 x half> %d)
ret <2 x half> %res
}
@@ -1497,4 +1497,4 @@ declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "no-nans-fp-math"="true" }
+attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
index 46351782b5dc2..a96e0e4b50056 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll
@@ -1,12 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names \
-; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \
-; RUN: --enable-no-nans-fp-math \
+; RUN: -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN: --check-prefix=P8
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \
-; RUN: -verify-machineinstrs --enable-no-signed-zeros-fp-math \
-; RUN: --enable-no-nans-fp-math \
+; RUN: -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN: --check-prefix=P9
@@ -135,7 +133,7 @@ entry:
; Test one
-define float @select_one_float(float %a, float %b, float %c, float %d) {
+define float @select_one_float(float nofpclass(nan) %a, float nofpclass(nan) %b, float %c, float %d) {
; P8-LABEL: select_one_float:
; P8: # %bb.0: # %entry
; P8-NEXT: fcmpu cr0, f1, f2
@@ -161,7 +159,7 @@ entry:
ret float %cond
}
-define float @select_one_float_nsz(float %a, float %b, float %c, float %d) {
+define float @select_one_float_nsz(float nofpclass(nan) %a, float nofpclass(nan) %b, float %c, float %d) {
; P8-LABEL: select_one_float_nsz:
; P8: # %bb.0: # %entry
; P8-NEXT: fcmpu cr0, f1, f2
@@ -187,7 +185,7 @@ entry:
ret float %cond
}
-define double @select_one_double(double %a, double %b, double %c, double %d) {
+define double @select_one_double(double nofpclass(nan) %a, double nofpclass(nan) %b, double %c, double %d) {
; P8-LABEL: select_one_double:
; P8: # %bb.0: # %entry
; P8-NEXT: xscmpudp cr0, f1, f2
@@ -259,7 +257,7 @@ entry:
; Test oge
-define float @select_oge_float(float %a, float %b, float %c, float %d) {
+define float @select_oge_float(float nofpclass(nan) %a, float nofpclass(nan) %b, float %c, float %d) {
; P8-LABEL: select_oge_float:
; P8: # %bb.0: # %entry
; P8-NEXT: fcmpu cr0, f1, f2
@@ -285,7 +283,7 @@ entry:
ret float %cond
}
-define double @select_oge_double(double %a, double %b, double %c, double %d) {
+define double @select_oge_double(double nofpclass(nan) %a, double nofpclass(nan) %b, double %c, double %d) {
; P8-LABEL: select_oge_double:
; P8: # %bb.0: # %entry
; P8-NEXT: xscmpudp cr0, f1, f2
@@ -529,7 +527,7 @@ entry:
; Test ole
-define float @select_ole_float(float %a, float %b, float %c, float %d) {
+define float @select_ole_float(float nofpclass(nan) %a, float nofpclass(nan) %b, float %c, float %d) {
; P8-LABEL: select_ole_float:
; P8: # %bb.0: # %entry
; P8-NEXT: fcmpu cr0, f1, f2
@@ -555,7 +553,7 @@ entry:
ret float %cond
}
-define double @select_ole_double(double %a, double %b, double %c, double %d) {
+define double @select_ole_double(double nofpclass(nan) %a, double nofpclass(nan) %b, double %c, double %d) {
; P8-LABEL: select_ole_double:
; P8: # %bb.0: # %entry
; P8-NEXT: xscmpudp cr0, f1, f2
@@ -618,7 +616,7 @@ entry:
}
; Test RHS is 1.000000e+00
-define double @onecmp1(double %a, double %y, double %z) {
+define double @onecmp1(double nofpclass(nan) %a, double %y, double %z) {
; P8-LABEL: onecmp1:
; P8: # %bb.0: # %entry
; P8-NEXT: vspltisw v2, 1
diff --git a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
index 806200c3f0b8e..1e417841acd37 100644
--- a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll
@@ -304,7 +304,7 @@ define float @fmaximum_nnan_f32(float %a, float %b) nounwind {
ret float %1
}
-define float @fminimum_nnan_attr_f32(float %a, float %b) nounwind "no-nans-fp-math"="true" {
+define float @fminimum_nnan_attr_f32(float %a, float %b) nounwind {
; RV32IF-LABEL: fminimum_nnan_attr_f32:
; RV32IF: # %bb.0:
; RV32IF-NEXT: fmin.s fa0, fa0, fa1
@@ -342,7 +342,7 @@ define float @fminimum_nnan_attr_f32(float %a, float %b) nounwind "no-nans-fp-ma
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
- %1 = call float @llvm.minimum.f32(float %a, float %b)
+ %1 = call nnan float @llvm.minimum.f32(float %a, float %b)
ret float %1
}
diff --git a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
index 23ca1992614a1..8295f369ae01a 100644
--- a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
+++ b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll
@@ -124,7 +124,7 @@ define half @fmaximum_nnan_f16(half %a, half %b) nounwind {
ret half %1
}
-define half @fminimum_nnan_attr_f16(half %a, half %b) nounwind "no-nans-fp-math"="true" {
+define half @fminimum_nnan_attr_f16(half %a, half %b) nounwind {
; CHECKIZFH-LABEL: fminimum_nnan_attr_f16:
; CHECKIZFH: # %bb.0:
; CHECKIZFH-NEXT: fmin.h fa0, fa0, fa1
@@ -134,7 +134,7 @@ define half @fminimum_nnan_attr_f16(half %a, half %b) nounwind "no-nans-fp-math"
; CHECKIZHINX: # %bb.0:
; CHECKIZHINX-NEXT: fmin.h a0, a0, a1
; CHECKIZHINX-NEXT: ret
- %1 = call half @llvm.minimum.f16(half %a, half %b)
+ %1 = call nnan half @llvm.minimum.f16(half %a, half %b)
ret half %1
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
index 59cf38f82b7c0..33c1ca4601c00 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
@@ -24,25 +24,25 @@ define half @test_fminimum(half %x, half %y) {
ret half %z
}
-define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
- %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
+ %r = call nnan <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}
-define half @test_fminimum_nnan(half %x, half %y) "no-nans-fp-math"="true" {
+define half @test_fminimum_nnan(half %x, half %y) {
; CHECK-LABEL: test_fminimum_nnan:
; CHECK: # %bb.0:
-; CHECK-NEXT: vfpclasssh $5, %xmm1, %k1 # k1 = isQuietNaN(xmm1) | isNegativeZero(xmm1)
-; CHECK-NEXT: vmovaps %xmm0, %xmm2
-; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vminsh %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vfpclasssh $5, %xmm0, %k1 # k1 = isQuietNaN(xmm0) | isNegativeZero(xmm0)
+; CHECK-NEXT: vmovaps %xmm1, %xmm2
+; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT: vminsh %xmm2, %xmm0, %xmm0
; CHECK-NEXT: retq
- %1 = tail call half @llvm.minimum.f16(half %x, half %y)
+ %1 = tail call nnan half @llvm.minimum.f16(half %x, half %y)
ret half %1
}
@@ -98,12 +98,12 @@ define half @test_fmaximum(half %x, half %y) {
ret half %r
}
-define <8 x half> @test_fmaximum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <8 x half> @test_fmaximum_v8f16(<8 x half> %x, <8 x half> %y) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
- %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
+ %r = call nnan <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}
@@ -163,12 +163,12 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) {
ret half %2
}
-define <16 x half> @test_fminimum_v16f16(<16 x half> %x, <16 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <16 x half> @test_fminimum_v16f16(<16 x half> %x, <16 x half> %y) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
- %r = call <16 x half> @llvm.minimum.v16f16(<16 x half> %x, <16 x half> %y)
+ %r = call nnan <16 x half> @llvm.minimum.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %r
}
@@ -184,14 +184,14 @@ define <16 x half> @test_fmaximum_v16f16_nans(<16 x half> %x, <16 x half> %y) "n
ret <16 x half> %r
}
-define <32 x half> @test_fminimum_v32f16_szero(<32 x half> %x, <32 x half> %y) "no-nans-fp-math"="true" {
+define <32 x half> @test_fminimum_v32f16_szero(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: test_fminimum_v32f16_szero:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpternlogq {{.*#+}} zmm0 = (zmm0 & zmm2) | zmm1
; CHECK-NEXT: retq
- %r = call <32 x half> @llvm.minimum.v32f16(<32 x half> %x, <32 x half> %y)
+ %r = call nnan <32 x half> @llvm.minimum.v32f16(<32 x half> %x, <32 x half> %y)
ret <32 x half> %r
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 1041e604c07d3..1f13540e35c6f 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -100,7 +100,7 @@ define float @test_fmaximum(float %x, float %y) nounwind {
ret float %1
}
-define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fmaximum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -120,7 +120,7 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
@@ -603,7 +603,7 @@ define float @test_fminimum(float %x, float %y) nounwind {
ret float %1
}
-define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fminimum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: minpd %xmm1, %xmm0
@@ -623,7 +623,7 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
+ %r = call nnan <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %r
}
@@ -675,7 +675,7 @@ define float @test_fminimum_nan1(float %x, float %y) {
ret float %1
}
-define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true" nounwind {
+define double @test_fminimum_nnan(double %x, double %y) nounwind {
; SSE2-LABEL: test_fminimum_nnan:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm0, %xmm2
@@ -700,12 +700,12 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
;
; AVX512DQ-LABEL: test_fminimum_nnan:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vfpclasssd $5, %xmm1, %k0 # k0 = isQuietNaN(xmm1) | isNegativeZero(xmm1)
+; AVX512DQ-NEXT: vfpclasssd $5, %xmm0, %k0 # k0 = isQuietNaN(xmm0) | isNegativeZero(xmm0)
; AVX512DQ-NEXT: kmovw %k0, %k1
-; AVX512DQ-NEXT: vmovapd %xmm0, %xmm2
-; AVX512DQ-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512DQ-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0
+; AVX512DQ-NEXT: vmovapd %xmm1, %xmm2
+; AVX512DQ-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; AVX512DQ-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512DQ-NEXT: vminsd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512BF16-LABEL: test_fminimum_nnan:
@@ -734,7 +734,7 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
- %1 = tail call double @llvm.minimum.f64(double %x, double %y)
+ %1 = tail call nnan double @llvm.minimum.f64(double %x, double %y)
ret double %1
}
@@ -1068,7 +1068,7 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fmaximum_vector:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -1088,7 +1088,7 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index 29486dac374ad..69e336da8ea1b 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -102,7 +102,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind {
ret float %1
}
-define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fmaximumnum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -122,7 +122,7 @@ define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
@@ -549,7 +549,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind {
ret float %1
}
-define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fminimumnum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: minpd %xmm1, %xmm0
@@ -569,7 +569,7 @@ define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
+ %r = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %r
}
@@ -618,7 +618,7 @@ define float @test_fminimumnum_nan1(float %x, float %y) {
ret float %1
}
-define double @test_fminimumnum_nnan(double %x, double %y) "no-nans-fp-math"="true" nounwind {
+define double @test_fminimumnum_nnan(double %x, double %y) nounwind {
; SSE2-LABEL: test_fminimumnum_nnan:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm0, %xmm2
@@ -643,12 +643,12 @@ define double @test_fminimumnum_nnan(double %x, double %y) "no-nans-fp-math"="tr
;
; AVX512DQ-LABEL: test_fminimumnum_nnan:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vfpclasssd $5, %xmm1, %k0 # k0 = isQuietNaN(xmm1) | isNegativeZero(xmm1)
+; AVX512DQ-NEXT: vfpclasssd $5, %xmm0, %k0 # k0 = isQuietNaN(xmm0) | isNegativeZero(xmm0)
; AVX512DQ-NEXT: kmovw %k0, %k1
-; AVX512DQ-NEXT: vmovapd %xmm0, %xmm2
-; AVX512DQ-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
-; AVX512DQ-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0
+; AVX512DQ-NEXT: vmovapd %xmm1, %xmm2
+; AVX512DQ-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; AVX512DQ-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; AVX512DQ-NEXT: vminsd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512BF16-LABEL: test_fminimumnum_nnan:
@@ -677,7 +677,7 @@ define double @test_fminimumnum_nnan(double %x, double %y) "no-nans-fp-math"="tr
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
- %1 = tail call double @llvm.minimumnum.f64(double %x, double %y)
+ %1 = tail call nnan double @llvm.minimumnum.f64(double %x, double %y)
ret double %1
}
@@ -959,7 +959,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-signed-zeros-fp-math"="true" {
; SSE2-LABEL: test_fmaximumnum_vector:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -979,7 +979,7 @@ define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
More information about the llvm-commits
mailing list