[llvm] [SelectionDAG] Remove `NoNaNsFPMath` uses (PR #183448)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 18:59:20 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: None (paperchalice)
<details>
<summary>Changes</summary>
This PR removes the remaining uses of `NoNaNsFPMath` in LLVMCodeGen.
---
Patch is 41.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/183448.diff
15 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+1-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+5-3)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+2-1)
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-converts.ll (+71-40)
- (modified) llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll (+112-68)
- (modified) llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/fmax3.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/fmin3.ll (+4-4)
- (modified) llvm/test/CodeGen/PowerPC/scalar_cmp.ll (+10-12)
- (modified) llvm/test/CodeGen/RISCV/float-maximum-minimum.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/half-maximum-minimum.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll (+15-15)
- (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll (+13-13)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4ec771d7fd41f..1c5b2d00fe83c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5992,7 +5992,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
assert(!DemandedElts.isZero() && "No demanded elements");
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ if (Op->getFlags().hasNoNaNs())
return true;
if (Depth >= MaxRecursionDepth)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc5a3736ecaa1..fc9cc95680cb4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4824,6 +4824,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
+ SDNodeFlags Flags = Op->getFlags();
if (VT.isScalableVector()) {
// Let common code split the operation.
@@ -4848,7 +4849,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
// Set the quiet bit.
- if (!DAG.isKnownNeverSNaN(SrcVal))
+ if (!DAG.isKnownNeverSNaN(SrcVal) && !Flags.hasNoNaNs())
NaN = DAG.getNode(ISD::OR, DL, I32, Narrow, ImmV(0x400000));
} else if (SrcVT == MVT::nxv2f64 &&
(Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ee43ab8d8172..932d6a5841aab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15397,7 +15397,8 @@ static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL, SDValue Op0,
- SDValue Op1) const {
+ SDValue Op1,
+ bool IsKnownNoNaNs) const {
ConstantFPSDNode *K1 = getSplatConstantFP(Op1);
if (!K1)
return SDValue();
@@ -15454,7 +15455,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
- if (!DAG.isKnownNeverSNaN(Var))
+ if (!IsKnownNoNaNs && !DAG.isKnownNeverSNaN(Var))
return SDValue();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
@@ -15572,7 +15573,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
(VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
- if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
+ if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1,
+ N->getFlags().hasNoNaNs()))
return Res;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 968e11b104abd..fc6f70968a92d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -218,7 +218,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1) const;
+ SDValue Op0, SDValue Op1,
+ bool IsKnownNoNaNs) const;
SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
SDValue Src, SDValue MinVal, SDValue MaxVal,
bool Signed) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
index 120ab7cc4552e..ae2bd6f18b951 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
-; RUN: llc -mattr=+sve --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NOBF16NNAN
-; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,BF16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
+; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,BF16
target triple = "aarch64-unknown-linux-gnu"
@@ -109,16 +108,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -128,6 +117,26 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
ret <vscale x 2 x bfloat> %res
}
+define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16_nnan(<vscale x 2 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 2 x float> %a to <vscale x 2 x bfloat>
+ ret <vscale x 2 x bfloat> %res
+}
+
define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %a) {
; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
; NOBF16: // %bb.0:
@@ -143,16 +152,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.s
@@ -162,6 +161,26 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
ret <vscale x 4 x bfloat> %res
}
+define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16_nnan(<vscale x 4 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 4 x float> %a to <vscale x 4 x bfloat>
+ ret <vscale x 4 x bfloat> %res
+}
+
define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %a) {
; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
; NOBF16: // %bb.0:
@@ -186,22 +205,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
; NOBF16-NEXT: uzp1 z0.h, z0.h, z1.h
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z1.h
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.s
@@ -212,3 +215,31 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
%res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %res
}
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16_nnan(<vscale x 8 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT: lsr z3.s, z1.s, #16
+; NOBF16-NEXT: lsr z4.s, z0.s, #16
+; NOBF16-NEXT: and z3.s, z3.s, #0x1
+; NOBF16-NEXT: and z4.s, z4.s, #0x1
+; NOBF16-NEXT: add z1.s, z1.s, z2.s
+; NOBF16-NEXT: add z0.s, z0.s, z2.s
+; NOBF16-NEXT: add z1.s, z3.s, z1.s
+; NOBF16-NEXT: add z0.s, z4.s, z0.s
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z0.h, z0.h, z1.h
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.s
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z0.h, z0.h, z1.h
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 8 x float> %a to <vscale x 8 x bfloat>
+ ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
index ca0a2bf0a4915..c8e60861a859b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16
-; RUN: llc -mattr=+sve2 --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=NOBF16NNAN
-; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=NOBF16
+; RUN: llc -mattr=+sve2,+bf16 < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16
target triple = "aarch64-unknown-linux-gnu"
@@ -22,18 +21,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
; NOBF16-NEXT: lsr z0.s, z0.s, #16
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT: add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -44,6 +31,29 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
ret <vscale x 2 x bfloat> %res
}
+define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16_nnan(<vscale x 2 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z2.s, z0.s, #16
+; NOBF16-NEXT: add z0.s, z0.s, z1.s
+; NOBF16-NEXT: and z2.s, z2.s, #0x1
+; NOBF16-NEXT: add z0.s, z2.s, z0.s
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 2 x double> %a to <vscale x 2 x bfloat>
+ ret <vscale x 2 x bfloat> %res
+}
+
define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; NOBF16: // %bb.0:
@@ -70,25 +80,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT: and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT: and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT: add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -102,6 +93,39 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
ret <vscale x 4 x bfloat> %res
}
+define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16_nnan(<vscale x 4 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z3.s, z1.s, #16
+; NOBF16-NEXT: lsr z4.s, z0.s, #16
+; NOBF16-NEXT: add z1.s, z1.s, z2.s
+; NOBF16-NEXT: add z0.s, z0.s, z2.s
+; NOBF16-NEXT: and z3.s, z3.s, #0x1
+; NOBF16-NEXT: and z4.s, z4.s, #0x1
+; NOBF16-NEXT: add z1.s, z3.s, z1.s
+; NOBF16-NEXT: add z0.s, z4.s, z0.s
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 4 x double> %a to <vscale x 4 x bfloat>
+ ret <vscale x 4 x bfloat> %res
+}
+
define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double> %a) {
; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; NOBF16: // %bb.0:
@@ -148,39 +172,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h
; NOBF16-NEXT: ret
;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
-; NOBF16NNAN: // %bb.0:
-; NOBF16NNAN-NEXT: ptrue p0.d
-; NOBF16NNAN-NEXT: mov z4.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT: fcvtx z3.s, p0/m, z3.d
-; NOBF16NNAN-NEXT: fcvtx z2.s, p0/m, z2.d
-; NOBF16NNAN-NEXT: fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT: fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT: lsr z5.s, z3.s, #16
-; NOBF16NNAN-NEXT: lsr z6.s, z2.s, #16
-; NOBF16NNAN-NEXT: lsr z7.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z24.s, z0.s, #16
-; NOBF16NNAN-NEXT: add z3.s, z3.s, z4.s
-; NOBF16NNAN-NEXT: add z2.s, z2.s, z4.s
-; NOBF16NNAN-NEXT: add z1.s, z1.s, z4.s
-; NOBF16NNAN-NEXT: add z0.s, z0.s, z4.s
-; NOBF16NNAN-NEXT: and z5.s, z5.s, #0x1
-; NOBF16NNAN-NEXT: and z6.s, z6.s, #0x1
-; NOBF16NNAN-NEXT: and z7.s, z7.s, #0x1
-; NOBF16NNAN-NEXT: and z24.s, z24.s, #0x1
-; NOBF16NNAN-NEXT: add z3.s, z5.s, z3.s
-; NOBF16NNAN-NEXT: add z2.s, z6.s, z2.s
-; NOBF16NNAN-NEXT: add z1.s, z7.s, z1.s
-; NOBF16NNAN-NEXT: add z0.s, z24.s, z0.s
-; NOBF16NNAN-NEXT: lsr z3.s, z3.s, #16
-; NOBF16NNAN-NEXT: lsr z2.s, z2.s, #16
-; NOBF16NNAN-NEXT: lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT: lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT: uzp1 z2.s, z2.s, z3.s
-; NOBF16NNAN-NEXT: uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT: uzp1 z0.h, z0.h, z2.h
-; NOBF16NNAN-NEXT: ret
-;
; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
; BF16: // %bb.0:
; BF16-NEXT: ptrue p0.d
@@ -199,3 +190,56 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
%res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %res
}
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16_nnan(<vscale x 8 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; NOBF16: // %bb.0:
+; NOBF16-NEXT: ptrue p0.d
+; NOBF16-NEXT: mov z4.s, #32767 // =0x7fff
+; NOBF16-NEXT: fcvtx z3.s, p0/m, z3.d
+; NOBF16-NEXT: fcvtx z2.s, p0/m, z2.d
+; NOBF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT: lsr z5.s, z3.s, #16
+; NOBF16-NEXT: lsr z6.s, z2.s, #16
+; NOBF16-NEXT: lsr z7.s, z1.s, #16
+; NOBF16-NEXT: lsr z24.s, z0.s, #16
+; NOBF16-NEXT: add z3.s, z3.s, z4.s
+; NOBF16-NEXT: add z2.s, z2.s, z4.s
+; NOBF16-NEXT: add z1.s, z1.s, z4.s
+; NOBF16-NEXT: add z0.s, z0.s, z4.s
+; NOBF16-NEXT: and z5.s, z5.s, #0x1
+; NOBF16-NEXT: and z6.s, z6.s, #0x1
+; NOBF16-NEXT: and z7.s, z7.s, #0x1
+; NOBF16-NEXT: and z24.s, z24.s, #0x1
+; NOBF16-NEXT: add z3.s, z5.s, z3.s
+; NOBF16-NEXT: add z2.s, z6.s, z2.s
+; NOBF16-NEXT: add z1.s, z7.s, z1.s
+; NOBF16-NEXT: add z0.s, z24.s, z0.s
+; NOBF16-NEXT: lsr z3.s, z3.s, #16
+; NOBF16-NEXT: lsr z2.s, z2.s, #16
+; NOBF16-NEXT: lsr z1.s, z1.s, #16
+; NOBF16-NEXT: lsr z0.s, z0.s, #16
+; NOBF16-NEXT: uzp1 z2.s, z2.s, z3.s
+; NOBF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT: uzp1 z0.h, z0.h, z2.h
+; NOBF16-NEXT: ret
+;
+; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; BF16: // %bb.0:
+; BF16-NEXT: ptrue p0.d
+; BF16-NEXT: fcvtx z3.s, p0/m, z3.d
+; BF16-NEXT: fcvtx z2.s, p0/m, z2.d
+; BF16-NEXT: fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT: fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT: bfcvt z3.h, p0/m, z3.s
+; BF16-NEXT: bfcvt z2.h, p0/m, z2.s
+; BF16-NEXT: bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT: bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT: uzp1 z2.s, z2.s, z3.s
+; BF16-NEXT: uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT: uzp1 z0.h, z0.h, z2.h
+; BF16-NEXT: ret
+ %res = fptrunc nnan <vscale x 8 x double> %a to <vscale x 8 x bfloat>
+ ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 42245e3d7013d..ecafe94d4cd55 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -2445,8 +2445,8 @@ define i1 @test122(double %arg1, double %...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/183448
More information about the llvm-commits
mailing list