[llvm] [SelectionDAG] Remove `NoNaNsFPMath` uses (PR #183448)

via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 8 18:59:20 PDT 2026


llvmbot wrote:


@llvm/pr-subscribers-backend-risc-v

Author: None (paperchalice)

<details>
<summary>Changes</summary>

This PR removes the remaining uses of `NoNaNsFPMath` in LLVMCodeGen.
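
As a minimal sketch (the function name is illustrative, not from this patch), the per-instruction `nnan` flag is what now lets `SelectionDAG::isKnownNeverNaN` return true, rather than the global `-enable-no-nans-fp-math` option:

```llvm
; With nnan on the instruction itself, isKnownNeverNaN can assume no NaN
; input, so e.g. the AArch64 bf16 lowering may skip quieting the NaN payload.
define bfloat @trunc_nnan(float %a) {
  %r = fptrunc nnan float %a to bfloat
  ret bfloat %r
}
```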

---

Patch is 41.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/183448.diff


15 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+1-1) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+5-3) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+2-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-converts.ll (+71-40) 
- (modified) llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll (+112-68) 
- (modified) llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/fmax3.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/fmin3.ll (+4-4) 
- (modified) llvm/test/CodeGen/PowerPC/scalar_cmp.ll (+10-12) 
- (modified) llvm/test/CodeGen/RISCV/float-maximum-minimum.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/half-maximum-minimum.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll (+15-15) 
- (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+13-13) 
- (modified) llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll (+13-13) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4ec771d7fd41f..1c5b2d00fe83c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5992,7 +5992,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
   assert(!DemandedElts.isZero() && "No demanded elements");
 
   // If we're told that NaNs won't happen, assume they won't.
-  if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+  if (Op->getFlags().hasNoNaNs())
     return true;
 
   if (Depth >= MaxRecursionDepth)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dc5a3736ecaa1..fc9cc95680cb4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4824,6 +4824,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
   SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
   EVT SrcVT = SrcVal.getValueType();
   bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
+  SDNodeFlags Flags = Op->getFlags();
 
   if (VT.isScalableVector()) {
     // Let common code split the operation.
@@ -4848,7 +4849,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
       Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
 
       // Set the quiet bit.
-      if (!DAG.isKnownNeverSNaN(SrcVal))
+      if (!DAG.isKnownNeverSNaN(SrcVal) && !Flags.hasNoNaNs())
         NaN = DAG.getNode(ISD::OR, DL, I32, Narrow, ImmV(0x400000));
     } else if (SrcVT == MVT::nxv2f64 &&
                (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ee43ab8d8172..932d6a5841aab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15397,7 +15397,8 @@ static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
 
 SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
                                                   const SDLoc &SL, SDValue Op0,
-                                                  SDValue Op1) const {
+                                                  SDValue Op1,
+                                                  bool IsKnownNoNaNs) const {
   ConstantFPSDNode *K1 = getSplatConstantFP(Op1);
   if (!K1)
     return SDValue();
@@ -15454,7 +15455,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
     // then give the other result, which is different from med3 with a NaN
     // input.
     SDValue Var = Op0.getOperand(0);
-    if (!DAG.isKnownNeverSNaN(Var))
+    if (!IsKnownNoNaNs && !DAG.isKnownNeverSNaN(Var))
       return SDValue();
 
     const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
@@ -15572,7 +15573,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
        (VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) ||
        (VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
       Op0.hasOneUse()) {
-    if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
+    if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1,
+                                              N->getFlags().hasNoNaNs()))
       return Res;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 968e11b104abd..fc6f70968a92d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -218,7 +218,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
-                                  SDValue Op0, SDValue Op1) const;
+                                  SDValue Op0, SDValue Op1,
+                                  bool IsKnownNoNaNs) const;
   SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
                                    SDValue Src, SDValue MinVal, SDValue MaxVal,
                                    bool Signed) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
index 120ab7cc4552e..ae2bd6f18b951 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-converts.ll
@@ -1,8 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve                          < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
-; RUN: llc -mattr=+sve --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NOBF16NNAN
-; RUN: llc -mattr=+sve,+bf16                    < %s | FileCheck %s --check-prefixes=CHECK,BF16
-; RUN: llc -mattr=+sme -force-streaming         < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sve                  < %s | FileCheck %s --check-prefixes=CHECK,NOBF16
+; RUN: llc -mattr=+sve,+bf16            < %s | FileCheck %s --check-prefixes=CHECK,BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,BF16
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -109,16 +108,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
 ; NOBF16-NEXT:    lsr z0.s, z0.s, #16
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT:    and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT:    add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.d
@@ -128,6 +117,26 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16(<vscale x 2 x float> %
   ret <vscale x 2 x bfloat> %res
 }
 
+define <vscale x 2 x bfloat> @fptrunc_nxv2f32_to_nxv2bf16_nnan(<vscale x 2 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT:    lsr z2.s, z0.s, #16
+; NOBF16-NEXT:    and z2.s, z2.s, #0x1
+; NOBF16-NEXT:    add z0.s, z0.s, z1.s
+; NOBF16-NEXT:    add z0.s, z2.s, z0.s
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv2f32_to_nxv2bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.d
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 2 x float> %a to <vscale x 2 x bfloat>
+  ret <vscale x 2 x bfloat> %res
+}
+
 define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %a) {
 ; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
 ; NOBF16:       // %bb.0:
@@ -143,16 +152,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
 ; NOBF16-NEXT:    lsr z0.s, z0.s, #16
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT:    and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT:    add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.s
@@ -162,6 +161,26 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16(<vscale x 4 x float> %
   ret <vscale x 4 x bfloat> %res
 }
 
+define <vscale x 4 x bfloat> @fptrunc_nxv4f32_to_nxv4bf16_nnan(<vscale x 4 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT:    lsr z2.s, z0.s, #16
+; NOBF16-NEXT:    and z2.s, z2.s, #0x1
+; NOBF16-NEXT:    add z0.s, z0.s, z1.s
+; NOBF16-NEXT:    add z0.s, z2.s, z0.s
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv4f32_to_nxv4bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.s
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 4 x float> %a to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %res
+}
+
 define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %a) {
 ; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
 ; NOBF16:       // %bb.0:
@@ -186,22 +205,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
 ; NOBF16-NEXT:    uzp1 z0.h, z0.h, z1.h
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT:    and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT:    and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT:    add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT:    add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT:    add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    uzp1 z0.h, z0.h, z1.h
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.s
@@ -212,3 +215,31 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16(<vscale x 8 x float> %
   %res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %res
 }
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f32_to_nxv8bf16_nnan(<vscale x 8 x float> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT:    lsr z3.s, z1.s, #16
+; NOBF16-NEXT:    lsr z4.s, z0.s, #16
+; NOBF16-NEXT:    and z3.s, z3.s, #0x1
+; NOBF16-NEXT:    and z4.s, z4.s, #0x1
+; NOBF16-NEXT:    add z1.s, z1.s, z2.s
+; NOBF16-NEXT:    add z0.s, z0.s, z2.s
+; NOBF16-NEXT:    add z1.s, z3.s, z1.s
+; NOBF16-NEXT:    add z0.s, z4.s, z0.s
+; NOBF16-NEXT:    lsr z1.s, z1.s, #16
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    uzp1 z0.h, z0.h, z1.h
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv8f32_to_nxv8bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.s
+; BF16-NEXT:    bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    uzp1 z0.h, z0.h, z1.h
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 8 x float> %a to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
index ca0a2bf0a4915..c8e60861a859b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bf16-converts.ll
@@ -1,8 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2                          < %s | FileCheck %s --check-prefixes=NOBF16
-; RUN: llc -mattr=+sve2 --enable-no-nans-fp-math < %s | FileCheck %s --check-prefixes=NOBF16NNAN
-; RUN: llc -mattr=+sve2,+bf16                    < %s | FileCheck %s --check-prefixes=BF16
-; RUN: llc -mattr=+sme -force-streaming          < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sve2                 < %s | FileCheck %s --check-prefixes=NOBF16
+; RUN: llc -mattr=+sve2,+bf16           < %s | FileCheck %s --check-prefixes=BF16
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=BF16
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -22,18 +21,6 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
 ; NOBF16-NEXT:    lsr z0.s, z0.s, #16
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    ptrue p0.d
-; NOBF16NNAN-NEXT:    mov z1.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT:    lsr z2.s, z0.s, #16
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT:    and z2.s, z2.s, #0x1
-; NOBF16NNAN-NEXT:    add z0.s, z2.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.d
@@ -44,6 +31,29 @@ define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16(<vscale x 2 x double>
   ret <vscale x 2 x bfloat> %res
 }
 
+define <vscale x 2 x bfloat> @fptrunc_nxv2f64_to_nxv2bf16_nnan(<vscale x 2 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    ptrue p0.d
+; NOBF16-NEXT:    mov z1.s, #32767 // =0x7fff
+; NOBF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT:    lsr z2.s, z0.s, #16
+; NOBF16-NEXT:    add z0.s, z0.s, z1.s
+; NOBF16-NEXT:    and z2.s, z2.s, #0x1
+; NOBF16-NEXT:    add z0.s, z2.s, z0.s
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv2f64_to_nxv2bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.d
+; BF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 2 x double> %a to <vscale x 2 x bfloat>
+  ret <vscale x 2 x bfloat> %res
+}
+
 define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double> %a) {
 ; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
 ; NOBF16:       // %bb.0:
@@ -70,25 +80,6 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
 ; NOBF16-NEXT:    uzp1 z0.s, z0.s, z1.s
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    ptrue p0.d
-; NOBF16NNAN-NEXT:    mov z2.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT:    fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT:    lsr z3.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z4.s, z0.s, #16
-; NOBF16NNAN-NEXT:    add z1.s, z1.s, z2.s
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z2.s
-; NOBF16NNAN-NEXT:    and z3.s, z3.s, #0x1
-; NOBF16NNAN-NEXT:    and z4.s, z4.s, #0x1
-; NOBF16NNAN-NEXT:    add z1.s, z3.s, z1.s
-; NOBF16NNAN-NEXT:    add z0.s, z4.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.d
@@ -102,6 +93,39 @@ define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16(<vscale x 4 x double>
   ret <vscale x 4 x bfloat> %res
 }
 
+define <vscale x 4 x bfloat> @fptrunc_nxv4f64_to_nxv4bf16_nnan(<vscale x 4 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    ptrue p0.d
+; NOBF16-NEXT:    mov z2.s, #32767 // =0x7fff
+; NOBF16-NEXT:    fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT:    lsr z3.s, z1.s, #16
+; NOBF16-NEXT:    lsr z4.s, z0.s, #16
+; NOBF16-NEXT:    add z1.s, z1.s, z2.s
+; NOBF16-NEXT:    add z0.s, z0.s, z2.s
+; NOBF16-NEXT:    and z3.s, z3.s, #0x1
+; NOBF16-NEXT:    and z4.s, z4.s, #0x1
+; NOBF16-NEXT:    add z1.s, z3.s, z1.s
+; NOBF16-NEXT:    add z0.s, z4.s, z0.s
+; NOBF16-NEXT:    lsr z1.s, z1.s, #16
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv4f64_to_nxv4bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.d
+; BF16-NEXT:    fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT:    bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 4 x double> %a to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %res
+}
+
 define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double> %a) {
 ; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
 ; NOBF16:       // %bb.0:
@@ -148,39 +172,6 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
 ; NOBF16-NEXT:    uzp1 z0.h, z0.h, z2.h
 ; NOBF16-NEXT:    ret
 ;
-; NOBF16NNAN-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
-; NOBF16NNAN:       // %bb.0:
-; NOBF16NNAN-NEXT:    ptrue p0.d
-; NOBF16NNAN-NEXT:    mov z4.s, #32767 // =0x7fff
-; NOBF16NNAN-NEXT:    fcvtx z3.s, p0/m, z3.d
-; NOBF16NNAN-NEXT:    fcvtx z2.s, p0/m, z2.d
-; NOBF16NNAN-NEXT:    fcvtx z1.s, p0/m, z1.d
-; NOBF16NNAN-NEXT:    fcvtx z0.s, p0/m, z0.d
-; NOBF16NNAN-NEXT:    lsr z5.s, z3.s, #16
-; NOBF16NNAN-NEXT:    lsr z6.s, z2.s, #16
-; NOBF16NNAN-NEXT:    lsr z7.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z24.s, z0.s, #16
-; NOBF16NNAN-NEXT:    add z3.s, z3.s, z4.s
-; NOBF16NNAN-NEXT:    add z2.s, z2.s, z4.s
-; NOBF16NNAN-NEXT:    add z1.s, z1.s, z4.s
-; NOBF16NNAN-NEXT:    add z0.s, z0.s, z4.s
-; NOBF16NNAN-NEXT:    and z5.s, z5.s, #0x1
-; NOBF16NNAN-NEXT:    and z6.s, z6.s, #0x1
-; NOBF16NNAN-NEXT:    and z7.s, z7.s, #0x1
-; NOBF16NNAN-NEXT:    and z24.s, z24.s, #0x1
-; NOBF16NNAN-NEXT:    add z3.s, z5.s, z3.s
-; NOBF16NNAN-NEXT:    add z2.s, z6.s, z2.s
-; NOBF16NNAN-NEXT:    add z1.s, z7.s, z1.s
-; NOBF16NNAN-NEXT:    add z0.s, z24.s, z0.s
-; NOBF16NNAN-NEXT:    lsr z3.s, z3.s, #16
-; NOBF16NNAN-NEXT:    lsr z2.s, z2.s, #16
-; NOBF16NNAN-NEXT:    lsr z1.s, z1.s, #16
-; NOBF16NNAN-NEXT:    lsr z0.s, z0.s, #16
-; NOBF16NNAN-NEXT:    uzp1 z2.s, z2.s, z3.s
-; NOBF16NNAN-NEXT:    uzp1 z0.s, z0.s, z1.s
-; NOBF16NNAN-NEXT:    uzp1 z0.h, z0.h, z2.h
-; NOBF16NNAN-NEXT:    ret
-;
 ; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16:
 ; BF16:       // %bb.0:
 ; BF16-NEXT:    ptrue p0.d
@@ -199,3 +190,56 @@ define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16(<vscale x 8 x double>
   %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %res
 }
+
+define <vscale x 8 x bfloat> @fptrunc_nxv8f64_to_nxv8bf16_nnan(<vscale x 8 x double> %a) {
+; NOBF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; NOBF16:       // %bb.0:
+; NOBF16-NEXT:    ptrue p0.d
+; NOBF16-NEXT:    mov z4.s, #32767 // =0x7fff
+; NOBF16-NEXT:    fcvtx z3.s, p0/m, z3.d
+; NOBF16-NEXT:    fcvtx z2.s, p0/m, z2.d
+; NOBF16-NEXT:    fcvtx z1.s, p0/m, z1.d
+; NOBF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; NOBF16-NEXT:    lsr z5.s, z3.s, #16
+; NOBF16-NEXT:    lsr z6.s, z2.s, #16
+; NOBF16-NEXT:    lsr z7.s, z1.s, #16
+; NOBF16-NEXT:    lsr z24.s, z0.s, #16
+; NOBF16-NEXT:    add z3.s, z3.s, z4.s
+; NOBF16-NEXT:    add z2.s, z2.s, z4.s
+; NOBF16-NEXT:    add z1.s, z1.s, z4.s
+; NOBF16-NEXT:    add z0.s, z0.s, z4.s
+; NOBF16-NEXT:    and z5.s, z5.s, #0x1
+; NOBF16-NEXT:    and z6.s, z6.s, #0x1
+; NOBF16-NEXT:    and z7.s, z7.s, #0x1
+; NOBF16-NEXT:    and z24.s, z24.s, #0x1
+; NOBF16-NEXT:    add z3.s, z5.s, z3.s
+; NOBF16-NEXT:    add z2.s, z6.s, z2.s
+; NOBF16-NEXT:    add z1.s, z7.s, z1.s
+; NOBF16-NEXT:    add z0.s, z24.s, z0.s
+; NOBF16-NEXT:    lsr z3.s, z3.s, #16
+; NOBF16-NEXT:    lsr z2.s, z2.s, #16
+; NOBF16-NEXT:    lsr z1.s, z1.s, #16
+; NOBF16-NEXT:    lsr z0.s, z0.s, #16
+; NOBF16-NEXT:    uzp1 z2.s, z2.s, z3.s
+; NOBF16-NEXT:    uzp1 z0.s, z0.s, z1.s
+; NOBF16-NEXT:    uzp1 z0.h, z0.h, z2.h
+; NOBF16-NEXT:    ret
+;
+; BF16-LABEL: fptrunc_nxv8f64_to_nxv8bf16_nnan:
+; BF16:       // %bb.0:
+; BF16-NEXT:    ptrue p0.d
+; BF16-NEXT:    fcvtx z3.s, p0/m, z3.d
+; BF16-NEXT:    fcvtx z2.s, p0/m, z2.d
+; BF16-NEXT:    fcvtx z1.s, p0/m, z1.d
+; BF16-NEXT:    fcvtx z0.s, p0/m, z0.d
+; BF16-NEXT:    bfcvt z3.h, p0/m, z3.s
+; BF16-NEXT:    bfcvt z2.h, p0/m, z2.s
+; BF16-NEXT:    bfcvt z1.h, p0/m, z1.s
+; BF16-NEXT:    bfcvt z0.h, p0/m, z0.s
+; BF16-NEXT:    uzp1 z2.s, z2.s, z3.s
+; BF16-NEXT:    uzp1 z0.s, z0.s, z1.s
+; BF16-NEXT:    uzp1 z0.h, z0.h, z2.h
+; BF16-NEXT:    ret
+  %res = fptrunc nnan <vscale x 8 x double> %a to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %res
+}
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 42245e3d7013d..ecafe94d4cd55 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -2445,8 +2445,8 @@ define i1 @test122(double %arg1, double %...
[truncated]

``````````
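
For the AMDGPU side, a hypothetical IR sketch of the clamp-style pattern that `performFPMed3ImmCombine` targets; with `nnan` carried on the min/max nodes, the combine no longer has to rely on `isKnownNeverSNaN` to rule out a signaling-NaN variable operand:

```llvm
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)

; minnum(maxnum(x, 2.0), 4.0) can fold to v_med3_f32 once NaN inputs are
; ruled out by the nnan flag on the nodes.
define float @clamp_nnan(float %x) {
  %lo = call nnan float @llvm.maxnum.f32(float %x, float 2.0)
  %hi = call nnan float @llvm.minnum.f32(float %lo, float 4.0)
  ret float %hi
}
```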

</details>


https://github.com/llvm/llvm-project/pull/183448

