[llvm] expandFMINIMUMNUM_FMAXIMUMNUM: Quiet is not needed for NaN vs NaN (PR #139237)

YunQiang Su via llvm-commits llvm-commits at lists.llvm.org
Sun May 25 18:16:00 PDT 2025


================
@@ -1713,25 +1713,15 @@ define bfloat @v_min3_bf16_minimumnum_minimumnum__v_v_v_0(bfloat %a, bfloat %b,
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 16, v0
 ; GFX8-NEXT:    v_cmp_lt_f32_e32 vcc, v4, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v0, vcc
-; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v3
-; GFX8-NEXT:    v_bfe_u32 v4, v3, 16, 1
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v3
-; GFX8-NEXT:    s_movk_i32 s4, 0x7fff
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, s4, v4
-; GFX8-NEXT:    v_or_b32_e32 v5, 0x400000, v3
-; GFX8-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
 ; GFX8-NEXT:    s_movk_i32 s4, 0x8000
-; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v0, vcc
----------------
wzssyqa wrote:

```
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ade88a16193b..5e4bd36f96d0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -213,7 +213,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
           ISD::FLOG10,   ISD::FEXP,       ISD::FEXP2,   ISD::FEXP10,
           ISD::FCEIL,    ISD::FTRUNC,     ISD::FRINT,   ISD::FNEARBYINT,
           ISD::FROUND,   ISD::FROUNDEVEN, ISD::FFLOOR,  ISD::FCANONICALIZE,
-          ISD::SETCC}) {
+          ISD::SETCC,    ISD::FMAXIMUMNUM,ISD::FMINIMUMNUM}) {
       // FIXME: The promoted to type shouldn't need to be explicit
       setOperationAction(Opc, MVT::bf16, Promote);
       AddPromotedToType(Opc, MVT::bf16, MVT::f32);
@@ -776,6 +776,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
           Vec16, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
     }
+    for (MVT Vec16 :
+         {MVT::v2bf16, MVT::v4bf16, MVT::v8bf16, MVT::v16bf16, MVT::v32bf16}) {
+      setOperationAction({ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, Vec16, Promote);
+    }
   }
 
   if (Subtarget->hasVOP3PInsts()) {
```

I gave this patch a try, and it seems to make some difference. Since I don't understand AMDGPU well, I don't know whether it is correct.

https://github.com/llvm/llvm-project/pull/139237


More information about the llvm-commits mailing list