[llvm] [NVPTX] Don't propagate `ninf` and `nnan` in `lowerFREM` (PR #147125)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 6 18:51:17 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/147125
>From 7484699439278c2c6232e1d5f23ce07d7c24e00d Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Sat, 5 Jul 2025 13:42:42 +0800
Subject: [PATCH 1/2] [NVPTX] Don't propagate `ninf` and `nnan` in `lowerFREM`
0/0 and 1/0 can produce nan and inf.
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 +++++++---
llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll | 11 +++++++++++
2 files changed, 18 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bb0aeb493ed48..9418ca3bf446c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2793,12 +2793,16 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
EVT Ty = Op.getValueType();
SDNodeFlags Flags = Op->getFlags();
+ // fdiv can still generate inf and nan when nnan and ninf are set.
+ SDNodeFlags NewFlags = Flags;
+ NewFlags.setNoNaNs(false);
+ NewFlags.setNoInfs(false);
SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y, Flags);
- SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div, Flags);
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div, NewFlags);
SDValue Mul = DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y,
- Flags | SDNodeFlags::AllowContract);
+ NewFlags | SDNodeFlags::AllowContract);
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
- Flags | SDNodeFlags::AllowContract);
+ NewFlags | SDNodeFlags::AllowContract);
if (AllowUnsafeFPMath || Flags.hasNoInfs())
return Sub;
diff --git a/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll b/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
new file mode 100644
index 0000000000000..639f9ab201ad0
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
@@ -0,0 +1,11 @@
+; RUN: llc %s --stop-after=nvptx-isel -mcpu=sm_60 -o - | FileCheck %s
+
+target triple = "nvptx64-unknown-cuda"
+
+define float @frem_ninf_nnan(float %a, float %b) {
+ ; CHECK: nnan ninf FDIV32rr_prec
+ ; CHECK-NOT: nnan ninf contract FNEGf32
+ ; CHECK: contract FNEGf32
+ %r = frem ninf nnan float %a, %b
+ ret float %r
+}
>From 9b0fec2162167acf5b9eebfa0e2b1f62ea741736 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 7 Jul 2025 09:47:35 +0800
Subject: [PATCH 2/2] address comments
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++--
llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll | 2 +-
llvm/test/CodeGen/NVPTX/frem.ll | 10 +++++-----
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9418ca3bf446c..e18c75cd49a95 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2797,14 +2797,14 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
SDNodeFlags NewFlags = Flags;
NewFlags.setNoNaNs(false);
NewFlags.setNoInfs(false);
- SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y, Flags);
+ SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y, NewFlags);
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div, NewFlags);
SDValue Mul = DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y,
NewFlags | SDNodeFlags::AllowContract);
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
NewFlags | SDNodeFlags::AllowContract);
- if (AllowUnsafeFPMath || Flags.hasNoInfs())
+ if (AllowUnsafeFPMath || (Flags.hasNoInfs() && Flags.hasApproximateFuncs()))
return Sub;
// If Y is infinite, return X
diff --git a/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll b/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
index 639f9ab201ad0..b1d498257b5ad 100644
--- a/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
+++ b/llvm/test/CodeGen/NVPTX/frem-ninf-nnan.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s --stop-after=nvptx-isel -mcpu=sm_60 -o - | FileCheck %s
+; RUN: llc %s --stop-after=finalize-isel -mcpu=sm_60 -o - | FileCheck %s
target triple = "nvptx64-unknown-cuda"
diff --git a/llvm/test/CodeGen/NVPTX/frem.ll b/llvm/test/CodeGen/NVPTX/frem.ll
index 5805aed1bebe6..479205cd08119 100644
--- a/llvm/test/CodeGen/NVPTX/frem.ll
+++ b/llvm/test/CodeGen/NVPTX/frem.ll
@@ -147,14 +147,14 @@ define half @frem_f16_ninf(half %a, half %b) {
; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1];
; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2;
; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1;
-; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1;
+; NORMAL-NEXT: div.approx.f32 %r3, %r2, %r1;
; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; NORMAL-NEXT: neg.f32 %r5, %r4;
; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2;
; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r6;
; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3;
; NORMAL-NEXT: ret;
- %r = frem ninf half %a, %b
+ %r = frem ninf afn half %a, %b
ret half %r
}
@@ -180,13 +180,13 @@ define float @frem_f32_ninf(float %a, float %b) {
; NORMAL-NEXT: // %bb.0:
; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0];
; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1];
-; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2;
+; NORMAL-NEXT: div.approx.f32 %r3, %r1, %r2;
; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3;
; NORMAL-NEXT: neg.f32 %r5, %r4;
; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1;
; NORMAL-NEXT: st.param.b32 [func_retval0], %r6;
; NORMAL-NEXT: ret;
- %r = frem ninf float %a, %b
+ %r = frem ninf afn float %a, %b
ret float %r
}
@@ -218,7 +218,7 @@ define double @frem_f64_ninf(double %a, double %b) {
; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1;
; NORMAL-NEXT: st.param.b64 [func_retval0], %rd6;
; NORMAL-NEXT: ret;
- %r = frem ninf double %a, %b
+ %r = frem ninf afn double %a, %b
ret double %r
}
More information about the llvm-commits
mailing list