[llvm] [ConstantFolding] Fix incorrect nvvm_round folding (PR #151563)
Lewis Crawford via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 02:02:45 PDT 2025
https://github.com/LewisCrawford updated https://github.com/llvm/llvm-project/pull/151563
>From 40d23466994d2c262ed8025368baba8314814c20 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Thu, 31 Jul 2025 17:19:15 +0000
Subject: [PATCH 1/2] [ConstantFolding] Fix incorrect nvvm_round folding
The nvvm_round intrinsic should round to the nearest even
number in the case of ties. It matches the semantics of rint
(not round as the name suggests).
---
llvm/lib/Analysis/ConstantFolding.cpp | 4 +-
.../const-fold-nvvm-unary-arithmetic.ll | 48 +++++++++++++++++++
2 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 2d52f3440938a..5e8d24b056245 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2679,11 +2679,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::nvvm_round_ftz_f:
case Intrinsic::nvvm_round_f:
case Intrinsic::nvvm_round_d: {
- // Use APFloat implementation instead of native libm call, as some
- // implementations (e.g. on PPC) do not preserve the sign of negative 0.
bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
- V.roundToIntegral(APFloat::rmNearestTiesToAway);
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), V);
}
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
index 75b850978b75a..6eed7f8f1a2ae 100644
--- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
@@ -416,6 +416,54 @@ define float @test_round_ftz_f_neg_1_5() {
ret float %res
}
+define double @test_round_d_2_5() {
+; CHECK-LABEL: define double @test_round_d_2_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double 2.5)
+ ret double %res
+}
+
+define float @test_round_f_2_5() {
+; CHECK-LABEL: define float @test_round_f_2_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float 2.5)
+ ret float %res
+}
+
+define float @test_round_ftz_f_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_2_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float 2.5)
+ ret float %res
+}
+
+define double @test_round_d_neg_2_5() {
+; CHECK-LABEL: define double @test_round_d_neg_2_5() {
+; CHECK-NEXT: ret double -2.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double -2.5)
+ ret double %res
+}
+
+define float @test_round_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_f_neg_2_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float -2.5)
+ ret float %res
+}
+
+define float @test_round_ftz_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_neg_2_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float -2.5)
+ ret float %res
+}
+
define double @test_round_d_neg_subnorm() {
; CHECK-LABEL: define double @test_round_d_neg_subnorm() {
; CHECK-NEXT: ret double -0.000000e+00
>From 0b6369907b98fd6a14a17aa3481de6294338d850 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 1 Aug 2025 09:02:02 +0000
Subject: [PATCH 2/2] Add comment about cvt.rni
---
llvm/lib/Analysis/ConstantFolding.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 5e8d24b056245..dd98b62baca33 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2679,6 +2679,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::nvvm_round_ftz_f:
case Intrinsic::nvvm_round_f:
case Intrinsic::nvvm_round_d: {
+ // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
+ // integer, choosing even integer if source is equidistant between two
+ // integers, so the semantics are closer to "rint" rather than "round".
bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
V.roundToIntegral(APFloat::rmNearestTiesToEven);
More information about the llvm-commits
mailing list