[llvm] [ConstantFolding] Fix incorrect nvvm_round folding (PR #151563)

Fri Aug 1 02:02:45 PDT 2025

https://github.com/LewisCrawford updated https://github.com/llvm/llvm-project/pull/151563

>From 40d23466994d2c262ed8025368baba8314814c20 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Thu, 31 Jul 2025 17:19:15 +0000
Subject: [PATCH 1/2] [ConstantFolding] Fix incorrect nvvm_round folding

The nvvm_round intrinsic should round to the nearest even
number in the case of ties. It matches the semantics of rint
(not round as the name suggests).
---
 llvm/lib/Analysis/ConstantFolding.cpp         |  4 +-
 .../const-fold-nvvm-unary-arithmetic.ll       | 48 +++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 2d52f3440938a..5e8d24b056245 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2679,11 +2679,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       case Intrinsic::nvvm_round_ftz_f:
       case Intrinsic::nvvm_round_f:
       case Intrinsic::nvvm_round_d: {
-        // Use APFloat implementation instead of native libm call, as some
-        // implementations (e.g. on PPC) do not preserve the sign of negative 0.
         bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
         auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
-        V.roundToIntegral(APFloat::rmNearestTiesToAway);
+        V.roundToIntegral(APFloat::rmNearestTiesToEven);
         return ConstantFP::get(Ty->getContext(), V);
       }
 
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
index 75b850978b75a..6eed7f8f1a2ae 100644
--- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
@@ -416,6 +416,54 @@ define float @test_round_ftz_f_neg_1_5() {
   ret float %res
 }
 
+define double @test_round_d_2_5() {
+; CHECK-LABEL: define double @test_round_d_2_5() {
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+  %res = call double @llvm.nvvm.round.d(double 2.5)
+  ret double %res
+}
+
+define float @test_round_f_2_5() {
+; CHECK-LABEL: define float @test_round_f_2_5() {
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %res = call float @llvm.nvvm.round.f(float 2.5)
+  ret float %res
+}
+
+define float @test_round_ftz_f_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_2_5() {
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %res = call float @llvm.nvvm.round.ftz.f(float 2.5)
+  ret float %res
+}
+
+define double @test_round_d_neg_2_5() {
+; CHECK-LABEL: define double @test_round_d_neg_2_5() {
+; CHECK-NEXT:    ret double -2.000000e+00
+;
+  %res = call double @llvm.nvvm.round.d(double -2.5)
+  ret double %res
+}
+
+define float @test_round_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_f_neg_2_5() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.round.f(float -2.5)
+  ret float %res
+}
+
+define float @test_round_ftz_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_neg_2_5() {
+; CHECK-NEXT:    ret float -2.000000e+00
+;
+  %res = call float @llvm.nvvm.round.ftz.f(float -2.5)
+  ret float %res
+}
+
 define double @test_round_d_neg_subnorm() {
 ; CHECK-LABEL: define double @test_round_d_neg_subnorm() {
 ; CHECK-NEXT:    ret double -0.000000e+00

>From 0b6369907b98fd6a14a17aa3481de6294338d850 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 1 Aug 2025 09:02:02 +0000
Subject: [PATCH 2/2] Add comment about cvt.rni

---
 llvm/lib/Analysis/ConstantFolding.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 5e8d24b056245..dd98b62baca33 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2679,6 +2679,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       case Intrinsic::nvvm_round_ftz_f:
       case Intrinsic::nvvm_round_f:
       case Intrinsic::nvvm_round_d: {
+        // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
+        // integer, choosing even integer if source is equidistant between two
+        // integers, so the semantics are closer to "rint" rather than "round".
         bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
         auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
         V.roundToIntegral(APFloat::rmNearestTiesToEven);