[llvm] [NVPTX] Constant-folding for f2i, d2ui, f2ll etc. (PR #118965)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 05:09:20 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
@llvm/pr-subscribers-llvm-analysis
Author: Lewis Crawford (LewisCrawford)
<details>
<summary>Changes</summary>
Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.
---
Patch is 77.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118965.diff
3 Files Affected:
- (modified) llvm/lib/Analysis/ConstantFolding.cpp (+265)
- (added) llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll (+1129)
- (added) llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll (+1129)
``````````diff
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index efbccee76f2c51..2806c29462fa31 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
@@ -1678,6 +1679,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
+ // NVVM float/double to int32/uint32 conversion intrinsics
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ // NVVM float/double to int64/uint64 conversion intrinsics
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz:
+
// Sign operations are actually bitwise operations, they do not raise
// exceptions even for SNANs.
case Intrinsic::fabs:
@@ -1840,6 +1893,13 @@ inline bool llvm_fenv_testexcept() {
return false;
}
+static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());
+
+ return V;
+}
+
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
Type *Ty) {
llvm_fenv_clearexcept();
@@ -2300,6 +2360,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), U);
}
+ // NVVM float/double to signed/unsigned int32/int64 conversions:
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // f2ui
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // d2ui
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // f2ull
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ // d2ull
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz: {
+ // In float-to-integer conversion, NaN inputs are converted to 0.
+ if (U.isNaN())
+ return ConstantInt::get(Ty, 0);
+
+ APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
+ switch (IntrinsicID) {
+ // i_rm
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2ui_rm:
+ // ll_rm
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ull_rm:
+ RMode = APFloat::rmTowardNegative;
+ break;
+
+ // i_rn
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2ui_rn:
+ // ll_rn
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ull_rn:
+ RMode = APFloat::rmNearestTiesToEven;
+ break;
+
+ // i_rp
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2ui_rp:
+ // ll_rp
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ull_rp:
+ RMode = APFloat::rmTowardPositive;
+ break;
+
+ // i_rz
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rz:
+ // ll_rz
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ RMode = APFloat::rmTowardZero;
+ break;
+ default:
+ llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ }
+ assert(RM != APFloat::roundingMode::Invalid);
+
+ bool IsFTZ = false;
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ IsFTZ = true;
+ break;
+ }
+
+ bool IsSigned = false;
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ IsSigned = true;
+ break;
+ }
+
+ APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
+ auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
+
+ bool IsExact = false;
+ APFloat::opStatus Status =
+ FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
+
+ if (Status != APFloat::opInvalidOp)
+ return ConstantInt::get(Ty, ResInt);
+ return nullptr;
+ }
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
new file mode 100644
index 00000000000000..543c73137c1b64
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
@@ -0,0 +1,1129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; f2i/f2ui and d2i/d2ui - double/float to i32 tests
+
+;###############################################################
+;# Tests with Positive 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 1.5)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with Negative 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float -1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/118965
More information about the llvm-commits
mailing list