[llvm] [NVPTX] Constant-folding for f2i, d2ui, f2ll etc. (PR #118965)
Lewis Crawford via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 13:34:29 PST 2024
https://github.com/LewisCrawford updated https://github.com/llvm/llvm-project/pull/118965
>From e520c47ef2066eaf697eb78b65c2c2eb41398a03 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 15 Nov 2024 17:09:38 +0000
Subject: [PATCH 1/6] [NVPTX] Constant-folding for f2i, d2ui, f2ll etc.
Add constant-folding support for the NVVM intrinsics that convert
float/double values to signed/unsigned int32/int64 types, covering all
rounding modes and ftz modifiers.
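
As a standalone illustration (not part of the patch itself), the sketch below
shows the APFloat machinery these folds rely on. The main() harness is
hypothetical, but APFloat::convertToInteger, APSInt, and the rounding-mode
constants are the real APIs used in ConstantFolding.cpp; the example mirrors
what a call like @llvm.nvvm.f2i.rm(float 1.5) now folds to.

  // Minimal sketch: round float 1.5 to a signed i32 toward negative infinity,
  // the same APFloat path the new folding code takes for llvm.nvvm.f2i.rm.
  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/APSInt.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    APFloat Val(1.5f);
    APSInt ResInt(/*BitWidth=*/32, /*isUnsigned=*/false);
    bool IsExact = false;
    APFloat::opStatus Status =
        Val.convertToInteger(ResInt, APFloat::rmTowardNegative, &IsExact);
    // 1.5 rounded toward negative infinity is 1; the conversion is inexact
    // but not invalid, so InstSimplify can replace the call with `ret i32 1`.
    assert(Status != APFloat::opInvalidOp && ResInt == 1);
    (void)Status;
    return 0;
  }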
---
llvm/lib/Analysis/ConstantFolding.cpp | 265 ++++
.../InstSimplify/const-fold-nvvm-f2i-d2i.ll | 1129 +++++++++++++++++
.../InstSimplify/const-fold-nvvm-f2ll-d2ll.ll | 1129 +++++++++++++++++
3 files changed, 2523 insertions(+)
create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
create mode 100644 llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 3d5022e5502e28..dbc8ff2cfca343 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
@@ -1687,6 +1688,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
+ // NVVM float/double to int32/uint32 conversion intrinsics
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ // NVVM float/double to int64/uint64 conversion intrinsics
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz:
+
// Sign operations are actually bitwise operations, they do not raise
// exceptions even for SNANs.
case Intrinsic::fabs:
@@ -1849,6 +1902,13 @@ inline bool llvm_fenv_testexcept() {
return false;
}
+static APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());
+
+ return V;
+}
+
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
Type *Ty) {
llvm_fenv_clearexcept();
@@ -2309,6 +2369,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), U);
}
+ // NVVM float/double to signed/unsigned int32/int64 conversions:
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // f2ui
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // d2ui
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // f2ull
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ // d2ull
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz: {
+ // In float-to-integer conversion, NaN inputs are converted to 0.
+ if (U.isNaN())
+ return ConstantInt::get(Ty, 0);
+
+ APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
+ switch (IntrinsicID) {
+ // i_rm
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2ui_rm:
+ // ll_rm
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ull_rm:
+ RMode = APFloat::rmTowardNegative;
+ break;
+
+ // i_rn
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2ui_rn:
+ // ll_rn
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ull_rn:
+ RMode = APFloat::rmNearestTiesToEven;
+ break;
+
+ // i_rp
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2ui_rp:
+ // ll_rp
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ull_rp:
+ RMode = APFloat::rmTowardPositive;
+ break;
+
+ // i_rz
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rz:
+ // ll_rz
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ RMode = APFloat::rmTowardZero;
+ break;
+ default:
+ llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ }
+ assert(RMode != APFloat::roundingMode::Invalid);
+
+ bool IsFTZ = false;
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ IsFTZ = true;
+ break;
+ }
+
+ bool IsSigned = false;
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ IsSigned = true;
+ break;
+ }
+
+ APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
+ auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
+
+ bool IsExact = false;
+ APFloat::opStatus Status =
+ FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
+
+ if (Status != APFloat::opInvalidOp)
+ return ConstantInt::get(Ty, ResInt);
+ return nullptr;
+ }
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
new file mode 100644
index 00000000000000..543c73137c1b64
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
@@ -0,0 +1,1129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; f2i/f2ui and d2i/d2ui - double/float to i32 tests
+
+;###############################################################
+;# Tests with Positive 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 1.5)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with Negative 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float -1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rn(double -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rp() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rp(double -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rz() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rz(double -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double -1.5)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with NaN #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_nan_f2i_rm() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2i_rn() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+
+define i32 @test_nan_f2i_rp() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2i_rz() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_nan_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_nan_d2i_rm() {
+; CHECK-LABEL: define i32 @test_nan_d2i_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_d2i_rn() {
+; CHECK-LABEL: define i32 @test_nan_d2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+
+define i32 @test_nan_d2i_rp() {
+; CHECK-LABEL: define i32 @test_nan_d2i_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_d2i_rz() {
+; CHECK-LABEL: define i32 @test_nan_d2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_nan_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+
+define i32 @test_nan_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_nan_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_nan_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0x7FFFFF0000000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_nan_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_nan_d2ui_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_nan_d2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+
+define i32 @test_nan_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_nan_d2ui_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+define i32 @test_nan_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_nan_d2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 0xFFF8000000000000)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with Positive Subnormal #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_f2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_subnormal_f2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rp() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_d2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_d2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_subnormal_d2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rp() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_d2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_subnormal_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rp() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0x380FFFFFC0000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_subnormal_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rm() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_subnormal_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rp() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_pos_subnormal_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_subnormal_d2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 0x000fffffffffffff)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with Negative Subnormal #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_f2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rm() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_subnormal_f2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_d2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rm() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_d2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_subnormal_d2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_d2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2i_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_subnormal_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 0xB80FFFFFC0000000)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_subnormal_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double 0x800FFFFFFFFFFFFF)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rn() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_subnormal_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rp() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+define i32 @test_neg_subnormal_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rz() {
+; CHECK-NEXT: ret i32 0
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 0x800fffffffffffff)
+ ret i32 %res
+}
+
+declare i32 @llvm.nvvm.f2i.rm(float)
+declare i32 @llvm.nvvm.f2i.rn(float)
+declare i32 @llvm.nvvm.f2i.rp(float)
+declare i32 @llvm.nvvm.f2i.rz(float)
+
+declare i32 @llvm.nvvm.f2i.rm.ftz(float)
+declare i32 @llvm.nvvm.f2i.rn.ftz(float)
+declare i32 @llvm.nvvm.f2i.rp.ftz(float)
+declare i32 @llvm.nvvm.f2i.rz.ftz(float)
+
+declare i32 @llvm.nvvm.d2i.rm(double)
+declare i32 @llvm.nvvm.d2i.rn(double)
+declare i32 @llvm.nvvm.d2i.rp(double)
+declare i32 @llvm.nvvm.d2i.rz(double)
+
+
+declare i32 @llvm.nvvm.f2ui.rm(float)
+declare i32 @llvm.nvvm.f2ui.rn(float)
+declare i32 @llvm.nvvm.f2ui.rp(float)
+declare i32 @llvm.nvvm.f2ui.rz(float)
+
+declare i32 @llvm.nvvm.f2ui.rm.ftz(float)
+declare i32 @llvm.nvvm.f2ui.rn.ftz(float)
+declare i32 @llvm.nvvm.f2ui.rp.ftz(float)
+declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
+
+declare i32 @llvm.nvvm.d2ui.rm(double)
+declare i32 @llvm.nvvm.d2ui.rn(double)
+declare i32 @llvm.nvvm.d2ui.rp(double)
+declare i32 @llvm.nvvm.d2ui.rz(double)
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll
new file mode 100644
index 00000000000000..be38177dce2c38
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll
@@ -0,0 +1,1129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; f2ll/f2ull and d2ll/d2ull - double/float to i64 tests
+
+;###############################################################
+;# Tests with Positive 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2ll |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_f2ll_rm() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rm() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ll_rn() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rn() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn(float 1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_1_5_f2ll_rp() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rp() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ll_rz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz(float 1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ll_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_f2ll_rm_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rm_ftz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ll_rn_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rn_ftz() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ll_rp_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rp_ftz() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ll_rz_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ll_rz_ftz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 1.5)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ll |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_d2ll_rm() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rm() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rm(double 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_d2ll_rn() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rn() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.d2ll.rn(double 1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_1_5_d2ll_rp() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rp() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.d2ll.rp(double 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_d2ll_rz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ll_rz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rz(double 1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_f2ull_rm() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rm() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ull_rn() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rn() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn(float 1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_1_5_f2ull_rp() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rp() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ull_rz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz(float 1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_f2ull_rm_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rm_ftz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ull_rn_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rn_ftz() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ull_rp_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rp_ftz() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_f2ull_rz_ftz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_f2ull_rz_ftz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 1.5)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ull |
+;+-------------------------------------------------------------+
+define i64 @test_pos_1_5_d2ull_rm() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rm() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ull.rm(double 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_d2ull_rn() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rn() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.d2ull.rn(double 1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_1_5_d2ull_rp() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rp() {
+; CHECK-NEXT: ret i64 2
+;
+ %res = call i64 @llvm.nvvm.d2ull.rp(double 1.5)
+ ret i64 %res
+}
+
+define i64 @test_pos_1_5_d2ull_rz() {
+; CHECK-LABEL: define i64 @test_pos_1_5_d2ull_rz() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ull.rz(double 1.5)
+ ret i64 %res
+}
+
+;###############################################################
+;# Tests with Negative 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2ll |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_f2ll_rm() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rm() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ll_rn() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rn() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn(float -1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_1_5_f2ll_rp() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rp() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ll_rz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rz() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz(float -1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ll_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_f2ll_rm_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rm_ftz() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ll_rn_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rn_ftz() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ll_rp_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rp_ftz() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ll_rz_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ll_rz_ftz() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float -1.5)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ll |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_d2ll_rm() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rm() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.d2ll.rm(double -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_d2ll_rn() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rn() {
+; CHECK-NEXT: ret i64 -2
+;
+ %res = call i64 @llvm.nvvm.d2ll.rn(double -1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_1_5_d2ll_rp() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rp() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rp(double -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_d2ll_rz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ll_rz() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rz(double -1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_f2ull_rm() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ull_rn() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn(float -1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_1_5_f2ull_rp() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ull_rz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rz(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz(float -1.5)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_f2ull_rm_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ull_rn_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ull_rp_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_f2ull_rz_ftz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz_ftz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rz.ftz(float -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float -1.5)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ull |
+;+-------------------------------------------------------------+
+define i64 @test_neg_1_5_d2ull_rm() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.d2ull.rm(double -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_d2ull_rn() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rn(double -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.d2ull.rn(double -1.5)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_1_5_d2ull_rp() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rp() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rp(double -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.d2ull.rp(double -1.5)
+ ret i64 %res
+}
+
+define i64 @test_neg_1_5_d2ull_rz() {
+; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rz() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rz(double -1.500000e+00)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.d2ull.rz(double -1.5)
+ ret i64 %res
+}
+
+;###############################################################
+;# Tests with NaN #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2ll |
+;+-------------------------------------------------------------+
+define i64 @test_nan_f2ll_rm() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ll_rn() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+
+define i64 @test_nan_f2ll_rp() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ll_rz() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ll_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_nan_f2ll_rm_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ll_rn_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ll_rp_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ll_rz_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ll_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ll |
+;+-------------------------------------------------------------+
+define i64 @test_nan_d2ll_rm() {
+; CHECK-LABEL: define i64 @test_nan_d2ll_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rm(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_d2ll_rn() {
+; CHECK-LABEL: define i64 @test_nan_d2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rn(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+
+define i64 @test_nan_d2ll_rp() {
+; CHECK-LABEL: define i64 @test_nan_d2ll_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rp(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_d2ll_rz() {
+; CHECK-LABEL: define i64 @test_nan_d2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rz(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull |
+;+-------------------------------------------------------------+
+define i64 @test_nan_f2ull_rm() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ull_rn() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+
+define i64 @test_nan_f2ull_rp() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ull_rz() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_nan_f2ull_rm_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ull_rn_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ull_rp_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_f2ull_rz_ftz() {
+; CHECK-LABEL: define i64 @test_nan_f2ull_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0x7FFFFF0000000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ull |
+;+-------------------------------------------------------------+
+define i64 @test_nan_d2ull_rm() {
+; CHECK-LABEL: define i64 @test_nan_d2ull_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rm(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_d2ull_rn() {
+; CHECK-LABEL: define i64 @test_nan_d2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rn(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+
+define i64 @test_nan_d2ull_rp() {
+; CHECK-LABEL: define i64 @test_nan_d2ull_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rp(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+define i64 @test_nan_d2ull_rz() {
+; CHECK-LABEL: define i64 @test_nan_d2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rz(double 0xFFF8000000000000)
+ ret i64 %res
+}
+
+;###############################################################
+;# Tests with Positive Subnormal #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2ll |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_f2ll_rm() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ll_rn() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_subnormal_f2ll_rp() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rp() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ll_rz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ll_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_f2ll_rm_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ll_rn_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ll_rp_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ll_rz_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ll_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ll |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_d2ll_rm() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rm(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_d2ll_rn() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rn(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_subnormal_d2ll_rp() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rp() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rp(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_d2ll_rz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rz(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_f2ull_rm() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ull_rn() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_subnormal_f2ull_rp() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rp() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ull_rz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_f2ull_rm_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ull_rn_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ull_rp_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_f2ull_rz_ftz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_f2ull_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0x380FFFFFC0000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ull |
+;+-------------------------------------------------------------+
+define i64 @test_pos_subnormal_d2ull_rm() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rm() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rm(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_d2ull_rn() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rn(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+
+define i64 @test_pos_subnormal_d2ull_rp() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rp() {
+; CHECK-NEXT: ret i64 1
+;
+ %res = call i64 @llvm.nvvm.d2ull.rp(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_pos_subnormal_d2ull_rz() {
+; CHECK-LABEL: define i64 @test_pos_subnormal_d2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rz(double 0x000fffffffffffff)
+ ret i64 %res
+}
+
+;###############################################################
+;# Tests with Negative Subnormal #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2ll |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_f2ll_rm() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rm() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ll_rn() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_subnormal_f2ll_rp() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ll_rz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ll_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_f2ll_rm_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ll_rn_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ll_rp_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ll_rz_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ll_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ll.rz.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ll |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_d2ll_rm() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rm() {
+; CHECK-NEXT: ret i64 -1
+;
+ %res = call i64 @llvm.nvvm.d2ll.rm(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_d2ll_rn() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rn(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_subnormal_d2ll_rp() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rp(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_d2ll_rz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ll_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ll.rz(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_f2ull_rm() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ull_rn() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_subnormal_f2ull_rp() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ull_rz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ull_ftz |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_f2ull_rm_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rm_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ull_rn_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rn_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ull_rp_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rp_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_f2ull_rz_ftz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rz_ftz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0xB80FFFFFC0000000)
+ ret i64 %res
+}
+;+-------------------------------------------------------------+
+;| d2ull |
+;+-------------------------------------------------------------+
+define i64 @test_neg_subnormal_d2ull_rm() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double 0x800FFFFFFFFFFFFF)
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %res = call i64 @llvm.nvvm.d2ull.rm(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_d2ull_rn() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rn() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rn(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+
+define i64 @test_neg_subnormal_d2ull_rp() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rp() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rp(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+define i64 @test_neg_subnormal_d2ull_rz() {
+; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rz() {
+; CHECK-NEXT: ret i64 0
+;
+ %res = call i64 @llvm.nvvm.d2ull.rz(double 0x800fffffffffffff)
+ ret i64 %res
+}
+
+declare i64 @llvm.nvvm.f2ll.rm(float)
+declare i64 @llvm.nvvm.f2ll.rn(float)
+declare i64 @llvm.nvvm.f2ll.rp(float)
+declare i64 @llvm.nvvm.f2ll.rz(float)
+
+declare i64 @llvm.nvvm.f2ll.rm.ftz(float)
+declare i64 @llvm.nvvm.f2ll.rn.ftz(float)
+declare i64 @llvm.nvvm.f2ll.rp.ftz(float)
+declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
+
+declare i64 @llvm.nvvm.d2ll.rm(double)
+declare i64 @llvm.nvvm.d2ll.rn(double)
+declare i64 @llvm.nvvm.d2ll.rp(double)
+declare i64 @llvm.nvvm.d2ll.rz(double)
+
+
+declare i64 @llvm.nvvm.f2ull.rm(float)
+declare i64 @llvm.nvvm.f2ull.rn(float)
+declare i64 @llvm.nvvm.f2ull.rp(float)
+declare i64 @llvm.nvvm.f2ull.rz(float)
+
+declare i64 @llvm.nvvm.f2ull.rm.ftz(float)
+declare i64 @llvm.nvvm.f2ull.rn.ftz(float)
+declare i64 @llvm.nvvm.f2ull.rp.ftz(float)
+declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
+
+declare i64 @llvm.nvvm.d2ull.rm(double)
+declare i64 @llvm.nvvm.d2ull.rn(double)
+declare i64 @llvm.nvvm.d2ull.rp(double)
+declare i64 @llvm.nvvm.d2ull.rz(double)
>From 54524d334c41d2eca93b4a026cb1eed586db7f31 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 13 Dec 2024 13:17:27 +0000
Subject: [PATCH 2/6] Move internal case statements into helper funcs
Also remove an unnecessary parameter from FTZPreserveSign.
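For illustration only (the helper below is hypothetical and not part of the
patch), the interaction between flush-to-zero and round-toward-negative that
the new tests exercise can be sketched directly with APFloat:

  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/APSInt.h"
  using namespace llvm;

  // Hypothetical sketch of the fold for the f2ll family (NaN handling
  // omitted; the real fold returns 0 for NaN inputs).
  static int64_t foldFloatToI64(APFloat V, bool FTZ, APFloat::roundingMode RM) {
    if (FTZ && V.isDenormal())                // ftz variants flush denormal
      V = APFloat::getZero(V.getSemantics(),  // inputs to +/-0 before
                           V.isNegative());   // rounding to integer.
    APSInt Res(64, /*isUnsigned=*/false);
    bool IsExact = false;
    V.convertToInteger(Res, RM, &IsExact);
    return Res.getSExtValue();
  }

  // For a negative single-precision subnormal input V:
  //   foldFloatToI64(V, /*FTZ=*/false, APFloat::rmTowardNegative) == -1
  //   foldFloatToI64(V, /*FTZ=*/true,  APFloat::rmTowardNegative) ==  0
  // which matches test_neg_subnormal_f2ll_rm vs test_neg_subnormal_f2ll_rm_ftz.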
---
llvm/lib/Analysis/ConstantFolding.cpp | 274 ++++++++++++++------------
1 file changed, 143 insertions(+), 131 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dbc8ff2cfca343..9ad5207cc52cee 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -291,6 +291,143 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
return ConstantVector::get(Result);
}
+//===----------------------------------------------------------------------===//
+// NVVM-specific internal helper functions
+//===----------------------------------------------------------------------===//
+
+static bool NVVMIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // Float to i32 / i64 conversion intrinsics:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ return true;
+ }
+ return false;
+}
+
+static bool NVVMIntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ return true;
+ }
+ return false;
+}
+
+static APFloat::roundingMode
+NVVMIntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // RM:
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2ui_rm:
+
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ull_rm:
+ return APFloat::rmTowardNegative;
+
+ // RN:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2ui_rn:
+
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ull_rn:
+ return APFloat::rmNearestTiesToEven;
+
+ // RP:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2ui_rp:
+
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ull_rp:
+ return APFloat::rmTowardPositive;
+
+ // RZ:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ return APFloat::roundingMode::Invalid;
+}
+
} // end anonymous namespace
/// If this constant is a constant offset from a global, return the global and
@@ -1902,10 +2039,9 @@ inline bool llvm_fenv_testexcept() {
return false;
}
-static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
+static const APFloat FTZPreserveSign(const APFloat &V) {
if (V.isDenormal())
- return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());
-
+ return APFloat::getZero(V.getSemantics(), V.isNegative());
return V;
}
@@ -2431,138 +2567,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (U.isNaN())
return ConstantInt::get(Ty, 0);
- APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
- switch (IntrinsicID) {
- // i_rm
- case Intrinsic::nvvm_f2i_rm:
- case Intrinsic::nvvm_f2ui_rm:
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2ui_rm_ftz:
- case Intrinsic::nvvm_d2i_rm:
- case Intrinsic::nvvm_d2ui_rm:
- // ll_rm
- case Intrinsic::nvvm_f2ll_rm:
- case Intrinsic::nvvm_f2ull_rm:
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ull_rm_ftz:
- case Intrinsic::nvvm_d2ll_rm:
- case Intrinsic::nvvm_d2ull_rm:
- RMode = APFloat::rmTowardNegative;
- break;
-
- // i_rn
- case Intrinsic::nvvm_f2i_rn:
- case Intrinsic::nvvm_f2ui_rn:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2ui_rn_ftz:
- case Intrinsic::nvvm_d2i_rn:
- case Intrinsic::nvvm_d2ui_rn:
- // ll_rn
- case Intrinsic::nvvm_f2ll_rn:
- case Intrinsic::nvvm_f2ull_rn:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ull_rn_ftz:
- case Intrinsic::nvvm_d2ll_rn:
- case Intrinsic::nvvm_d2ull_rn:
- RMode = APFloat::rmNearestTiesToEven;
- break;
-
- // i_rp
- case Intrinsic::nvvm_f2i_rp:
- case Intrinsic::nvvm_f2ui_rp:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2ui_rp_ftz:
- case Intrinsic::nvvm_d2i_rp:
- case Intrinsic::nvvm_d2ui_rp:
- // ll_rp
- case Intrinsic::nvvm_f2ll_rp:
- case Intrinsic::nvvm_f2ull_rp:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ull_rp_ftz:
- case Intrinsic::nvvm_d2ll_rp:
- case Intrinsic::nvvm_d2ull_rp:
- RMode = APFloat::rmTowardPositive;
- break;
-
- // i_rz
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_f2ui_rz:
- case Intrinsic::nvvm_f2i_rz_ftz:
- case Intrinsic::nvvm_f2ui_rz_ftz:
- case Intrinsic::nvvm_d2i_rz:
- case Intrinsic::nvvm_d2ui_rz:
- // ll_rz
- case Intrinsic::nvvm_f2ll_rz:
- case Intrinsic::nvvm_f2ull_rz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
- case Intrinsic::nvvm_f2ull_rz_ftz:
- case Intrinsic::nvvm_d2ll_rz:
- case Intrinsic::nvvm_d2ull_rz:
- RMode = APFloat::rmTowardZero;
- break;
- default:
- llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
- }
+ APFloat::roundingMode RMode = NVVMIntrinsicGetRoundingMode(IntrinsicID);
assert(RM != APFloat::roundingMode::Invalid);
- bool IsFTZ = false;
- switch (IntrinsicID) {
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2i_rz_ftz:
- case Intrinsic::nvvm_f2ui_rm_ftz:
- case Intrinsic::nvvm_f2ui_rn_ftz:
- case Intrinsic::nvvm_f2ui_rp_ftz:
- case Intrinsic::nvvm_f2ui_rz_ftz:
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
- case Intrinsic::nvvm_f2ull_rm_ftz:
- case Intrinsic::nvvm_f2ull_rn_ftz:
- case Intrinsic::nvvm_f2ull_rp_ftz:
- case Intrinsic::nvvm_f2ull_rz_ftz:
- IsFTZ = true;
- break;
- }
-
- bool IsSigned = false;
- switch (IntrinsicID) {
- // f2i
- case Intrinsic::nvvm_f2i_rm:
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2i_rn:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2i_rp:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_f2i_rz_ftz:
- // d2i
- case Intrinsic::nvvm_d2i_rm:
- case Intrinsic::nvvm_d2i_rn:
- case Intrinsic::nvvm_d2i_rp:
- case Intrinsic::nvvm_d2i_rz:
- // f2ll
- case Intrinsic::nvvm_f2ll_rm:
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ll_rn:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ll_rp:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ll_rz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
- // d2ll
- case Intrinsic::nvvm_d2ll_rm:
- case Intrinsic::nvvm_d2ll_rn:
- case Intrinsic::nvvm_d2ll_rp:
- case Intrinsic::nvvm_d2ll_rz:
- IsSigned = true;
- break;
- }
+ bool IsFTZ = NVVMIntrinsicShouldFTZ(IntrinsicID);
+ bool IsSigned = NVVMIntrinsicConvertsToSignedInteger(IntrinsicID);
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
- auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
+ auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
bool IsExact = false;
APFloat::opStatus Status =
>From 108265f807ec6e9c13e98e8cc0ffd5f80665c498 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 13 Dec 2024 16:50:58 +0000
Subject: [PATCH 3/6] Move helper functions into separate file
Move the NVVM intrinsic helper functions into NVVMIntrinsicFlags.h
and then rename that file to NVVMIntrinsicUtils.h.
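For reference, a minimal (hypothetical) caller of the relocated helpers looks
like this; any code that has an Intrinsic::ID in hand can query them once it
includes the new header:

  #include "llvm/IR/NVVMIntrinsicUtils.h"
  using namespace llvm;

  static void classifyNVVMConversion(Intrinsic::ID ID) {
    bool IsFTZ = nvvm::IntrinsicShouldFTZ(ID);                  // flush denormal inputs?
    bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(ID); // i32/i64 vs u32/u64
    // Only valid for the f2i/f2ui/d2i/d2ui/f2ll/f2ull/d2ll/d2ull intrinsics;
    // any other ID reaches the llvm_unreachable inside the helper.
    APFloat::roundingMode RM = nvvm::IntrinsicGetRoundingMode(ID);
    (void)IsFTZ; (void)IsSigned; (void)RM;
  }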
---
llvm/include/llvm/IR/NVVMIntrinsicFlags.h | 39 ----
llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 173 ++++++++++++++++++
llvm/lib/Analysis/ConstantFolding.cpp | 145 +--------------
.../NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2 +-
5 files changed, 179 insertions(+), 182 deletions(-)
delete mode 100644 llvm/include/llvm/IR/NVVMIntrinsicFlags.h
create mode 100644 llvm/include/llvm/IR/NVVMIntrinsicUtils.h
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h b/llvm/include/llvm/IR/NVVMIntrinsicFlags.h
deleted file mode 100644
index dfb6e857b3a6ad..00000000000000
--- a/llvm/include/llvm/IR/NVVMIntrinsicFlags.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===--- NVVMIntrinsicFlags.h -----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file contains the definitions of the enumerations and flags
-/// associated with NVVM Intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_NVVMINTRINSICFLAGS_H
-#define LLVM_IR_NVVMINTRINSICFLAGS_H
-
-#include <stdint.h>
-
-namespace llvm {
-namespace nvvm {
-
-// Reduction Ops supported with TMA Copy from Shared
-// to Global Memory for the "cp.reduce.async.bulk.tensor.*"
-// family of PTX instructions.
-enum class TMAReductionOp : uint8_t {
- ADD = 0,
- MIN = 1,
- MAX = 2,
- INC = 3,
- DEC = 4,
- AND = 5,
- OR = 6,
- XOR = 7,
-};
-
-} // namespace nvvm
-} // namespace llvm
-#endif // LLVM_IR_NVVMINTRINSICFLAGS_H
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
new file mode 100644
index 00000000000000..a463da688cb167
--- /dev/null
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -0,0 +1,173 @@
+//===--- NVVMIntrinsicUtils.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the definitions of the enumerations and flags
+/// associated with NVVM Intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
+#define LLVM_IR_NVVMINTRINSICUTILS_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+
+namespace llvm {
+namespace nvvm {
+
+// Reduction Ops supported with TMA Copy from Shared
+// to Global Memory for the "cp.reduce.async.bulk.tensor.*"
+// family of PTX instructions.
+enum class TMAReductionOp : uint8_t {
+ ADD = 0,
+ MIN = 1,
+ MAX = 2,
+ INC = 3,
+ DEC = 4,
+ AND = 5,
+ OR = 6,
+ XOR = 7,
+};
+
+bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // Float to i32 / i64 conversion intrinsics:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ return true;
+ }
+ return false;
+}
+
+bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ return true;
+ }
+ return false;
+}
+
+APFloat::roundingMode IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
+ switch (IntrinsicID) {
+ // RM:
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2ui_rm:
+
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ull_rm:
+ return APFloat::rmTowardNegative;
+
+ // RN:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2ui_rn:
+
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ull_rn:
+ return APFloat::rmNearestTiesToEven;
+
+ // RP:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2ui_rp:
+
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ull_rp:
+ return APFloat::rmTowardPositive;
+
+ // RZ:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ return APFloat::roundingMode::Invalid;
+}
+
+} // namespace nvvm
+} // namespace llvm
+#endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9ad5207cc52cee..dca3359132609a 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -290,144 +291,6 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
return ConstantVector::get(Result);
}
-
-//===----------------------------------------------------------------------===//
-// NVVM-specific internal helper functions
-//===----------------------------------------------------------------------===//
-
-static bool NVVMIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
- switch (IntrinsicID) {
- // Float to i32 / i64 conversion intrinsics:
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2i_rz_ftz:
-
- case Intrinsic::nvvm_f2ui_rm_ftz:
- case Intrinsic::nvvm_f2ui_rn_ftz:
- case Intrinsic::nvvm_f2ui_rp_ftz:
- case Intrinsic::nvvm_f2ui_rz_ftz:
-
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
-
- case Intrinsic::nvvm_f2ull_rm_ftz:
- case Intrinsic::nvvm_f2ull_rn_ftz:
- case Intrinsic::nvvm_f2ull_rp_ftz:
- case Intrinsic::nvvm_f2ull_rz_ftz:
- return true;
- }
- return false;
-}
-
-static bool NVVMIntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
- switch (IntrinsicID) {
- // f2i
- case Intrinsic::nvvm_f2i_rm:
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2i_rn:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2i_rp:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_f2i_rz_ftz:
- // d2i
- case Intrinsic::nvvm_d2i_rm:
- case Intrinsic::nvvm_d2i_rn:
- case Intrinsic::nvvm_d2i_rp:
- case Intrinsic::nvvm_d2i_rz:
- // f2ll
- case Intrinsic::nvvm_f2ll_rm:
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ll_rn:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ll_rp:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ll_rz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
- // d2ll
- case Intrinsic::nvvm_d2ll_rm:
- case Intrinsic::nvvm_d2ll_rn:
- case Intrinsic::nvvm_d2ll_rp:
- case Intrinsic::nvvm_d2ll_rz:
- return true;
- }
- return false;
-}
-
-static APFloat::roundingMode
-NVVMIntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
- switch (IntrinsicID) {
- // RM:
- case Intrinsic::nvvm_f2i_rm:
- case Intrinsic::nvvm_f2ui_rm:
- case Intrinsic::nvvm_f2i_rm_ftz:
- case Intrinsic::nvvm_f2ui_rm_ftz:
- case Intrinsic::nvvm_d2i_rm:
- case Intrinsic::nvvm_d2ui_rm:
-
- case Intrinsic::nvvm_f2ll_rm:
- case Intrinsic::nvvm_f2ull_rm:
- case Intrinsic::nvvm_f2ll_rm_ftz:
- case Intrinsic::nvvm_f2ull_rm_ftz:
- case Intrinsic::nvvm_d2ll_rm:
- case Intrinsic::nvvm_d2ull_rm:
- return APFloat::rmTowardNegative;
-
- // RN:
- case Intrinsic::nvvm_f2i_rn:
- case Intrinsic::nvvm_f2ui_rn:
- case Intrinsic::nvvm_f2i_rn_ftz:
- case Intrinsic::nvvm_f2ui_rn_ftz:
- case Intrinsic::nvvm_d2i_rn:
- case Intrinsic::nvvm_d2ui_rn:
-
- case Intrinsic::nvvm_f2ll_rn:
- case Intrinsic::nvvm_f2ull_rn:
- case Intrinsic::nvvm_f2ll_rn_ftz:
- case Intrinsic::nvvm_f2ull_rn_ftz:
- case Intrinsic::nvvm_d2ll_rn:
- case Intrinsic::nvvm_d2ull_rn:
- return APFloat::rmNearestTiesToEven;
-
- // RP:
- case Intrinsic::nvvm_f2i_rp:
- case Intrinsic::nvvm_f2ui_rp:
- case Intrinsic::nvvm_f2i_rp_ftz:
- case Intrinsic::nvvm_f2ui_rp_ftz:
- case Intrinsic::nvvm_d2i_rp:
- case Intrinsic::nvvm_d2ui_rp:
-
- case Intrinsic::nvvm_f2ll_rp:
- case Intrinsic::nvvm_f2ull_rp:
- case Intrinsic::nvvm_f2ll_rp_ftz:
- case Intrinsic::nvvm_f2ull_rp_ftz:
- case Intrinsic::nvvm_d2ll_rp:
- case Intrinsic::nvvm_d2ull_rp:
- return APFloat::rmTowardPositive;
-
- // RZ:
- case Intrinsic::nvvm_f2i_rz:
- case Intrinsic::nvvm_f2ui_rz:
- case Intrinsic::nvvm_f2i_rz_ftz:
- case Intrinsic::nvvm_f2ui_rz_ftz:
- case Intrinsic::nvvm_d2i_rz:
- case Intrinsic::nvvm_d2ui_rz:
-
- case Intrinsic::nvvm_f2ll_rz:
- case Intrinsic::nvvm_f2ull_rz:
- case Intrinsic::nvvm_f2ll_rz_ftz:
- case Intrinsic::nvvm_f2ull_rz_ftz:
- case Intrinsic::nvvm_d2ll_rz:
- case Intrinsic::nvvm_d2ull_rz:
- return APFloat::rmTowardZero;
- }
- llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
- return APFloat::roundingMode::Invalid;
-}
-
} // end anonymous namespace
/// If this constant is a constant offset from a global, return the global and
@@ -2567,11 +2430,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (U.isNaN())
return ConstantInt::get(Ty, 0);
- APFloat::roundingMode RMode = NVVMIntrinsicGetRoundingMode(IntrinsicID);
+ APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
assert(RM != APFloat::roundingMode::Invalid);
- bool IsFTZ = NVVMIntrinsicShouldFTZ(IntrinsicID);
- bool IsSigned = NVVMIntrinsicConvertsToSignedInteger(IntrinsicID);
+ bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
+ bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 65e1893d3f3bdf..d34f45fcac0087 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -14,7 +14,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/NVVMIntrinsicFlags.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e1fb2d7fcee030..bcb35c972b70a5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -17,7 +17,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/NVVMIntrinsicFlags.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
>From 623215d12aa0c944e4ff65b6b7a894d184450be5 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 13 Dec 2024 17:38:15 +0000
Subject: [PATCH 4/6] Minor tidying
---
llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 2 +-
llvm/lib/Analysis/ConstantFolding.cpp | 3 +--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index a463da688cb167..19c403a171325a 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -8,7 +8,7 @@
//
/// \file
/// This file contains the definitions of the enumerations and flags
-/// associated with NVVM Intrinsics.
+/// associated with NVVM Intrinsics, along with some helper functions.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dca3359132609a..1801831e3bb9b4 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -291,6 +291,7 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
return ConstantVector::get(Result);
}
+
} // end anonymous namespace
/// If this constant is a constant offset from a global, return the global and
@@ -2431,8 +2432,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantInt::get(Ty, 0);
APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID);
- assert(RM != APFloat::roundingMode::Invalid);
-
bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID);
bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID);
>From 216709f203049a35b8456f81ea9959fab3bed507 Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 13 Dec 2024 20:39:59 +0000
Subject: [PATCH 5/6] Add back <stdint.h> include to NVVMIntrinsicUtils
Add the <stdint.h> header include back into NVVMIntrinsicUtils.h after
rebasing the rename commit to a point after the change that added
<stdint.h> to the original NVVMIntrinsicFlags.h.
This extra include was originally added by:
f33e2369051 [clang][Modules] Fixing Build Breaks When -DLLVM_ENABLE_MODULES=ON (#119473)
---
llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index 19c403a171325a..eec162ed5c182b 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -15,6 +15,8 @@
#ifndef LLVM_IR_NVVMINTRINSICUTILS_H
#define LLVM_IR_NVVMINTRINSICUTILS_H
+#include <stdint.h>
+
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
>From 0a51bdde3fcd240096065bb05030edb4f5d2849a Mon Sep 17 00:00:00 2001
From: Lewis Crawford <lcrawford at nvidia.com>
Date: Fri, 13 Dec 2024 21:33:40 +0000
Subject: [PATCH 6/6] Mark helper functions as inline
Mark the new intrinsic helper functions as inline to avoid
multiple-definition linker errors now that they are defined in a header
included from several translation units.
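For context, a minimal sketch (hypothetical file names, not from this patch)
of why 'inline' is needed once function definitions live in a shared header:

  // util.h
  #ifndef UTIL_H
  #define UTIL_H
  // Without 'inline', every .cpp that includes this header emits its own
  // external definition and the link fails with a multiple-definition error;
  // 'inline' permits one definition per translation unit.
  inline bool isRoundTowardZero(int Kind) { return Kind == 0; }
  #endif
  // a.cpp and b.cpp both do: #include "util.h"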
---
llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index eec162ed5c182b..8ca073ba822534 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -38,7 +38,7 @@ enum class TMAReductionOp : uint8_t {
XOR = 7,
};
-bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+inline bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
// Float to i32 / i64 conversion intrinsics:
case Intrinsic::nvvm_f2i_rm_ftz:
@@ -65,7 +65,7 @@ bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
return false;
}
-bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
+inline bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
// f2i
case Intrinsic::nvvm_f2i_rm:
@@ -100,7 +100,8 @@ bool IntrinsicConvertsToSignedInteger(Intrinsic::ID IntrinsicID) {
return false;
}
-APFloat::roundingMode IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
+inline APFloat::roundingMode
+IntrinsicGetRoundingMode(Intrinsic::ID IntrinsicID) {
switch (IntrinsicID) {
// RM:
case Intrinsic::nvvm_f2i_rm: