[clang] [llvm] [clang][NVPTX] Add remaining float to fp16 conversions (PR #167641)
Srinivasa Ravi via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 17 21:06:31 PST 2025
https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/167641
>From e0f41d4498b87a558cf8fabd2a8ec5430c208fba Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Tue, 11 Nov 2025 17:19:12 +0000
Subject: [PATCH] [clang][NVPTX] Add remaining float to fp16 conversions
This change adds intrinsics and clang builtins for the remaining float
to fp16 conversions. This includes the following conversions:
- float to bf16x2 - satfinite variants
- float to f16x2 - satfinite variants
- float to bf16 - satfinite variants
- float to f16 - all variants
Tests are added in `convert-sm80.ll` and `convert-sm80-sf.ll` for the
intrinsics and in `builtins-nvptx.c` for the clang builtins.
---
clang/include/clang/Basic/BuiltinsNVPTX.td | 21 ++
clang/test/CodeGen/builtins-nvptx.c | 49 ++++
llvm/include/llvm/IR/IntrinsicsNVVM.td | 21 +-
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 14 ++
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 30 ++-
llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll | 260 +++++++++++++++++++++
llvm/test/CodeGen/NVPTX/convert-sm80.ll | 65 ++++++
7 files changed, 451 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index ad448766e665f..6fbd2222ab289 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -579,6 +579,10 @@ def __nvvm_ff2bf16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)
def __nvvm_ff2bf16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
def __nvvm_ff2bf16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
def __nvvm_ff2bf16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2bf16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
def __nvvm_ff2bf16x2_rs :
NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
SM<"100a", [SM_103a]>, PTX87>;
@@ -596,6 +600,10 @@ def __nvvm_ff2f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)"
def __nvvm_ff2f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
def __nvvm_ff2f16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
def __nvvm_ff2f16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2f16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
def __nvvm_ff2f16x2_rs :
NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
SM<"100a", [SM_103a]>, PTX87>;
@@ -613,6 +621,19 @@ def __nvvm_f2bf16_rn : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
def __nvvm_f2bf16_rn_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
+def __nvvm_f2bf16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+
+def __nvvm_f2f16_rn : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>;
def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX81>;
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index c0ed799970122..75f2588f4837b 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -1007,6 +1007,16 @@ __device__ void nvvm_cvt_sm80() {
__nvvm_ff2bf16x2_rz(1, 1);
// CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
__nvvm_ff2bf16x2_rz_relu(1, 1);
+ #if PTX >= 81
+ // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2bf16x2_rn_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2bf16x2_rn_relu_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2bf16x2_rz_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2bf16x2_rz_relu_satfinite(1, 1);
+ #endif
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn(float 1.000000e+00, float 1.000000e+00)
__nvvm_ff2f16x2_rn(1, 1);
@@ -1016,6 +1026,16 @@ __device__ void nvvm_cvt_sm80() {
__nvvm_ff2f16x2_rz(1, 1);
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
__nvvm_ff2f16x2_rz_relu(1, 1);
+ #if PTX >= 81
+ // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2f16x2_rn_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2f16x2_rn_relu_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2f16x2_rz_satfinite(1, 1);
+ // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+ __nvvm_ff2f16x2_rz_relu_satfinite(1, 1);
+ #endif
// CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rn(float 1.000000e+00)
__nvvm_f2bf16_rn(1);
@@ -1025,6 +1045,35 @@ __device__ void nvvm_cvt_sm80() {
__nvvm_f2bf16_rz(1);
// CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu(float 1.000000e+00)
__nvvm_f2bf16_rz_relu(1);
+ #if PTX >= 81
+ // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.satfinite(float 1.000000e+00)
+ __nvvm_f2bf16_rn_satfinite(1);
+ // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2bf16_rn_relu_satfinite(1);
+ // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.satfinite(float 1.000000e+00)
+ __nvvm_f2bf16_rz_satfinite(1);
+ // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2bf16_rz_relu_satfinite(1);
+ #endif
+
+ // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn(float 1.000000e+00)
+ __nvvm_f2f16_rn(1);
+ // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn.relu(float 1.000000e+00)
+ __nvvm_f2f16_rn_relu(1);
+ // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz(float 1.000000e+00)
+ __nvvm_f2f16_rz(1);
+ // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz.relu(float 1.000000e+00)
+ __nvvm_f2f16_rz_relu(1);
+ #if PTX >= 81
+ // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.satfinite(float 1.000000e+00)
+ __nvvm_f2f16_rn_satfinite(1);
+ // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2f16_rn_relu_satfinite(1);
+ // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.satfinite(float 1.000000e+00)
+ __nvvm_f2f16_rz_satfinite(1);
+ // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2f16_rz_relu_satfinite(1);
+ #endif
// CHECK_PTX70_SM80: call i32 @llvm.nvvm.f2tf32.rna(float 1.000000e+00)
__nvvm_f2tf32_rna(1);
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 1b485dc8ccd1e..aef92206187d3 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1566,14 +1566,19 @@ let TargetPrefix = "nvvm" in {
foreach rnd = ["rn", "rz"] in {
foreach relu = ["", "_relu"] in {
- def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
-
- def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
-
- def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin,
- PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+ foreach satfinite = ["", "_satfinite"] in {
+ def int_nvvm_ff2bf16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+ PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+ def int_nvvm_ff2f16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+ PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+ def int_nvvm_f2bf16_ # rnd # relu # satfinite : NVVMBuiltin,
+ PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+
+ def int_nvvm_f2f16_ # rnd # relu # satfinite : NVVMBuiltin,
+ PureIntrinsic<[llvm_half_ty], [llvm_float_ty]>;
+ }
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ff9d9723dddea..84cb39ba0d909 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -595,6 +595,15 @@ let hasSideEffects = false in {
defm CVT_bf16 : CVT_FROM_ALL<"bf16", B16, [hasPTX<78>, hasSM<90>]>;
defm CVT_f32 : CVT_FROM_ALL<"f32", B32>;
defm CVT_f64 : CVT_FROM_ALL<"f64", B64>;
+
+ multiclass CVT_FROM_FLOAT_SATFINITE<string ToName, RegisterClass RC> {
+ def _f32_sf :
+ BasicFlagsNVPTXInst<(outs RC:$dst),
+ (ins B32:$src), (ins CvtMode:$mode),
+ "cvt${mode:base}${mode:relu}.satfinite." # ToName # ".f32">;
+ }
+ defm CVT_bf16 : CVT_FROM_FLOAT_SATFINITE<"bf16", B16>;
+ defm CVT_f16 : CVT_FROM_FLOAT_SATFINITE<"f16", B16>;
// These cvts are different from those above: The source and dest registers
// are of the same type.
@@ -611,6 +620,11 @@ let hasSideEffects = false in {
(ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
"cvt${mode:base}${mode:relu}." # FromName # ".f32">,
Requires<[hasPTX<70>, hasSM<80>]>;
+
+ def _f32_sf :
+ BasicFlagsNVPTXInst<(outs RC:$dst),
+ (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
+ "cvt${mode:base}${mode:relu}.satfinite." # FromName # ".f32">;
}
defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", B32>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ea69a54e6db37..0430aa7723ceb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1917,7 +1917,12 @@ def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, C
def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;
-
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+ def : Pat<(int_nvvm_ff2bf16x2_rn_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN)>;
+ def : Pat<(int_nvvm_ff2bf16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN_RELU)>;
+ def : Pat<(int_nvvm_ff2bf16x2_rz_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ)>;
+ def : Pat<(int_nvvm_ff2bf16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
def : Pat<(int_nvvm_ff2bf16x2_rs f32:$a, f32:$b, i32:$c),
(CVT_bf16x2_f32_rs $a, $b, $c, CvtRS)>;
@@ -1933,6 +1938,12 @@ def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, Cvt
def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+ def : Pat<(int_nvvm_ff2f16x2_rn_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN)>;
+ def : Pat<(int_nvvm_ff2f16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN_RELU)>;
+ def : Pat<(int_nvvm_ff2f16x2_rz_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ)>;
+ def : Pat<(int_nvvm_ff2f16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
def : Pat<(int_nvvm_ff2f16x2_rs f32:$a, f32:$b, i32:$c),
@@ -1948,6 +1959,23 @@ def : Pat<(int_nvvm_f2bf16_rn f32:$a), (CVT_bf16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz f32:$a), (CVT_bf16_f32 $a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+ def : Pat<(int_nvvm_f2bf16_rz_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ)>;
+ def : Pat<(int_nvvm_f2bf16_rz_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ_RELU)>;
+ def : Pat<(int_nvvm_f2bf16_rn_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN)>;
+ def : Pat<(int_nvvm_f2bf16_rn_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN_RELU)>;
+}
+
+def : Pat<(int_nvvm_f2f16_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;
+def : Pat<(int_nvvm_f2f16_rn_relu f32:$a), (CVT_f16_f32 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f2f16_rz f32:$a), (CVT_f16_f32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_f2f16_rz_relu f32:$a), (CVT_f16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+ def : Pat<(int_nvvm_f2f16_rz_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ)>;
+ def : Pat<(int_nvvm_f2f16_rz_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ_RELU)>;
+ def : Pat<(int_nvvm_f2f16_rn_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN)>;
+ def : Pat<(int_nvvm_f2f16_rn_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN_RELU)>;
+}
def : Pat<(int_nvvm_lohi_i2d i32:$a, i32:$b), (V2I32toI64 $a, $b)>;
def : Pat<(int_nvvm_d2i_lo f64:$a), (I64toI32L $a)>;
diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
index f47c2f2a85156..b773c8d11248a 100644
--- a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
@@ -16,3 +16,263 @@ define i32 @cvt_rna_satfinite_tf32_f32(float %f1) {
%val = call i32 @llvm.nvvm.f2tf32.rna.satfinite(float %f1)
ret i32 %val
}
+
+define <2 x bfloat> @cvt_rn_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_bf16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_bf16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rn_bf16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rn.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float %f1, float %f2)
+ ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rn_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_bf16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rn_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rn.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float %f1, float %f2)
+ ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_bf16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_bf16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rz_bf16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rz.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float %f1, float %f2)
+ ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_bf16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rz_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rz.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float %f1, float %f2)
+ ret <2 x bfloat> %val
+}
+
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float, float)
+
+define <2 x half> @cvt_rn_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_f16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_f16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rn_f16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rn.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float %f1, float %f2)
+ ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rn_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_f16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_f16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rn_relu_f16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rn.relu.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float %f1, float %f2)
+ ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_f16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_f16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rz_f16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rz.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float %f1, float %f2)
+ ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_f16x2_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_relu_f16x2_f32_sf_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [cvt_rz_relu_f16x2_f32_sf_param_1];
+; CHECK-NEXT: cvt.rz.relu.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float %f1, float %f2)
+ ret <2 x half> %val
+}
+
+declare <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float, float)
+declare <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float, float)
+declare <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float, float)
+declare <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float, float)
+
+define bfloat @cvt_rn_bf16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rn_bf16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_bf16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rn.satfinite.bf16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call bfloat @llvm.nvvm.f2bf16.rn.satfinite(float %f1)
+ ret bfloat %val
+}
+
+define bfloat @cvt_rn_relu_bf16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rn_relu_bf16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_bf16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rn.relu.satfinite.bf16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call bfloat @llvm.nvvm.f2bf16.rn.relu.satfinite(float %f1)
+ ret bfloat %val
+}
+
+define bfloat @cvt_rz_bf16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rz_bf16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_bf16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rz.satfinite.bf16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call bfloat @llvm.nvvm.f2bf16.rz.satfinite(float %f1)
+ ret bfloat %val
+}
+
+define bfloat @cvt_rz_relu_bf16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rz_relu_bf16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_relu_bf16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rz.relu.satfinite.bf16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call bfloat @llvm.nvvm.f2bf16.rz.relu.satfinite(float %f1)
+ ret bfloat %val
+}
+
+declare bfloat @llvm.nvvm.f2bf16.rn.satfinite(float)
+declare bfloat @llvm.nvvm.f2bf16.rn.relu.satfinite(float)
+declare bfloat @llvm.nvvm.f2bf16.rz.satfinite(float)
+declare bfloat @llvm.nvvm.f2bf16.rz.relu.satfinite(float)
+
+define half @cvt_rn_f16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rn_f16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_f16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rn.satfinite.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rn.satfinite(float %f1)
+ ret half %val
+}
+
+define half @cvt_rn_relu_f16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rn_relu_f16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_f16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rn.relu.satfinite.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rn.relu.satfinite(float %f1)
+ ret half %val
+}
+
+define half @cvt_rz_f16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rz_f16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_f16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rz.satfinite.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rz.satfinite(float %f1)
+ ret half %val
+}
+
+define half @cvt_rz_relu_f16_f32_sf(float %f1) {
+; CHECK-LABEL: cvt_rz_relu_f16_f32_sf(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_relu_f16_f32_sf_param_0];
+; CHECK-NEXT: cvt.rz.relu.satfinite.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rz.relu.satfinite(float %f1)
+ ret half %val
+}
+
+declare half @llvm.nvvm.f2f16.rn.satfinite(float)
+declare half @llvm.nvvm.f2f16.rn.relu.satfinite(float)
+declare half @llvm.nvvm.f2f16.rz.satfinite(float)
+declare half @llvm.nvvm.f2f16.rz.relu.satfinite(float)
diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80.ll b/llvm/test/CodeGen/NVPTX/convert-sm80.ll
index edf1739ae9928..a47bbabdd448c 100644
--- a/llvm/test/CodeGen/NVPTX/convert-sm80.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-sm80.ll
@@ -198,6 +198,71 @@ declare bfloat @llvm.nvvm.f2bf16.rn.relu(float)
declare bfloat @llvm.nvvm.f2bf16.rz(float)
declare bfloat @llvm.nvvm.f2bf16.rz.relu(float)
+define half @cvt_rn_f16_f32(float %f1) {
+; CHECK-LABEL: cvt_rn_f16_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_f16_f32_param_0];
+; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rn(float %f1)
+ ret half %val
+}
+
+define half @cvt_rn_relu_f16_f32(float %f1) {
+; CHECK-LABEL: cvt_rn_relu_f16_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rn_relu_f16_f32_param_0];
+; CHECK-NEXT: cvt.rn.relu.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rn.relu(float %f1)
+ ret half %val
+}
+
+define half @cvt_rz_f16_f32(float %f1) {
+; CHECK-LABEL: cvt_rz_f16_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_f16_f32_param_0];
+; CHECK-NEXT: cvt.rz.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rz(float %f1)
+ ret half %val
+}
+
+define half @cvt_rz_relu_f16_f32(float %f1) {
+; CHECK-LABEL: cvt_rz_relu_f16_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [cvt_rz_relu_f16_f32_param_0];
+; CHECK-NEXT: cvt.rz.relu.f16.f32 %rs1, %r1;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
+; CHECK-NEXT: ret;
+ %val = call half @llvm.nvvm.f2f16.rz.relu(float %f1)
+ ret half %val
+}
+
+declare half @llvm.nvvm.f2f16.rn(float)
+declare half @llvm.nvvm.f2f16.rn.relu(float)
+declare half @llvm.nvvm.f2f16.rz(float)
+declare half @llvm.nvvm.f2f16.rz.relu(float)
+
define i32 @cvt_rna_tf32_f32(float %f1) {
; CHECK-LABEL: cvt_rna_tf32_f32(
; CHECK: {
More information about the cfe-commits
mailing list