[Mlir-commits] [mlir] [MLIR][NVVM] Add nvvm.fadd and nvvm.fsub Ops (PR #179162)
Srinivasa Ravi
llvmlistbot at llvm.org
Sun Feb 1 21:11:00 PST 2026
https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/179162
>From 45e67d0257da597f74ad7855ccf69d9a5a0cbfe9 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Fri, 30 Jan 2026 09:26:32 +0000
Subject: [PATCH 1/2] [MLIR][NVVM] Add nvvm.fadd and nvvm.fsub Ops
This change adds the `nvvm.fadd` and `nvvm.fsub` Ops to the NVVM dialect.
`nvvm.fadd` performs floating point addition of two operands along
with any conversions necessary.
`nvvm.fsub` performs floating point subtraction of two operands and
is canonicalized to an `llvm.fneg` followed by an `nvvm.fadd` operation.
PTX ISA Reference:
1. https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add
2. https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add
3. https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 81 ++-
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 212 ++++++
.../Dialect/LLVMIR/nvvm-canonicalize.mlir | 9 +
.../LLVMIR/nvvm/fadd_all_same_types.mlir | 89 +++
.../nvvm/fadd_different_return_type.mlir | 400 ++++++++++
.../test/Target/LLVMIR/nvvm/fadd_invalid.mlir | 107 +++
.../LLVMIR/nvvm/fadd_mixed_arg_types.mlir | 684 ++++++++++++++++++
7 files changed, 1577 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 64a52acbb2278..0dce63c4e5a74 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1860,12 +1860,12 @@ def FPRoundingModeAttr : EnumAttr<NVVM_Dialect, FPRoundingMode, "fp_rnd_mode"> {
let assemblyFormat = "`<` $value `>`";
}
-def SaturationModeNone : I32EnumAttrCase<"NONE", 0, "none">;
-def SaturationModeFinite : I32EnumAttrCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeNone : I32EnumCase<"NONE", 0, "none">;
+def SaturationModeFinite : I32EnumCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeSat : I32EnumCase<"SAT", 2, "sat">;
-def SaturationMode : I32EnumAttr<"SaturationMode", "NVVM SaturationMode kind",
- [SaturationModeNone, SaturationModeFinite]> {
- let genSpecializedAttr = 0;
+def SaturationMode : I32Enum<"SaturationMode", "NVVM SaturationMode kind",
+ [SaturationModeNone, SaturationModeFinite, SaturationModeSat]> {
let cppNamespace = "::mlir::NVVM";
}
def SaturationModeAttr : EnumAttr<NVVM_Dialect, SaturationMode, "sat_mode"> {
@@ -6155,6 +6155,77 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
}];
}
+def NVVM_FloatAdditionOp :
+ NVVM_SingleResultIntrinsicOp<"fadd", [Pure, Commutative]> {
+ let summary = [{
+ Performs floating point addition operation with support for mixed precision
+ operands
+ }];
+ let description = [{
+ The `nvvm.fadd` operation performs floating point addition of two operands.
+
+ The rounding mode to be used is specified by the `rnd` attribute,
+ saturation mode by the `sat` attribute, and FTZ by the `ftz` unit attribute.
+
+ The result type must be at least as wide as the operands. The operands are
+ converted to the result type before addition if it is wider.
+
+ For more information, see PTX ISA - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add),
+ [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add),
+ [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
+ }];
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ let hasVerifier = 1;
+
+ let llvmBuilder = [{
+ auto [ID, args] = NVVM::FloatAdditionOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder);
+ if(ID != llvm::Intrinsic::not_intrinsic) {
+ llvm::Value *addResult = createIntrinsicCall(builder, ID, args);
+ $res = ($_resultType->getScalarSizeInBits() >
+ addResult->getType()->getScalarSizeInBits())
+ ? builder.CreateFPExt(addResult, $_resultType) : addResult;
+ }
+ }];
+}
+
+def NVVM_FloatSubtractionOp :
+ NVVM_Op<"fsub", [Pure]> {
+ let summary = [{
+ Performs floating point subtraction operation with support for mixed
+ precision operands
+ }];
+ let description = [{
+ The `nvvm.fsub` operation performs floating point subtraction of two
+ operands.
+
+ It supports the same type combinations and modifiers as `nvvm.fadd`.
+ This is equivalent to `nvvm.fadd(lhs, -rhs)`.
+
+ For more information, see PTX ISA - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub),
+ [half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub),
+ [mixed precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-sub).
+
+ }];
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ let hasCanonicalizer = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 76ec8b8b7cfd2..033b420d0faee 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3072,6 +3072,85 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
+LogicalResult NVVM::FloatAdditionOp::verify() {
+ auto resFType = getRes().getType();
+ auto lhsFType = getLhs().getType();
+ auto rhsFType = getRhs().getType();
+ auto rndMode = getRnd();
+ auto satMode = getSat();
+ auto isFTZ = getFtz();
+
+ if (satMode == NVVM::SaturationMode::SATFINITE)
+ return emitOpError("SATFINITE saturation mode is not supported for "
+ "floating point addition operation");
+
+ if (isa<VectorType>(resFType) != isa<VectorType>(lhsFType) ||
+ isa<VectorType>(resFType) != isa<VectorType>(rhsFType))
+ return emitOpError("cannot mix vector and scalar types for floating point "
+ "addition operation");
+
+ if (isa<VectorType>(lhsFType) &&
+ ((cast<VectorType>(lhsFType).getElementType() !=
+ cast<VectorType>(rhsFType).getElementType()) ||
+ (cast<VectorType>(lhsFType).getElementType() !=
+ cast<VectorType>(resFType).getElementType())))
+ return emitOpError(
+ "cannot mix different element types for vector floating point "
+ "addition operation");
+
+ if (resFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
+ return emitOpError("FTZ and saturation are not supported for additions "
+ "involving f64 type");
+
+ auto getBaseFType = [](Type type) -> Type {
+ if (isa<VectorType>(type))
+ return cast<VectorType>(type).getElementType();
+ return type;
+ };
+
+ auto resBaseFType = getBaseFType(resFType);
+ auto lhsBaseFType = getBaseFType(lhsFType);
+ auto rhsBaseFType = getBaseFType(rhsFType);
+
+ if (resBaseFType.getIntOrFloatBitWidth() <
+ std::max(lhsBaseFType.getIntOrFloatBitWidth(),
+ rhsBaseFType.getIntOrFloatBitWidth()))
+ return emitOpError("result type must be at least as wide as the operands");
+
+ if (resBaseFType.isF16() && rndMode != NVVM::FPRoundingMode::RN &&
+ rndMode != NVVM::FPRoundingMode::NONE)
+ return emitOpError("only RN rounding mode is supported for f16 and "
+ "vector<2xf16> additions");
+
+ if (resBaseFType.isBF16()) {
+ if (rndMode != NVVM::FPRoundingMode::RN &&
+ rndMode != NVVM::FPRoundingMode::NONE)
+ return emitOpError("only RN rounding mode is supported for bf16 and "
+ "vector<2xbf16> additions");
+ if (satMode != NVVM::SaturationMode::NONE || isFTZ)
+ return emitOpError("FTZ and saturation are not supported for bf16 and "
+ "vector<2xbf16> additions");
+ }
+
+ if (resBaseFType.isF16() && !(lhsBaseFType.isF16() && rhsBaseFType.isF16()))
+ return emitOpError("only f16 + f16 is supported for f16 result type");
+
+ if (resBaseFType.isBF16() &&
+ !(lhsBaseFType.isBF16() && rhsBaseFType.isBF16()))
+ return emitOpError("only bf16 + bf16 is supported for bf16 result type");
+
+ // FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
+ // PTX instructions since the corresponding LLVM intrinsic is missing. This
+ // should be removed once the intrinsics for f16 addition (with FTZ only) are
+ // available.
+ if ((isa<VectorType>(resFType) || resBaseFType.isF16()) && isFTZ &&
+ satMode == NVVM::SaturationMode::NONE)
+ return emitOpError(
+ "FTZ with no saturation is not supported for f16 additions");
+
+ return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -3148,6 +3227,33 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() {
space);
}
+//===----------------------------------------------------------------------===//
+// Canonicalization patterns
+//===----------------------------------------------------------------------===//
+
+struct ConvertFsubToFnegFadd : public OpRewritePattern<FloatSubtractionOp> {
+ using OpRewritePattern<FloatSubtractionOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(FloatSubtractionOp op,
+ PatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+
+ Value negRhs =
+ LLVM::FNegOp::create(rewriter, loc, op.getRhs().getType(), op.getRhs());
+
+ rewriter.replaceOpWithNewOp<FloatAdditionOp>(op, op.getType(), op.getLhs(),
+ negRhs, op.getRnd(),
+ op.getSat(), op.getFtz());
+
+ return success();
+ }
+};
+
+void FloatSubtractionOp::getCanonicalizationPatterns(
+ RewritePatternSet &patterns, MLIRContext *context) {
+ patterns.add<ConvertFsubToFnegFadd>(context);
+}
+
//===----------------------------------------------------------------------===//
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//
@@ -4887,6 +4993,112 @@ mlir::NVVM::IDArgPair TensormapReplaceOp::getIntrinsicIDAndArgs(
return {IDs[fieldIndex], args};
}
+mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::FloatAdditionOp>(op);
+ llvm::SmallVector<llvm::Value *> args;
+ auto rndMode = thisOp.getRnd();
+ bool isRndRN = rndMode == NVVM::FPRoundingMode::RN;
+ auto isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
+ auto isFTZ = thisOp.getFtz();
+
+ llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
+ llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
+
+ mlir::Type lhsType = thisOp.getLhs().getType();
+ mlir::Type rhsType = thisOp.getRhs().getType();
+ mlir::Type resType = thisOp.getRes().getType();
+
+ // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
+ // they are available.
+ static constexpr llvm::Intrinsic::ID f16IDs[] = {
+ llvm::Intrinsic::nvvm_add_rn_sat_f16,
+ llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
+ llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
+ llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
+ };
+
+ static constexpr llvm::Intrinsic::ID f32IDs[] = {
+ llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
+ llvm::Intrinsic::nvvm_add_rn_f,
+ llvm::Intrinsic::nvvm_add_rm_f,
+ llvm::Intrinsic::nvvm_add_rp_f,
+ llvm::Intrinsic::nvvm_add_rz_f,
+ llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
+ llvm::Intrinsic::nvvm_add_rn_sat_f,
+ llvm::Intrinsic::nvvm_add_rm_sat_f,
+ llvm::Intrinsic::nvvm_add_rp_sat_f,
+ llvm::Intrinsic::nvvm_add_rz_sat_f,
+ llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
+ llvm::Intrinsic::nvvm_add_rn_ftz_f,
+ llvm::Intrinsic::nvvm_add_rm_ftz_f,
+ llvm::Intrinsic::nvvm_add_rp_ftz_f,
+ llvm::Intrinsic::nvvm_add_rz_ftz_f,
+ llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
+ llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
+ llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
+ llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
+ llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
+ };
+
+ static constexpr llvm::Intrinsic::ID f64IDs[] = {
+ llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
+ llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
+ llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
+
+ auto addIntrinsic = [&](llvm::Intrinsic::ID IID, llvm::Value *LHS = nullptr,
+ llvm::Value *RHS = nullptr) -> NVVM::IDArgPair {
+ args.push_back(LHS ? LHS : argLHS);
+ args.push_back(RHS ? RHS : argRHS);
+ return {IID, args};
+ };
+
+ // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
+ // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
+ // intrinsics are available.
+ bool isVectorF16Add = isa<VectorType>(resType) &&
+ cast<VectorType>(resType).getElementType().isF16();
+ if (resType.isF16() || isVectorF16Add) {
+ if (isSat) {
+ unsigned index = (isVectorF16Add << 1) | isFTZ;
+ return addIntrinsic(f16IDs[index]);
+ } else {
+ mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+ return {llvm::Intrinsic::not_intrinsic, args};
+ }
+ }
+
+ // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
+ bool isVectorBF16Add = isa<VectorType>(resType) &&
+ cast<VectorType>(resType).getElementType().isBF16();
+ if (resType.isBF16() || isVectorBF16Add) {
+ mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+ return {llvm::Intrinsic::not_intrinsic, args};
+ }
+
+ // f64 + f64/f32/f16/bf16
+ if (resType.isF64()) {
+ llvm::Value *lhsF64 =
+ lhsType.isF64() ? argLHS
+ : builder.CreateFPExt(argLHS, builder.getDoubleTy());
+ llvm::Value *rhsF64 =
+ rhsType.isF64() ? argRHS
+ : builder.CreateFPExt(argRHS, builder.getDoubleTy());
+ unsigned index = static_cast<unsigned>(rndMode);
+ return addIntrinsic(f64IDs[index], lhsF64, rhsF64);
+ }
+
+ // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 + f32/f16/bf16
+ llvm::Value *lhsF32 = lhsType.isF32()
+ ? argLHS
+ : builder.CreateFPExt(argLHS, builder.getFloatTy());
+ llvm::Value *rhsF32 = rhsType.isF32()
+ ? argRHS
+ : builder.CreateFPExt(argRHS, builder.getFloatTy());
+ unsigned index = ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
+ return addIntrinsic(f32IDs[index], lhsF32, rhsF32);
+}
+
//===----------------------------------------------------------------------===//
// NVVM tcgen05.mma functions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
new file mode 100644
index 0000000000000..76d0a1453edf9
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-opt %s -split-input-file --canonicalize | FileCheck %s
+
+// CHECK-LABEL: @fsub_canonicalize
+llvm.func @fsub_canonicalize(%arg0 : f32, %arg1 : f32) -> f32 {
+ // CHECK: %[[NEG_ARG1:.*]] = llvm.fneg %arg1 : f32
+ // CHECK: %[[ADD_RESULT:.*]] = nvvm.fadd %arg0, %[[NEG_ARG1]] : f32, f32 -> f32
+ %0 = nvvm.fsub %arg0, %arg1 : f32, f32 -> f32
+ llvm.return %0 : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
new file mode 100644
index 0000000000000..2aa2bf3a4906b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
@@ -0,0 +1,89 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + f16 -> f16
+llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
+ // CHECK-LABEL: define half @fadd_f16_f16(half %0, half %1) {
+ // CHECK-NEXT: %3 = fadd half %0, %1
+ // CHECK-NEXT: %4 = fadd half %3, %3
+ // CHECK-NEXT: %5 = call half @llvm.nvvm.add.rn.sat.f16(half %4, half %4)
+ // CHECK-NEXT: %6 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %5, half %5)
+ // CHECK-NEXT: ret half %6
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f16 -> f16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f16
+ llvm.return %f4 : f16
+}
+
+// bf16 + bf16 -> bf16
+llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
+ // CHECK-LABEL: define bfloat @fadd_bf16_bf16(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fadd bfloat %0, %1
+ // CHECK-NEXT: %4 = fadd bfloat %3, %3
+ // CHECK-NEXT: ret bfloat %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, bf16 -> bf16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
+ llvm.return %f2 : bf16
+}
+
+// f32 + f32 -> f32
+llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f32_f32(float %0, float %1) {
+ // CHECK-NEXT: %3 = call float @llvm.nvvm.add.rn.f(float %0, float %1)
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %3)
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %4, float %4)
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %5)
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %6, float %6)
+ // CHECK-NEXT: %8 = call float @llvm.nvvm.add.rm.f(float %7, float %7)
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rm.sat.f(float %8, float %8)
+ // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rm.ftz.f(float %9, float %9)
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %10)
+ // CHECK-NEXT: %12 = call float @llvm.nvvm.add.rp.f(float %11, float %11)
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %12, float %12)
+ // CHECK-NEXT: %14 = call float @llvm.nvvm.add.rp.ftz.f(float %13, float %13)
+ // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %14, float %14)
+ // CHECK-NEXT: %16 = call float @llvm.nvvm.add.rz.f(float %15, float %15)
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rz.sat.f(float %16, float %16)
+ // CHECK-NEXT: %18 = call float @llvm.nvvm.add.rz.ftz.f(float %17, float %17)
+ // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %18, float %18)
+ // CHECK-NEXT: ret float %19
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f32, f32 -> f32
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f32, f32 -> f32
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f6 = nvvm.fadd %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
+ %f7 = nvvm.fadd %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f32, f32 -> f32
+ %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f10 = nvvm.fadd %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
+ %f11 = nvvm.fadd %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f32, f32 -> f32
+ %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f14 = nvvm.fadd %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
+ %f15 = nvvm.fadd %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f32, f32 -> f32
+ %f17 = nvvm.fadd %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ llvm.return %f17 : f32
+}
+
+// f64 + f64 -> f64
+llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f64_f64(double %0, double %1) {
+ // CHECK-NEXT: %3 = call double @llvm.nvvm.add.rn.d(double %0, double %1)
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %3)
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %4, double %4)
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %5)
+ // CHECK-NEXT: %7 = call double @llvm.nvvm.add.rz.d(double %6, double %6)
+ // CHECK-NEXT: ret double %7
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f64, f64 -> f64
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64, f64 -> f64
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64, f64 -> f64
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
+ llvm.return %f5 : f64
+}
\ No newline at end of file
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
new file mode 100644
index 0000000000000..8f54272bd31ff
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
@@ -0,0 +1,400 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + f16 -> f32
+llvm.func @fadd_f16_f16_rn(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// bf16 + bf16 -> f32
+llvm.func @fadd_bf16_bf16_rn(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+// f32 + f32 -> f64
+llvm.func @fadd_f32_f32_rn(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rn(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rm(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rm(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rp(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rp(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rz(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rz(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
new file mode 100644
index 0000000000000..a267e5889912f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
@@ -0,0 +1,107 @@
+// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s
+
+// -----
+
+llvm.func @fadd_invalid_sat_mode(%a : f16, %b : f16) -> f32 {
+ // expected-error at +1 {{SATFINITE saturation mode is not supported for floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// -----
+
+llvm.func @fadd_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> f32 {
+ // expected-error at +1 {{cannot mix vector and scalar types for floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// -----
+
+llvm.func @fadd_invalid_vector_element_types_mix(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // expected-error at +1 {{cannot mix different element types for vector floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xbf16>
+ llvm.return %f1 : vector<2xbf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
+ // expected-error at +1 {{FTZ and saturation are not supported for additions involving f64 type}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : f64, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// -----
+
+llvm.func @fadd_invalid_result_width(%a : f64, %b : f64) -> f32 {
+ // expected-error at +1 {{result type must be at least as wide as the operands}}
+ %f1 = nvvm.fadd %a, %b : f64, f64 -> f32
+ llvm.return %f1 : f32
+}
+
+// ----
+
+llvm.func @fadd_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @fadd_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ llvm.return %f1 : vector<2xf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
+ // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+llvm.func @fadd_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ llvm.return %f1 : vector<2xbf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
+ // expected-error at +1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+llvm.func @fadd_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
+ // expected-error at +1 {{only f16 + f16 is supported for f16 result type}}
+ %f1 = nvvm.fadd %a, %b : f16, bf16 -> f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
+ // expected-error at +1 {{only bf16 + bf16 is supported for bf16 result type}}
+ %f1 = nvvm.fadd %a, %b : bf16, f16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
+// available.
+llvm.func @fadd_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
+ // expected-error at +1 {{FTZ with no saturation is not supported for f16 additions}}
+ %f1 = nvvm.fadd %a, %b {ftz} : f16, f16 -> f16
+ llvm.return %f1 : f16
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
new file mode 100644
index 0000000000000..badaad054717d
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
@@ -0,0 +1,684 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + bf16 -> f32
+llvm.func @fadd_f16_bf16(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+// f16 + f32 -> f32
+llvm.func @fadd_f16_f32(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+// f16 + f64 -> f64
+llvm.func @fadd_f16_f64(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rn(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rn(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rm(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rm(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rp(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rp(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rz(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rz(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// bf16 + f32 -> f32
+llvm.func @fadd_bf16_f32(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+// bf16 + f64 -> f64
+llvm.func @fadd_bf16_f64(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rn(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rn(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rm(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rm(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rp(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rp(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rz(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rz(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// f32 + f64 -> f64
+llvm.func @fadd_f32_f64(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rn(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rn(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rm(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rm(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rp(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rp(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rz(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rz(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
>From 740faebabe687ae9d71181b700ebb26f5019704e Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 2 Feb 2026 05:10:40 +0000
Subject: [PATCH 2/2] remove unused variable
---
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 033b420d0faee..4f8237bdc9c43 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -4998,7 +4998,6 @@ mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
auto thisOp = cast<NVVM::FloatAdditionOp>(op);
llvm::SmallVector<llvm::Value *> args;
auto rndMode = thisOp.getRnd();
- bool isRndRN = rndMode == NVVM::FPRoundingMode::RN;
auto isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
auto isFTZ = thisOp.getFtz();
More information about the Mlir-commits
mailing list