[Mlir-commits] [mlir] [MLIR][NVVM] Add nvvm.addf and nvvm.subf Ops (PR #179162)
Srinivasa Ravi
llvmlistbot at llvm.org
Mon Feb 9 00:24:52 PST 2026
https://github.com/Wolfram70 updated https://github.com/llvm/llvm-project/pull/179162
>From 45e67d0257da597f74ad7855ccf69d9a5a0cbfe9 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Fri, 30 Jan 2026 09:26:32 +0000
Subject: [PATCH 1/9] [MLIR][NVVM] Add nvvm.fadd and nvvm.fsub Ops
This change adds the `nvvm.fadd` and `nvvm.fsub` Ops to the NVVM dialect.
`nvvm.fadd` performs floating point addition of two operands along
with any conversions necessary.
`nvvm.fsub` performs floating point subtraction of two operands and
is canonicalized to an `llvm.fneg` followed by an `nvvm.fadd` operation.
PTX ISA Reference:
1. https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add
2. https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add
3. https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 81 ++-
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 212 ++++++
.../Dialect/LLVMIR/nvvm-canonicalize.mlir | 9 +
.../LLVMIR/nvvm/fadd_all_same_types.mlir | 89 +++
.../nvvm/fadd_different_return_type.mlir | 400 ++++++++++
.../test/Target/LLVMIR/nvvm/fadd_invalid.mlir | 107 +++
.../LLVMIR/nvvm/fadd_mixed_arg_types.mlir | 684 ++++++++++++++++++
7 files changed, 1577 insertions(+), 5 deletions(-)
create mode 100644 mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 64a52acbb2278..0dce63c4e5a74 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1860,12 +1860,12 @@ def FPRoundingModeAttr : EnumAttr<NVVM_Dialect, FPRoundingMode, "fp_rnd_mode"> {
let assemblyFormat = "`<` $value `>`";
}
-def SaturationModeNone : I32EnumAttrCase<"NONE", 0, "none">;
-def SaturationModeFinite : I32EnumAttrCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeNone : I32EnumCase<"NONE", 0, "none">;
+def SaturationModeFinite : I32EnumCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeSat : I32EnumCase<"SAT", 2, "sat">;
-def SaturationMode : I32EnumAttr<"SaturationMode", "NVVM SaturationMode kind",
- [SaturationModeNone, SaturationModeFinite]> {
- let genSpecializedAttr = 0;
+def SaturationMode : I32Enum<"SaturationMode", "NVVM SaturationMode kind",
+ [SaturationModeNone, SaturationModeFinite, SaturationModeSat]> {
let cppNamespace = "::mlir::NVVM";
}
def SaturationModeAttr : EnumAttr<NVVM_Dialect, SaturationMode, "sat_mode"> {
@@ -6155,6 +6155,77 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
}];
}
+// `nvvm.fadd`: floating point addition with optional rounding, saturation and
+// flush-to-zero modifiers. The operand and result constraints below are
+// declared independently of each other; the cross-operand rules (no
+// scalar/vector mixing, result at least as wide as the operands, per-type
+// modifier restrictions) are enforced by the C++ verifier enabled through
+// `hasVerifier`.
+def NVVM_FloatAdditionOp :
+ NVVM_SingleResultIntrinsicOp<"fadd", [Pure, Commutative]> {
+ let summary = [{
+ Performs floating point addition operation with support for mixed precision
+ operands
+ }];
+ let description = [{
+ The `nvvm.fadd` operation performs floating point addition of two operands.
+
+ The rounding mode to be used is specified by the `rnd` attribute,
+ saturation mode by the `sat` attribute, and FTZ by the `ftz` unit attribute.
+
+ The result type must be at least as wide as the operands. The operands are
+ converted to the result type before addition if it is wider.
+
+ For more information, see PTX ISA - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add),
+ [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add),
+ [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
+ }];
+ // Both operands accept the same set of types: scalar f16/bf16/f32/f64 or a
+ // 2-element vector of f16/bf16. `rnd` and `sat` default to NONE and `ftz` is
+ // a unit attribute (present or absent).
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ // Both operand types and the result type are spelled out in the assembly.
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ let hasVerifier = 1;
+
+ // Translation: getIntrinsicIDAndArgs either selects an intrinsic plus its
+ // arguments, or returns `not_intrinsic`, in which case nothing is emitted
+ // here (for those paths the helper has already mapped the result to a plain
+ // LLVM fadd). When an intrinsic is called, its result is fp-extended only if
+ // the op's result type is wider than the intrinsic's return type.
+ let llvmBuilder = [{
+ auto [ID, args] = NVVM::FloatAdditionOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder);
+ if(ID != llvm::Intrinsic::not_intrinsic) {
+ llvm::Value *addResult = createIntrinsicCall(builder, ID, args);
+ $res = ($_resultType->getScalarSizeInBits() >
+ addResult->getType()->getScalarSizeInBits())
+ ? builder.CreateFPExt(addResult, $_resultType) : addResult;
+ }
+ }];
+}
+
+// `nvvm.fsub`: floating point subtraction. It mirrors the operands, attributes
+// and assembly format of `nvvm.fadd`. This definition declares a canonicalizer
+// but no `llvmBuilder`; the canonicalization pattern rewrites the op into an
+// `llvm.fneg` of the right-hand operand followed by `nvvm.fadd`.
+// NOTE(review): no verifier is declared here, so an unsupported type/modifier
+// combination would presumably only be diagnosed on the `nvvm.fadd` produced
+// by canonicalization - confirm this is intended.
+def NVVM_FloatSubtractionOp :
+ NVVM_Op<"fsub", [Pure]> {
+ let summary = [{
+ Performs floating point subtraction operation with support for mixed
+ precision operands
+ }];
+ let description = [{
+ The `nvvm.fsub` operation performs floating point subtraction of two
+ operands.
+
+ It supports the same type combinations and modifiers as `nvvm.fadd`.
+ This is equivalent to `nvvm.fadd(lhs, -rhs)`.
+
+ For more information, see PTX ISA - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub),
+ [half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub),
+ [mixed precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-sub).
+
+ }];
+ // Same operand types and modifier attributes as `nvvm.fadd`.
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ // Enables FloatSubtractionOp::getCanonicalizationPatterns.
+ let hasCanonicalizer = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 76ec8b8b7cfd2..033b420d0faee 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3072,6 +3072,85 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
+/// Verifies the operand/result type combination and the modifiers of an
+/// `nvvm.fadd` operation.
+///
+/// Checks performed, in order:
+///  - SATFINITE saturation is rejected outright.
+///  - Operands and result must be all scalars or all vectors, and vector
+///    element types must match exactly.
+///  - f64 results allow neither FTZ nor saturation.
+///  - The result must be at least as wide as the wider operand.
+///  - f16 / bf16 results only allow the RN (or unspecified) rounding mode;
+///    bf16 results additionally allow neither FTZ nor saturation.
+///  - f16 / bf16 results require both operands to be of that same type.
+///  - FTZ without saturation is (temporarily) rejected for f16 results.
+///  - Any rounding mode other than NONE/RN/RM/RP/RZ is rejected.
+///
+/// Returns success() when every check passes, otherwise the emitted error.
+LogicalResult NVVM::FloatAdditionOp::verify() {
+  auto resFType = getRes().getType();
+  auto lhsFType = getLhs().getType();
+  auto rhsFType = getRhs().getType();
+  auto rndMode = getRnd();
+  auto satMode = getSat();
+  auto isFTZ = getFtz();
+
+  if (satMode == NVVM::SaturationMode::SATFINITE)
+    return emitOpError("SATFINITE saturation mode is not supported for "
+                       "floating point addition operation");
+
+  // Either everything is a vector or nothing is.
+  if (isa<VectorType>(resFType) != isa<VectorType>(lhsFType) ||
+      isa<VectorType>(resFType) != isa<VectorType>(rhsFType))
+    return emitOpError("cannot mix vector and scalar types for floating point "
+                       "addition operation");
+
+  // Vector additions do not support mixed element types.
+  if (isa<VectorType>(lhsFType) &&
+      ((cast<VectorType>(lhsFType).getElementType() !=
+        cast<VectorType>(rhsFType).getElementType()) ||
+       (cast<VectorType>(lhsFType).getElementType() !=
+        cast<VectorType>(resFType).getElementType())))
+    return emitOpError(
+        "cannot mix different element types for vector floating point "
+        "addition operation");
+
+  if (resFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
+    return emitOpError("FTZ and saturation are not supported for additions "
+                       "involving f64 type");
+
+  // Strips the vector wrapper, if any, so that scalar and vector cases can
+  // share the element-type based checks below.
+  auto getBaseFType = [](Type type) -> Type {
+    if (isa<VectorType>(type))
+      return cast<VectorType>(type).getElementType();
+    return type;
+  };
+
+  auto resBaseFType = getBaseFType(resFType);
+  auto lhsBaseFType = getBaseFType(lhsFType);
+  auto rhsBaseFType = getBaseFType(rhsFType);
+
+  if (resBaseFType.getIntOrFloatBitWidth() <
+      std::max(lhsBaseFType.getIntOrFloatBitWidth(),
+               rhsBaseFType.getIntOrFloatBitWidth()))
+    return emitOpError("result type must be at least as wide as the operands");
+
+  if (resBaseFType.isF16() && rndMode != NVVM::FPRoundingMode::RN &&
+      rndMode != NVVM::FPRoundingMode::NONE)
+    return emitOpError("only RN rounding mode is supported for f16 and "
+                       "vector<2xf16> additions");
+
+  if (resBaseFType.isBF16()) {
+    if (rndMode != NVVM::FPRoundingMode::RN &&
+        rndMode != NVVM::FPRoundingMode::NONE)
+      return emitOpError("only RN rounding mode is supported for bf16 and "
+                         "vector<2xbf16> additions");
+    if (satMode != NVVM::SaturationMode::NONE || isFTZ)
+      return emitOpError("FTZ and saturation are not supported for bf16 and "
+                         "vector<2xbf16> additions");
+  }
+
+  if (resBaseFType.isF16() && !(lhsBaseFType.isF16() && rhsBaseFType.isF16()))
+    return emitOpError("only f16 + f16 is supported for f16 result type");
+
+  if (resBaseFType.isBF16() &&
+      !(lhsBaseFType.isBF16() && rhsBaseFType.isBF16()))
+    return emitOpError("only bf16 + bf16 is supported for bf16 result type");
+
+  // FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
+  // PTX instructions since the corresponding LLVM intrinsic is missing. This
+  // should be removed once the intrinsics for f16 addition (with FTZ only) are
+  // available.
+  if ((isa<VectorType>(resFType) || resBaseFType.isF16()) && isFTZ &&
+      satMode == NVVM::SaturationMode::NONE)
+    return emitOpError(
+        "FTZ with no saturation is not supported for f16 additions");
+
+  // Translation indexes its f32 and f64 intrinsic tables directly with the
+  // numeric value of the rounding mode, and those tables only hold entries
+  // for NONE, RN, RM, RP and RZ. Reject every other mode so translation can
+  // never read past the end of a table. This check is deliberately last so
+  // the more specific diagnostics above keep priority.
+  // NOTE(review): FPRoundingMode is declared outside this change; this
+  // assumes it has cases beyond RZ (e.g. RNA) - confirm against the enum.
+  if (rndMode != NVVM::FPRoundingMode::NONE &&
+      rndMode != NVVM::FPRoundingMode::RN &&
+      rndMode != NVVM::FPRoundingMode::RM &&
+      rndMode != NVVM::FPRoundingMode::RP &&
+      rndMode != NVVM::FPRoundingMode::RZ)
+    return emitOpError("only RN, RM, RP and RZ rounding modes are supported "
+                       "for floating point addition operation");
+
+  return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -3148,6 +3227,33 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() {
space);
}
+//===----------------------------------------------------------------------===//
+// Canonicalization patterns
+//===----------------------------------------------------------------------===//
+
+/// Rewrites `nvvm.fsub lhs, rhs` as `nvvm.fadd lhs, (llvm.fneg rhs)`.
+///
+/// The rounding mode, saturation mode and FTZ flag of the subtraction are
+/// forwarded unchanged to the new addition. The pattern matches every
+/// `nvvm.fsub` and therefore always succeeds.
+struct ConvertFsubToFnegFadd : public OpRewritePattern<FloatSubtractionOp> {
+  using OpRewritePattern<FloatSubtractionOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(FloatSubtractionOp op,
+                                PatternRewriter &rewriter) const override {
+    // Negate the subtrahend, keeping its type.
+    Value subtrahend = op.getRhs();
+    Value negated = LLVM::FNegOp::create(rewriter, op.getLoc(),
+                                         subtrahend.getType(), subtrahend);
+
+    // Replace the subtraction by an addition with the same result type and
+    // modifiers.
+    rewriter.replaceOpWithNewOp<FloatAdditionOp>(
+        op, op.getType(), op.getLhs(), negated, op.getRnd(), op.getSat(),
+        op.getFtz());
+    return success();
+  }
+};
+
+/// Registers the canonicalization patterns of `nvvm.fsub`: the single pattern
+/// added here, ConvertFsubToFnegFadd, rewrites the op into an `llvm.fneg`
+/// followed by an `nvvm.fadd`.
+void FloatSubtractionOp::getCanonicalizationPatterns(
+ RewritePatternSet &patterns, MLIRContext *context) {
+ patterns.add<ConvertFsubToFnegFadd>(context);
+}
+
//===----------------------------------------------------------------------===//
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//
@@ -4887,6 +4993,112 @@ mlir::NVVM::IDArgPair TensormapReplaceOp::getIntrinsicIDAndArgs(
return {IDs[fieldIndex], args};
}
+/// Selects how an `nvvm.fadd` is translated to LLVM IR.
+///
+/// Returns the intrinsic ID together with its two arguments when the addition
+/// maps to an NVVM intrinsic. For f16/bf16 (scalar or 2-element vector)
+/// results without saturation, a plain LLVM `fadd` is emitted instead: the
+/// op result is mapped to it directly and `not_intrinsic` is returned with an
+/// empty argument list, signalling the caller that nothing is left to emit.
+///
+/// \param op      The `nvvm.fadd` operation being translated.
+/// \param mt      Module translation used to look up operand values and to
+///                map the result on the plain `fadd` paths.
+/// \param builder IR builder used for `fpext` and `fadd` instructions.
+///
+/// NOTE(review): the f32 and f64 tables are indexed with the raw numeric value
+/// of the rounding mode and only hold five entries per group (the default
+/// mode followed by rn, rm, rp, rz). The verifier must reject any rounding
+/// mode whose value falls outside that range.
+mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::FloatAdditionOp>(op);
+  llvm::SmallVector<llvm::Value *> args;
+  auto rndMode = thisOp.getRnd();
+  auto isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
+  auto isFTZ = thisOp.getFtz();
+
+  llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
+  llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
+
+  mlir::Type lhsType = thisOp.getLhs().getType();
+  mlir::Type rhsType = thisOp.getRhs().getType();
+  mlir::Type resType = thisOp.getRes().getType();
+
+  // Saturating f16 intrinsics, indexed by (isVector << 1) | isFTZ.
+  // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
+  // they are available.
+  static constexpr llvm::Intrinsic::ID f16IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
+  };
+
+  // f32 intrinsics, indexed by ((isFTZ << 1) | isSat) * 5 + rounding mode.
+  // Each group of five starts with the entry used for the default mode.
+  static constexpr llvm::Intrinsic::ID f32IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_f,
+      llvm::Intrinsic::nvvm_add_rm_f,
+      llvm::Intrinsic::nvvm_add_rp_f,
+      llvm::Intrinsic::nvvm_add_rz_f,
+      llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_sat_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
+  };
+
+  // f64 intrinsics, indexed by the rounding mode only.
+  static constexpr llvm::Intrinsic::ID f64IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
+      llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
+
+  // Builds the return value for an intrinsic call. When no (converted)
+  // operands are supplied, the original operand values are used.
+  auto addIntrinsic = [&](llvm::Intrinsic::ID IID, llvm::Value *LHS = nullptr,
+                          llvm::Value *RHS = nullptr) -> NVVM::IDArgPair {
+    args.push_back(LHS ? LHS : argLHS);
+    args.push_back(RHS ? RHS : argRHS);
+    return {IID, args};
+  };
+
+  // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
+  // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
+  // intrinsics are available.
+  bool isVectorF16Add = isa<VectorType>(resType) &&
+                        cast<VectorType>(resType).getElementType().isF16();
+  if (resType.isF16() || isVectorF16Add) {
+    if (isSat) {
+      unsigned index = (isVectorF16Add << 1) | isFTZ;
+      return addIntrinsic(f16IDs[index]);
+    }
+    // Non-saturating f16 addition is a plain LLVM fadd.
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return {llvm::Intrinsic::not_intrinsic, args};
+  }
+
+  // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
+  bool isVectorBF16Add = isa<VectorType>(resType) &&
+                         cast<VectorType>(resType).getElementType().isBF16();
+  if (resType.isBF16() || isVectorBF16Add) {
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return {llvm::Intrinsic::not_intrinsic, args};
+  }
+
+  // f64 + f64/f32/f16/bf16: widen any narrower operand to double first.
+  if (resType.isF64()) {
+    llvm::Value *lhsF64 =
+        lhsType.isF64() ? argLHS
+                        : builder.CreateFPExt(argLHS, builder.getDoubleTy());
+    llvm::Value *rhsF64 =
+        rhsType.isF64() ? argRHS
+                        : builder.CreateFPExt(argRHS, builder.getDoubleTy());
+    unsigned index = static_cast<unsigned>(rndMode);
+    return addIntrinsic(f64IDs[index], lhsF64, rhsF64);
+  }
+
+  // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 + f32/f16/bf16
+  // Everything left is computed in f32: widen any narrower operand to float.
+  llvm::Value *lhsF32 = lhsType.isF32()
+                            ? argLHS
+                            : builder.CreateFPExt(argLHS, builder.getFloatTy());
+  llvm::Value *rhsF32 = rhsType.isF32()
+                            ? argRHS
+                            : builder.CreateFPExt(argRHS, builder.getFloatTy());
+  unsigned index = ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
+  return addIntrinsic(f32IDs[index], lhsF32, rhsF32);
+}
+
//===----------------------------------------------------------------------===//
// NVVM tcgen05.mma functions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
new file mode 100644
index 0000000000000..76d0a1453edf9
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-opt %s -split-input-file --canonicalize | FileCheck %s
+
+// Canonicalization rewrites nvvm.fsub into an llvm.fneg of the right-hand
+// operand followed by an nvvm.fadd. The function arguments are captured
+// instead of hard-coding their SSA names, and the returned value is checked
+// to be the result of the new addition.
+
+// CHECK-LABEL: @fsub_canonicalize
+// CHECK-SAME: (%[[LHS:.*]]: f32, %[[RHS:.*]]: f32)
+llvm.func @fsub_canonicalize(%arg0 : f32, %arg1 : f32) -> f32 {
+  // CHECK: %[[NEG_RHS:.*]] = llvm.fneg %[[RHS]] : f32
+  // CHECK: %[[ADD_RESULT:.*]] = nvvm.fadd %[[LHS]], %[[NEG_RHS]] : f32, f32 -> f32
+  // CHECK: llvm.return %[[ADD_RESULT]] : f32
+  %0 = nvvm.fsub %arg0, %arg1 : f32, f32 -> f32
+  llvm.return %0 : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
new file mode 100644
index 0000000000000..2aa2bf3a4906b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
@@ -0,0 +1,89 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Translation tests for nvvm.fadd where both operands and the result have the
+// same scalar type. Each function chains several additions so that one
+// function covers every modifier combination exercised for that type; the
+// expected LLVM IR is matched line by line, so the order of the operations
+// below must stay in sync with the order of the expected lines.
+
+// f16 + f16 -> f16
+// Without saturation (default or rn rounding) a plain fadd is expected; with
+// saturation the add.rn.sat.f16 / add.rn.ftz.sat.f16 intrinsics are expected.
+llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
+ // CHECK-LABEL: define half @fadd_f16_f16(half %0, half %1) {
+ // CHECK-NEXT: %3 = fadd half %0, %1
+ // CHECK-NEXT: %4 = fadd half %3, %3
+ // CHECK-NEXT: %5 = call half @llvm.nvvm.add.rn.sat.f16(half %4, half %4)
+ // CHECK-NEXT: %6 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %5, half %5)
+ // CHECK-NEXT: ret half %6
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f16 -> f16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f16
+ llvm.return %f4 : f16
+}
+
+// bf16 + bf16 -> bf16
+// Only the default and rn rounding modes are exercised; both are expected to
+// produce a plain fadd.
+llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
+ // CHECK-LABEL: define bfloat @fadd_bf16_bf16(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fadd bfloat %0, %1
+ // CHECK-NEXT: %4 = fadd bfloat %3, %3
+ // CHECK-NEXT: ret bfloat %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, bf16 -> bf16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
+ llvm.return %f2 : bf16
+}
+
+// f32 + f32 -> f32
+// The default rounding mode is expected to select the rn intrinsic. Each of
+// rn, rm, rp and rz is then exercised plain, with sat, with ftz, and with
+// both sat and ftz.
+llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f32_f32(float %0, float %1) {
+ // CHECK-NEXT: %3 = call float @llvm.nvvm.add.rn.f(float %0, float %1)
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %3)
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %4, float %4)
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %5)
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %6, float %6)
+ // CHECK-NEXT: %8 = call float @llvm.nvvm.add.rm.f(float %7, float %7)
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rm.sat.f(float %8, float %8)
+ // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rm.ftz.f(float %9, float %9)
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %10)
+ // CHECK-NEXT: %12 = call float @llvm.nvvm.add.rp.f(float %11, float %11)
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %12, float %12)
+ // CHECK-NEXT: %14 = call float @llvm.nvvm.add.rp.ftz.f(float %13, float %13)
+ // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %14, float %14)
+ // CHECK-NEXT: %16 = call float @llvm.nvvm.add.rz.f(float %15, float %15)
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rz.sat.f(float %16, float %16)
+ // CHECK-NEXT: %18 = call float @llvm.nvvm.add.rz.ftz.f(float %17, float %17)
+ // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %18, float %18)
+ // CHECK-NEXT: ret float %19
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f32, f32 -> f32
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f32, f32 -> f32
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f6 = nvvm.fadd %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
+ %f7 = nvvm.fadd %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f32, f32 -> f32
+ %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f10 = nvvm.fadd %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
+ %f11 = nvvm.fadd %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f32, f32 -> f32
+ %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f14 = nvvm.fadd %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
+ %f15 = nvvm.fadd %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f32, f32 -> f32
+ %f17 = nvvm.fadd %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ llvm.return %f17 : f32
+}
+
+// f64 + f64 -> f64
+// Only rounding modes are exercised here (default, rn, rm, rp, rz); the
+// default mode is expected to select the rn intrinsic.
+llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f64_f64(double %0, double %1) {
+ // CHECK-NEXT: %3 = call double @llvm.nvvm.add.rn.d(double %0, double %1)
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %3)
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %4, double %4)
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %5)
+ // CHECK-NEXT: %7 = call double @llvm.nvvm.add.rz.d(double %6, double %6)
+ // CHECK-NEXT: ret double %7
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f64, f64 -> f64
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64, f64 -> f64
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64, f64 -> f64
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
+ llvm.return %f5 : f64
+}
\ No newline at end of file
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
new file mode 100644
index 0000000000000..8f54272bd31ff
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
@@ -0,0 +1,400 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + f16 -> f32
+llvm.func @fadd_f16_f16_rn(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rn_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rm_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rp_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_sat(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f16_rz_sat_ftz(half %0, half %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext half %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// bf16 + bf16 -> f32
+llvm.func @fadd_bf16_bf16_rn(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat_ftz(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+// f32 + f32 -> f64
+llvm.func @fadd_f32_f32_rn(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rn(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rm(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rm(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rp(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rp(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f32_rz(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f32_rz(float %0, float %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = fpext float %1 to double
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
+ // CHECK-NEXT: ret double %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f64
+ llvm.return %f1 : f64
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
new file mode 100644
index 0000000000000..a267e5889912f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
@@ -0,0 +1,107 @@
+// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s
+
+// -----
+
+llvm.func @fadd_invalid_sat_mode(%a : f16, %b : f16) -> f32 {
+ // expected-error@+1 {{SATFINITE saturation mode is not supported for floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// -----
+
+llvm.func @fadd_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> f32 {
+ // expected-error@+1 {{cannot mix vector and scalar types for floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, f16 -> f32
+ llvm.return %f1 : f32
+}
+
+// -----
+
+llvm.func @fadd_invalid_vector_element_types_mix(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // expected-error@+1 {{cannot mix different element types for vector floating point addition operation}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xbf16>
+ llvm.return %f1 : vector<2xbf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
+ // expected-error@+1 {{FTZ and saturation are not supported for additions involving f64 type}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : f64, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// -----
+
+llvm.func @fadd_invalid_result_width(%a : f64, %b : f64) -> f32 {
+ // expected-error@+1 {{result type must be at least as wide as the operands}}
+ %f1 = nvvm.fadd %a, %b : f64, f64 -> f32
+ llvm.return %f1 : f32
+}
+
+// -----
+
+llvm.func @fadd_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
+ // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @fadd_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+ // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ llvm.return %f1 : vector<2xf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
+ // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+llvm.func @fadd_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ llvm.return %f1 : vector<2xbf16>
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
+ // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+llvm.func @fadd_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
+ // expected-error@+1 {{only f16 + f16 is supported for f16 result type}}
+ %f1 = nvvm.fadd %a, %b : f16, bf16 -> f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @fadd_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
+ // expected-error@+1 {{only bf16 + bf16 is supported for bf16 result type}}
+ %f1 = nvvm.fadd %a, %b : bf16, f16 -> bf16
+ llvm.return %f1 : bf16
+}
+
+// -----
+
+// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
+// available.
+llvm.func @fadd_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
+ // expected-error@+1 {{FTZ with no saturation is not supported for f16 additions}}
+ %f1 = nvvm.fadd %a, %b {ftz} : f16, f16 -> f16
+ llvm.return %f1 : f16
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
new file mode 100644
index 0000000000000..badaad054717d
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
@@ -0,0 +1,684 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + bf16 -> f32
+llvm.func @fadd_f16_bf16(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat_ftz(half %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = fpext bfloat %1 to float
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
+ // CHECK-NEXT: ret float %5
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ llvm.return %f1 : f32
+}
+
+// f16 + f32 -> f32
+llvm.func @fadd_f16_f32(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rn_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rm_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rp_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_sat(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f16_f32_rz_sat_ftz(half %0, float %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+// f16 + f64 -> f64
+llvm.func @fadd_f16_f64(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rn(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rn(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rm(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rm(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rp(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rp(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f16_f64_rz(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f16_f64_rz(half %0, double %1) {
+ // CHECK-NEXT: %3 = fpext half %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// bf16 + f32 -> f32
+llvm.func @fadd_bf16_f32(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+llvm.func @fadd_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat_ftz(bfloat %0, float %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to float
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
+ // CHECK-NEXT: ret float %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ llvm.return %f1 : f32
+}
+
+// bf16 + f64 -> f64
+llvm.func @fadd_bf16_f64(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rn(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rn(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rm(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rm(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rp(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rp(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_bf16_f64_rz(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_bf16_f64_rz(bfloat %0, double %1) {
+ // CHECK-NEXT: %3 = fpext bfloat %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+// f32 + f64 -> f64
+llvm.func @fadd_f32_f64(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rn(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rn(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rm(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rm(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rp(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rp(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
+
+llvm.func @fadd_f32_f64_rz(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fadd_f32_f64_rz(float %0, double %1) {
+ // CHECK-NEXT: %3 = fpext float %0 to double
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
+ // CHECK-NEXT: ret double %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f64 -> f64
+ llvm.return %f1 : f64
+}
>From 740faebabe687ae9d71181b700ebb26f5019704e Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 2 Feb 2026 05:10:40 +0000
Subject: [PATCH 2/9] remove unused variable
---
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 033b420d0faee..4f8237bdc9c43 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -4998,7 +4998,6 @@ mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
auto thisOp = cast<NVVM::FloatAdditionOp>(op);
llvm::SmallVector<llvm::Value *> args;
auto rndMode = thisOp.getRnd();
- bool isRndRN = rndMode == NVVM::FPRoundingMode::RN;
auto isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
auto isFTZ = thisOp.getFtz();
>From 6eabf6d3d962e37301389f2ea2527c75a7e4ebd3 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 2 Feb 2026 06:28:00 +0000
Subject: [PATCH 3/9] refactor nvvm.fadd verifier
---
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 52 +++++++++++-----------
1 file changed, 25 insertions(+), 27 deletions(-)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 4f8237bdc9c43..f8799dda68014 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3078,22 +3078,31 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
auto rhsFType = getRhs().getType();
auto rndMode = getRnd();
auto satMode = getSat();
- auto isFTZ = getFtz();
+ bool isFTZ = getFtz();
+
+ auto getBaseFType = [](Type type) -> Type {
+ if (isa<VectorType>(type))
+ return cast<VectorType>(type).getElementType();
+ return type;
+ };
+
+ auto resBaseFType = getBaseFType(resFType);
+ auto lhsBaseFType = getBaseFType(lhsFType);
+ auto rhsBaseFType = getBaseFType(rhsFType);
+
+ bool sameTypeOperation =
+ llvm::all_equal({lhsBaseFType, rhsBaseFType, resBaseFType});
if (satMode == NVVM::SaturationMode::SATFINITE)
return emitOpError("SATFINITE saturation mode is not supported for "
"floating point addition operation");
- if (isa<VectorType>(resFType) != isa<VectorType>(lhsFType) ||
- isa<VectorType>(resFType) != isa<VectorType>(rhsFType))
+ if (!llvm::all_equal({isa<VectorType>(resFType), isa<VectorType>(lhsFType),
+ isa<VectorType>(rhsFType)}))
return emitOpError("cannot mix vector and scalar types for floating point "
"addition operation");
- if (isa<VectorType>(lhsFType) &&
- ((cast<VectorType>(lhsFType).getElementType() !=
- cast<VectorType>(rhsFType).getElementType()) ||
- (cast<VectorType>(lhsFType).getElementType() !=
- cast<VectorType>(resFType).getElementType())))
+ if (isa<VectorType>(resFType) && !sameTypeOperation)
return emitOpError(
"cannot mix different element types for vector floating point "
"addition operation");
@@ -3102,25 +3111,19 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
return emitOpError("FTZ and saturation are not supported for additions "
"involving f64 type");
- auto getBaseFType = [](Type type) -> Type {
- if (isa<VectorType>(type))
- return cast<VectorType>(type).getElementType();
- return type;
- };
-
- auto resBaseFType = getBaseFType(resFType);
- auto lhsBaseFType = getBaseFType(lhsFType);
- auto rhsBaseFType = getBaseFType(rhsFType);
-
if (resBaseFType.getIntOrFloatBitWidth() <
std::max(lhsBaseFType.getIntOrFloatBitWidth(),
rhsBaseFType.getIntOrFloatBitWidth()))
return emitOpError("result type must be at least as wide as the operands");
- if (resBaseFType.isF16() && rndMode != NVVM::FPRoundingMode::RN &&
- rndMode != NVVM::FPRoundingMode::NONE)
+ if (resBaseFType.isF16()) {
+ if (!(rndMode == NVVM::FPRoundingMode::RN ||
+ rndMode == NVVM::FPRoundingMode::NONE))
return emitOpError("only RN rounding mode is supported for f16 and "
"vector<2xf16> additions");
+ if (!sameTypeOperation)
+ return emitOpError("only f16 + f16 is supported for f16 result type");
+ }
if (resBaseFType.isBF16()) {
if (rndMode != NVVM::FPRoundingMode::RN &&
@@ -3130,15 +3133,10 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
if (satMode != NVVM::SaturationMode::NONE || isFTZ)
return emitOpError("FTZ and saturation are not supported for bf16 and "
"vector<2xbf16> additions");
+ if (!sameTypeOperation)
+ return emitOpError("only bf16 + bf16 is supported for bf16 result type");
}
- if (resBaseFType.isF16() && !(lhsBaseFType.isF16() && rhsBaseFType.isF16()))
- return emitOpError("only f16 + f16 is supported for f16 result type");
-
- if (resBaseFType.isBF16() &&
- !(lhsBaseFType.isBF16() && rhsBaseFType.isBF16()))
- return emitOpError("only bf16 + bf16 is supported for bf16 result type");
-
// FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
// PTX instructions since the corresponding LLVM intrinsic is missing. This
// should be removed once the intrinsics for f16 addition (with FTZ only) are
>From 7e770a308054dbc321b73d67eb837c87bfc6112f Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 2 Feb 2026 09:17:09 +0000
Subject: [PATCH 4/9] address comments
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 50 +++++++++++++++------
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 4 +-
2 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 0dce63c4e5a74..53f5a45915705 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1860,16 +1860,37 @@ def FPRoundingModeAttr : EnumAttr<NVVM_Dialect, FPRoundingMode, "fp_rnd_mode"> {
let assemblyFormat = "`<` $value `>`";
}
-def SaturationModeNone : I32EnumCase<"NONE", 0, "none">;
-def SaturationModeFinite : I32EnumCase<"SATFINITE", 1, "satfinite">;
-def SaturationModeSat : I32EnumCase<"SAT", 2, "sat">;
+def SaturationModeNone : I32EnumAttrCase<"NONE", 0, "none">;
+def SaturationModeFinite : I32EnumAttrCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeSat : I32EnumAttrCase<"SAT", 2, "sat">;
-def SaturationMode : I32Enum<"SaturationMode", "NVVM SaturationMode kind",
+def SaturationMode : I32EnumAttr<"SaturationMode", "NVVM SaturationMode kind",
[SaturationModeNone, SaturationModeFinite, SaturationModeSat]> {
+ let genSpecializedAttr = 0;
let cppNamespace = "::mlir::NVVM";
}
def SaturationModeAttr : EnumAttr<NVVM_Dialect, SaturationMode, "sat_mode"> {
- let assemblyFormat = "`<` $value `>`";
+ let summary = "Describes the saturation mode";
+ let description = [{
+ An `nvvm.sat_mode` attribute specifies the saturation mode for instructions
+ involving floating points or integers. It can be one of the following
+ values:
+ - `none`: No saturation is applied.
+ - `satfinite`: If the absolute value of input (ignoring sign) is greater
+ than the `MAX_NORM` of the specified destination format, then the result
+ is the sign-preserved `MAX_NORM` of the destination format and a positive
+ `MAX_NORM` in unsigned datatypes for which the destination sign is not
+ supported. If the input is `NaN`, then the result can be `NaN` or the
+ `MAX_NORM` of the destination format, depending on the format.
+ - `sat`: For integer destination types, this limits the value to `MININT..
+ MAXINT` and applies to both signed and unsigned integer datatypes. For
+ floating point destination types (applies to only `F16`, `F32`, and `F64`
+ types), this limits the value to the range `[0.0, 1.0]` and flushes NaN
+ results to positive zero.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cvt)
+}];
+ let assemblyFormat = "`<` $value `>`";
}
def NVVM_ConvertFloatToTF32Op : NVVM_Op<"convert.float.to.tf32"> {
@@ -6155,6 +6176,8 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
}];
}
+def NVVMFloatType : AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>;
+
def NVVM_FloatAdditionOp :
NVVM_SingleResultIntrinsicOp<"fadd", [Pure, Commutative]> {
let summary = [{
@@ -6167,21 +6190,22 @@ def NVVM_FloatAdditionOp :
The rounding mode to be used is specified by the `rnd` attribute,
saturation mode by the `sat` attribute, and FTZ by the `ftz` unit attribute.
- The result type must be at least as wide as the operands. The operands are
- converted to the result type before addition if it is wider.
+ The result type must be at least as wide as the operands. When the type of
+ the `res` is wider than the type of the operands, the operands are first
+ converted to the result type, and then the addition is performed.
For more information, see PTX ISA - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add),
[half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add),
[mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
}];
let arguments = (ins
- AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
- AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ NVVMFloatType:$lhs,
+ NVVMFloatType:$rhs,
DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
UnitAttr:$ftz
);
- let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let results = (outs NVVMFloatType:$res);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasVerifier = 1;
@@ -6215,13 +6239,13 @@ def NVVM_FloatSubtractionOp :
}];
let arguments = (ins
- AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
- AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ NVVMFloatType:$lhs,
+ NVVMFloatType:$rhs,
DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
UnitAttr:$ftz
);
- let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let results = (outs NVVMFloatType:$res);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasCanonicalizer = 1;
}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index f8799dda68014..ea919e3831ddf 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3119,8 +3119,8 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
if (resBaseFType.isF16()) {
if (!(rndMode == NVVM::FPRoundingMode::RN ||
rndMode == NVVM::FPRoundingMode::NONE))
- return emitOpError("only RN rounding mode is supported for f16 and "
- "vector<2xf16> additions");
+ return emitOpError("only RN rounding mode is supported for f16 and "
+ "vector<2xf16> additions");
if (!sameTypeOperation)
return emitOpError("only f16 + f16 is supported for f16 result type");
}
>From 98941b45552f268e009d8aacb05060e544f47d10 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Tue, 3 Feb 2026 12:22:26 +0000
Subject: [PATCH 5/9] address comments and add vector support
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 34 +-
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 180 +--
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 155 +++
.../nvvm/{ => fadd}/fadd_all_same_types.mlir | 20 +-
.../fadd_different_return_type.mlir | 32 +-
.../LLVMIR/nvvm/{ => fadd}/fadd_invalid.mlir | 46 +-
.../nvvm/{ => fadd}/fadd_mixed_arg_types.mlir | 48 +-
.../nvvm/fadd/fadd_vector_all_same_types.mlir | 285 ++++
.../fadd/fadd_vector_mixed_arg_types.mlir | 1229 +++++++++++++++++
9 files changed, 1806 insertions(+), 223 deletions(-)
rename mlir/test/Target/LLVMIR/nvvm/{ => fadd}/fadd_all_same_types.mlir (88%)
rename mlir/test/Target/LLVMIR/nvvm/{ => fadd}/fadd_different_return_type.mlir (93%)
rename mlir/test/Target/LLVMIR/nvvm/{ => fadd}/fadd_invalid.mlir (71%)
rename mlir/test/Target/LLVMIR/nvvm/{ => fadd}/fadd_mixed_arg_types.mlir (94%)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
create mode 100644 mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 53f5a45915705..175516cdc4c65 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6176,10 +6176,9 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
}];
}
-def NVVMFloatType : AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>;
+def SIMTFloatType : AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16, F32, F64]>]>;
-def NVVM_FloatAdditionOp :
- NVVM_SingleResultIntrinsicOp<"fadd", [Pure, Commutative]> {
+def NVVM_FAddOp : NVVM_Op<"fadd", [Pure, Commutative]> {
let summary = [{
Performs floating point addition operation with support for mixed precision
operands
@@ -6199,29 +6198,26 @@ def NVVM_FloatAdditionOp :
[mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
}];
let arguments = (ins
- NVVMFloatType:$lhs,
- NVVMFloatType:$rhs,
+ SIMTFloatType:$lhs,
+ SIMTFloatType:$rhs,
DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
- UnitAttr:$ftz
+ DefaultValuedAttr<BoolAttr, "false">:$ftz
);
- let results = (outs NVVMFloatType:$res);
+ let results = (outs SIMTFloatType:$res);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasVerifier = 1;
+ let extraClassDeclaration = [{
+ static void lowerFAddToLLVMIR(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+ }];
let llvmBuilder = [{
- auto [ID, args] = NVVM::FloatAdditionOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder);
- if(ID != llvm::Intrinsic::not_intrinsic) {
- llvm::Value *addResult = createIntrinsicCall(builder, ID, args);
- $res = ($_resultType->getScalarSizeInBits() >
- addResult->getType()->getScalarSizeInBits())
- ? builder.CreateFPExt(addResult, $_resultType) : addResult;
- }
+ NVVM::FAddOp::lowerFAddToLLVMIR(*op, moduleTranslation, builder);
}];
}
-def NVVM_FloatSubtractionOp :
- NVVM_Op<"fsub", [Pure]> {
+def NVVM_FSubOp : NVVM_Op<"fsub", [Pure]> {
let summary = [{
Performs floating point subtraction operation with support for mixed
precision operands
@@ -6239,13 +6235,13 @@ def NVVM_FloatSubtractionOp :
}];
let arguments = (ins
- NVVMFloatType:$lhs,
- NVVMFloatType:$rhs,
+ SIMTFloatType:$lhs,
+ SIMTFloatType:$rhs,
DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
UnitAttr:$ftz
);
- let results = (outs NVVMFloatType:$res);
+ let results = (outs SIMTFloatType:$res);
let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasCanonicalizer = 1;
}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index ea919e3831ddf..5b31e1bbc717a 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3072,12 +3072,12 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
-LogicalResult NVVM::FloatAdditionOp::verify() {
- auto resFType = getRes().getType();
- auto lhsFType = getLhs().getType();
- auto rhsFType = getRhs().getType();
- auto rndMode = getRnd();
- auto satMode = getSat();
+LogicalResult NVVM::FAddOp::verify() {
+ mlir::Type resFType = getRes().getType();
+ mlir::Type lhsFType = getLhs().getType();
+ mlir::Type rhsFType = getRhs().getType();
+ mlir::NVVM::FPRoundingMode rndMode = getRnd();
+ mlir::NVVM::SaturationMode satMode = getSat();
bool isFTZ = getFtz();
auto getBaseFType = [](Type type) -> Type {
@@ -3090,39 +3090,50 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
auto lhsBaseFType = getBaseFType(lhsFType);
auto rhsBaseFType = getBaseFType(rhsFType);
+ // Supported operand types based on result types are:
+ // Result Type : Operand Type(s)
+ // f64 : f64, f32, f16, bf16
+ // f32 : f32, f16, bf16
+ // f16 : f16
+ // bf16 : bf16
+ // vector<2xf64> : vector<2x{f64, f32, f16, bf16}>
+ // vector<2xf32> : vector<2x{f32, f16, bf16}>
+ // vector<2xf16> : vector<2xf16>
+ // vector<2xbf16> : vector<2xbf16>
+
bool sameTypeOperation =
llvm::all_equal({lhsBaseFType, rhsBaseFType, resBaseFType});
- if (satMode == NVVM::SaturationMode::SATFINITE)
- return emitOpError("SATFINITE saturation mode is not supported for "
- "floating point addition operation");
-
if (!llvm::all_equal({isa<VectorType>(resFType), isa<VectorType>(lhsFType),
isa<VectorType>(rhsFType)}))
return emitOpError("cannot mix vector and scalar types for floating point "
"addition operation");
- if (isa<VectorType>(resFType) && !sameTypeOperation)
- return emitOpError(
- "cannot mix different element types for vector floating point "
- "addition operation");
-
- if (resFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
- return emitOpError("FTZ and saturation are not supported for additions "
- "involving f64 type");
-
if (resBaseFType.getIntOrFloatBitWidth() <
std::max(lhsBaseFType.getIntOrFloatBitWidth(),
rhsBaseFType.getIntOrFloatBitWidth()))
return emitOpError("result type must be at least as wide as the operands");
+ if ((resBaseFType.isF16() || resBaseFType.isBF16()) && !sameTypeOperation) {
+ return emitOpError(
+ "only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type");
+ }
+
+ // Modifier constraints
+
+ if (satMode == NVVM::SaturationMode::SATFINITE)
+ return emitOpError("SATFINITE saturation mode is not supported for "
+ "floating point addition operation");
+
+ if (resBaseFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
+ return emitOpError("FTZ and saturation are not supported for additions "
+ "involving f64 type");
+
if (resBaseFType.isF16()) {
if (!(rndMode == NVVM::FPRoundingMode::RN ||
rndMode == NVVM::FPRoundingMode::NONE))
return emitOpError("only RN rounding mode is supported for f16 and "
"vector<2xf16> additions");
- if (!sameTypeOperation)
- return emitOpError("only f16 + f16 is supported for f16 result type");
}
if (resBaseFType.isBF16()) {
@@ -3133,18 +3144,15 @@ LogicalResult NVVM::FloatAdditionOp::verify() {
if (satMode != NVVM::SaturationMode::NONE || isFTZ)
return emitOpError("FTZ and saturation are not supported for bf16 and "
"vector<2xbf16> additions");
- if (!sameTypeOperation)
- return emitOpError("only bf16 + bf16 is supported for bf16 result type");
}
// FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
// PTX instructions since the corresponding LLVM intrinsic is missing. This
// should be removed once the intrinsics for f16 addition (with FTZ only) are
// available.
- if ((isa<VectorType>(resFType) || resBaseFType.isF16()) && isFTZ &&
- satMode == NVVM::SaturationMode::NONE)
+ if (resBaseFType.isF16() && isFTZ && satMode == NVVM::SaturationMode::NONE)
return emitOpError(
- "FTZ with no saturation is not supported for f16 additions");
+ "FTZ with no saturation is not supported for f16 result type");
return success();
}
@@ -3229,26 +3237,25 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() {
// Canonicalization patterns
//===----------------------------------------------------------------------===//
-struct ConvertFsubToFnegFadd : public OpRewritePattern<FloatSubtractionOp> {
- using OpRewritePattern<FloatSubtractionOp>::OpRewritePattern;
+struct ConvertFsubToFnegFadd : public OpRewritePattern<FSubOp> {
+ using OpRewritePattern<FSubOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(FloatSubtractionOp op,
+ LogicalResult matchAndRewrite(FSubOp op,
PatternRewriter &rewriter) const override {
Location loc = op.getLoc();
Value negRhs =
LLVM::FNegOp::create(rewriter, loc, op.getRhs().getType(), op.getRhs());
- rewriter.replaceOpWithNewOp<FloatAdditionOp>(op, op.getType(), op.getLhs(),
- negRhs, op.getRnd(),
- op.getSat(), op.getFtz());
+ rewriter.replaceOpWithNewOp<FAddOp>(op, op.getType(), op.getLhs(), negRhs,
+ op.getRnd(), op.getSat(), op.getFtz());
return success();
}
};
-void FloatSubtractionOp::getCanonicalizationPatterns(
- RewritePatternSet &patterns, MLIRContext *context) {
+void FSubOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
+ MLIRContext *context) {
patterns.add<ConvertFsubToFnegFadd>(context);
}
@@ -4991,111 +4998,6 @@ mlir::NVVM::IDArgPair TensormapReplaceOp::getIntrinsicIDAndArgs(
return {IDs[fieldIndex], args};
}
-mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
- Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
- auto thisOp = cast<NVVM::FloatAdditionOp>(op);
- llvm::SmallVector<llvm::Value *> args;
- auto rndMode = thisOp.getRnd();
- auto isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
- auto isFTZ = thisOp.getFtz();
-
- llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
- llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
-
- mlir::Type lhsType = thisOp.getLhs().getType();
- mlir::Type rhsType = thisOp.getRhs().getType();
- mlir::Type resType = thisOp.getRes().getType();
-
- // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
- // they are available.
- static constexpr llvm::Intrinsic::ID f16IDs[] = {
- llvm::Intrinsic::nvvm_add_rn_sat_f16,
- llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
- llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
- llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
- };
-
- static constexpr llvm::Intrinsic::ID f32IDs[] = {
- llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
- llvm::Intrinsic::nvvm_add_rn_f,
- llvm::Intrinsic::nvvm_add_rm_f,
- llvm::Intrinsic::nvvm_add_rp_f,
- llvm::Intrinsic::nvvm_add_rz_f,
- llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
- llvm::Intrinsic::nvvm_add_rn_sat_f,
- llvm::Intrinsic::nvvm_add_rm_sat_f,
- llvm::Intrinsic::nvvm_add_rp_sat_f,
- llvm::Intrinsic::nvvm_add_rz_sat_f,
- llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
- llvm::Intrinsic::nvvm_add_rn_ftz_f,
- llvm::Intrinsic::nvvm_add_rm_ftz_f,
- llvm::Intrinsic::nvvm_add_rp_ftz_f,
- llvm::Intrinsic::nvvm_add_rz_ftz_f,
- llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
- llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
- llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
- llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
- llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
- };
-
- static constexpr llvm::Intrinsic::ID f64IDs[] = {
- llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
- llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
- llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
-
- auto addIntrinsic = [&](llvm::Intrinsic::ID IID, llvm::Value *LHS = nullptr,
- llvm::Value *RHS = nullptr) -> NVVM::IDArgPair {
- args.push_back(LHS ? LHS : argLHS);
- args.push_back(RHS ? RHS : argRHS);
- return {IID, args};
- };
-
- // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
- // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
- // intrinsics are available.
- bool isVectorF16Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isF16();
- if (resType.isF16() || isVectorF16Add) {
- if (isSat) {
- unsigned index = (isVectorF16Add << 1) | isFTZ;
- return addIntrinsic(f16IDs[index]);
- } else {
- mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
- return {llvm::Intrinsic::not_intrinsic, args};
- }
- }
-
- // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
- bool isVectorBF16Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isBF16();
- if (resType.isBF16() || isVectorBF16Add) {
- mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
- return {llvm::Intrinsic::not_intrinsic, args};
- }
-
- // f64 + f64/f32/f16/bf16
- if (resType.isF64()) {
- llvm::Value *lhsF64 =
- lhsType.isF64() ? argLHS
- : builder.CreateFPExt(argLHS, builder.getDoubleTy());
- llvm::Value *rhsF64 =
- rhsType.isF64() ? argRHS
- : builder.CreateFPExt(argRHS, builder.getDoubleTy());
- unsigned index = static_cast<unsigned>(rndMode);
- return addIntrinsic(f64IDs[index], lhsF64, rhsF64);
- }
-
- // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 + f32/f16/bf16
- llvm::Value *lhsF32 = lhsType.isF32()
- ? argLHS
- : builder.CreateFPExt(argLHS, builder.getFloatTy());
- llvm::Value *rhsF32 = rhsType.isF32()
- ? argRHS
- : builder.CreateFPExt(argRHS, builder.getFloatTy());
- unsigned index = ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
- return addIntrinsic(f32IDs[index], lhsF32, rhsF32);
-}
-
//===----------------------------------------------------------------------===//
// NVVM tcgen05.mma functions
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index b7427a559fb79..0ed6726523926 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -446,6 +446,161 @@ getFenceProxySyncRestrictID(NVVM::MemOrderKind order) {
nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster;
}
+/// Lowers an `nvvm.fadd` op to LLVM IR and maps the op result to the value
+/// created for it.
+///
+/// The lowering is selected from the result type:
+///   * f16 or a vector of f16: a saturating NVVM intrinsic when a saturation
+///     mode is set, otherwise a plain LLVM `fadd`.
+///   * bf16 or a vector of bf16: a plain LLVM `fadd`.
+///   * f64 / f32: operands are extended to the result type where necessary
+///     and added with the NVVM intrinsic picked by the rounding mode (and,
+///     for f32, by the FTZ and saturation flags).
+///   * vector of f64 / f32: lowered lane by lane (two lanes) with the scalar
+///     intrinsics, rebuilding the result with `insertelement`.
+///
+/// \param op      The operation to lower; must be an `NVVM::FAddOp`.
+/// \param mt      Module translation used to look up operands, convert types
+///                and record the result mapping.
+/// \param builder IR builder used to emit the new instructions.
+void NVVM::FAddOp::lowerFAddToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
+                                     llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::FAddOp>(op);
+  NVVM::FPRoundingMode rndMode = thisOp.getRnd();
+  NVVM::SaturationMode satMode = thisOp.getSat();
+  bool isFTZ = thisOp.getFtz();
+  bool isSat = satMode != NVVM::SaturationMode::NONE;
+
+  llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
+  llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
+
+  mlir::Type lhsType = thisOp.getLhs().getType();
+  mlir::Type rhsType = thisOp.getRhs().getType();
+  mlir::Type resType = thisOp.getRes().getType();
+
+  // Indexed by (isVector << 1) | isFTZ; only the saturating variants exist.
+  // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
+  // they are available.
+  static constexpr llvm::Intrinsic::ID f16IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
+  };
+
+  // Indexed by ((isFTZ << 1) | isSat) * 5 + rounding mode: four groups of five
+  // entries, one entry per rounding-mode value.
+  static constexpr llvm::Intrinsic::ID f32IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_f,
+      llvm::Intrinsic::nvvm_add_rm_f,
+      llvm::Intrinsic::nvvm_add_rp_f,
+      llvm::Intrinsic::nvvm_add_rz_f,
+      llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_sat_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
+  };
+
+  // Indexed by the rounding mode.
+  static constexpr llvm::Intrinsic::ID f64IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
+      llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
+
+  // Emits a call to `IID`; operands default to the op's own operands when no
+  // (already converted) replacement is supplied.
+  auto addIntrinsic = [&](llvm::Intrinsic::ID IID, llvm::Value *LHS = nullptr,
+                          llvm::Value *RHS = nullptr) -> llvm::CallInst * {
+    llvm::SmallVector<llvm::Value *, 2> callArgs;
+    callArgs.push_back(LHS ? LHS : argLHS);
+    callArgs.push_back(RHS ? RHS : argRHS);
+    return createIntrinsicCall(builder, IID, callArgs);
+  };
+
+  // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
+  // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
+  // intrinsics are available.
+  bool isVectorF16Add = isa<VectorType>(resType) &&
+                        cast<VectorType>(resType).getElementType().isF16();
+  if (resType.isF16() || isVectorF16Add) {
+    if (isSat) {
+      unsigned index = (isVectorF16Add << 1) | isFTZ;
+      mt.mapValue(thisOp.getRes(), addIntrinsic(f16IDs[index]));
+      return;
+    }
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return;
+  }
+
+  // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
+  bool isVectorBF16Add = isa<VectorType>(resType) &&
+                         cast<VectorType>(resType).getElementType().isBF16();
+  if (resType.isBF16() || isVectorBF16Add) {
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return;
+  }
+
+  // Helper functions for casting and adding vectors
+  // Returns `value` unchanged when its type already converts to `targetType`,
+  // otherwise an fpext of it to `targetType`.
+  auto getCastedFloat = [&](mlir::Type elemType, llvm::Value *value,
+                            llvm::Type *targetType) -> llvm::Value * {
+    return (mt.convertType(elemType) == targetType)
+               ? value
+               : builder.CreateFPExt(value, targetType);
+  };
+  // Adds the two lanes of the vector operands one at a time with the scalar
+  // intrinsic `intrinsicID`, inserting each sum into `result`.
+  auto addVector = [&](llvm::Type *targetType, llvm::Intrinsic::ID intrinsicID,
+                       llvm::Value *result) -> llvm::Value * {
+    auto lhsElemType = cast<VectorType>(lhsType).getElementType();
+    auto rhsElemType = cast<VectorType>(rhsType).getElementType();
+    for (int64_t i = 0; i < 2; ++i) {
+      llvm::Value *lhsElemi =
+          builder.CreateExtractElement(argLHS, builder.getInt32(i));
+      llvm::Value *rhsElemi =
+          builder.CreateExtractElement(argRHS, builder.getInt32(i));
+      llvm::Value *lhsCasted =
+          getCastedFloat(lhsElemType, lhsElemi, targetType);
+      llvm::Value *rhsCasted =
+          getCastedFloat(rhsElemType, rhsElemi, targetType);
+      llvm::Value *sum = addIntrinsic(intrinsicID, lhsCasted, rhsCasted);
+      result = builder.CreateInsertElement(result, sum, builder.getInt32(i));
+    }
+    return result;
+  };
+
+  // f64 + f64/f32/f16/bf16
+  bool isVectorF64Add = isa<VectorType>(resType) &&
+                        cast<VectorType>(resType).getElementType().isF64();
+
+  if (resType.isF64()) {
+    llvm::Value *lhsF64 =
+        getCastedFloat(lhsType, argLHS, builder.getDoubleTy());
+    llvm::Value *rhsF64 =
+        getCastedFloat(rhsType, argRHS, builder.getDoubleTy());
+    unsigned index = static_cast<unsigned>(rndMode);
+    mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index], lhsF64, rhsF64));
+    return;
+  }
+  if (isVectorF64Add) {
+    // NOTE(review): new uses of undef are discouraged in LLVM; consider
+    // llvm::PoisonValue here and below (the FileCheck expectations would need
+    // to be updated together with it).
+    llvm::Value *result = llvm::UndefValue::get(
+        llvm::FixedVectorType::get(builder.getDoubleTy(), 2));
+    unsigned index = static_cast<unsigned>(rndMode);
+    result = addVector(builder.getDoubleTy(), f64IDs[index], result);
+    mt.mapValue(thisOp.getRes(), result);
+    return;
+  }
+
+  // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 +
+  // f32/f16/bf16
+  bool isVectorF32Add = isa<VectorType>(resType) &&
+                        cast<VectorType>(resType).getElementType().isF32();
+
+  if (resType.isF32()) {
+    llvm::Value *lhsF32 = getCastedFloat(lhsType, argLHS, builder.getFloatTy());
+    llvm::Value *rhsF32 = getCastedFloat(rhsType, argRHS, builder.getFloatTy());
+    unsigned index =
+        ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
+    mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index], lhsF32, rhsF32));
+    return;
+  }
+  if (isVectorF32Add) {
+    llvm::Value *result = llvm::UndefValue::get(
+        llvm::FixedVectorType::get(builder.getFloatTy(), 2));
+    unsigned index =
+        ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
+
+    result = addVector(builder.getFloatTy(), f32IDs[index], result);
+    mt.mapValue(thisOp.getRes(), result);
+    return;
+  }
+
+  // Presumably unreachable: the op verifier is expected to reject every other
+  // result type (NOTE(review): confirm against the FAddOp verifier). Failing
+  // loudly here is preferable to returning without mapping the op result,
+  // which would only surface later when a user of the result is translated.
+  llvm_unreachable("unsupported result type for nvvm.fadd lowering");
+}
+
namespace {
/// Implementation of the dialect interface that converts operations belonging
/// to the NVVM dialect to LLVM IR.
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir
similarity index 88%
rename from mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir
index 2aa2bf3a4906b..6233c8cb62bf9 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir
@@ -12,7 +12,7 @@ llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
%f1 = nvvm.fadd %a, %b : f16, f16 -> f16
%f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
%f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f16
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f16
llvm.return %f4 : f16
}
@@ -53,20 +53,20 @@ llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
%f1 = nvvm.fadd %a, %b : f32, f32 -> f32
%f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
%f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f32, f32 -> f32
- %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32, f32 -> f32
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
%f6 = nvvm.fadd %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
%f7 = nvvm.fadd %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f32, f32 -> f32
- %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32, f32 -> f32
+ %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
%f10 = nvvm.fadd %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
%f11 = nvvm.fadd %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f32, f32 -> f32
- %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32, f32 -> f32
+ %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
%f14 = nvvm.fadd %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
%f15 = nvvm.fadd %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f32, f32 -> f32
- %f17 = nvvm.fadd %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32, f32 -> f32
+ %f17 = nvvm.fadd %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
llvm.return %f17 : f32
}
@@ -86,4 +86,4 @@ llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 {
%f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
%f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
llvm.return %f5 : f64
-}
\ No newline at end of file
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir
similarity index 93%
rename from mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
rename to mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir
index 8f54272bd31ff..daac2ccbeca12 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir
@@ -30,7 +30,7 @@ llvm.func @fadd_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -41,7 +41,7 @@ llvm.func @fadd_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -74,7 +74,7 @@ llvm.func @fadd_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -85,7 +85,7 @@ llvm.func @fadd_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -118,7 +118,7 @@ llvm.func @fadd_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -129,7 +129,7 @@ llvm.func @fadd_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -162,7 +162,7 @@ llvm.func @fadd_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -173,7 +173,7 @@ llvm.func @fadd_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
@@ -207,7 +207,7 @@ llvm.func @fadd_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -218,7 +218,7 @@ llvm.func @fadd_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -251,7 +251,7 @@ llvm.func @fadd_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -262,7 +262,7 @@ llvm.func @fadd_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -295,7 +295,7 @@ llvm.func @fadd_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -306,7 +306,7 @@ llvm.func @fadd_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -339,7 +339,7 @@ llvm.func @fadd_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -350,7 +350,7 @@ llvm.func @fadd_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir
similarity index 71%
rename from mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
rename to mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir
index a267e5889912f..6a287d06f14c5 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir
@@ -18,17 +18,9 @@ llvm.func @fadd_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> f32 {
// -----
-llvm.func @fadd_invalid_vector_element_types_mix(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
- // expected-error @+1 {{cannot mix different element types for vector floating point addition operation}}
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xbf16>
- llvm.return %f1 : vector<2xbf16>
-}
-
-// -----
-
llvm.func @fadd_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
// expected-error @+1 {{FTZ and saturation are not supported for additions involving f64 type}}
- %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : f64, f64 -> f64
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64, f64 -> f64
llvm.return %f1 : f64
}
@@ -40,7 +32,15 @@ llvm.func @fadd_invalid_result_width(%a : f64, %b : f64) -> f32 {
llvm.return %f1 : f32
}
-// ----
+// -----
+
+llvm.func @fadd_invalid_result_width_vector(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf32> {
+ // expected-error @+1 {{result type must be at least as wide as the operands}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// -----
llvm.func @fadd_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
// expected-error @+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
@@ -76,32 +76,48 @@ llvm.func @fadd_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>
llvm.func @fadd_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
// expected-error @+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz} : bf16, bf16 -> bf16
+ %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> bf16
llvm.return %f1 : bf16
}
// -----
llvm.func @fadd_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
- // expected-error @+1 {{only f16 + f16 is supported for f16 result type}}
+ // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
%f1 = nvvm.fadd %a, %b : f16, bf16 -> f16
llvm.return %f1 : f16
}
// -----
+llvm.func @fadd_invalid_f16_result_type_vector(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf16> {
+ // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf16>
+ llvm.return %f1 : vector<2xf16>
+}
+
+// -----
+
llvm.func @fadd_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
- // expected-error @+1 {{only bf16 + bf16 is supported for bf16 result type}}
+ // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
%f1 = nvvm.fadd %a, %b : bf16, f16 -> bf16
llvm.return %f1 : bf16
}
// -----
+llvm.func @fadd_invalid_bf16_result_type_vector(%a : vector<2xbf16>, %b : vector<2xf16>) -> vector<2xbf16> {
+ // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
+ %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf16> -> vector<2xbf16>
+ llvm.return %f1 : vector<2xbf16>
+}
+
+// -----
+
// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
// available.
llvm.func @fadd_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
- // expected-error @+1 {{FTZ with no saturation is not supported for f16 additions}}
- %f1 = nvvm.fadd %a, %b {ftz} : f16, f16 -> f16
+ // expected-error @+1 {{FTZ with no saturation is not supported for f16 result type}}
+ %f1 = nvvm.fadd %a, %b {ftz=true} : f16, f16 -> f16
llvm.return %f1 : f16
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir
similarity index 94%
rename from mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir
index badaad054717d..7da148b4bc2b6 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir
@@ -41,7 +41,7 @@ llvm.func @fadd_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -52,7 +52,7 @@ llvm.func @fadd_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -85,7 +85,7 @@ llvm.func @fadd_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -96,7 +96,7 @@ llvm.func @fadd_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -129,7 +129,7 @@ llvm.func @fadd_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -140,7 +140,7 @@ llvm.func @fadd_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -173,7 +173,7 @@ llvm.func @fadd_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -184,7 +184,7 @@ llvm.func @fadd_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, bf16 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
@@ -225,7 +225,7 @@ llvm.func @fadd_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -235,7 +235,7 @@ llvm.func @fadd_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -265,7 +265,7 @@ llvm.func @fadd_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -275,7 +275,7 @@ llvm.func @fadd_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -305,7 +305,7 @@ llvm.func @fadd_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -315,7 +315,7 @@ llvm.func @fadd_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -345,7 +345,7 @@ llvm.func @fadd_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -355,7 +355,7 @@ llvm.func @fadd_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
@@ -447,7 +447,7 @@ llvm.func @fadd_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -457,7 +457,7 @@ llvm.func @fadd_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -487,7 +487,7 @@ llvm.func @fadd_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -497,7 +497,7 @@ llvm.func @fadd_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -527,7 +527,7 @@ llvm.func @fadd_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -537,7 +537,7 @@ llvm.func @fadd_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -567,7 +567,7 @@ llvm.func @fadd_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
@@ -577,7 +577,7 @@ llvm.func @fadd_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz} : bf16, f32 -> f32
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
new file mode 100644
index 0000000000000..2bac44df92d0d
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
@@ -0,0 +1,285 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// vector<2xf16> + vector<2xf16> -> vector<2xf16>: default and rn forms are expected as plain fadd; sat and sat+ftz forms as the v2f16 add intrinsics.
+llvm.func @fadd_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+ // CHECK-LABEL: define <2 x half> @fadd_vector_f16_f16(<2 x half> %0, <2 x half> %1) {
+ // CHECK-NEXT: %3 = fadd <2 x half> %0, %1
+ // CHECK-NEXT: %4 = fadd <2 x half> %3, %3
+ // CHECK-NEXT: %5 = call <2 x half> @llvm.nvvm.add.rn.sat.v2f16(<2 x half> %4, <2 x half> %4)
+ // CHECK-NEXT: %6 = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %5, <2 x half> %5)
+ // CHECK-NEXT: ret <2 x half> %6
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ llvm.return %f4 : vector<2xf16> // return the last sum so no add in the chain is dead
+}
+
+// vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>: both the attribute-free and the explicit rn form are expected as a plain vector fadd.
+llvm.func @fadd_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // CHECK-LABEL: define <2 x bfloat> @fadd_vector_bf16_bf16(<2 x bfloat> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = fadd <2 x bfloat> %0, %1
+ // CHECK-NEXT: %4 = fadd <2 x bfloat> %3, %3
+ // CHECK-NEXT: ret <2 x bfloat> %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ llvm.return %f2 : vector<2xbf16>
+}
+
+// vector<2xf32> + vector<2xf32> -> vector<2xf32>: expected to be scalarized (extract each lane, call the scalar llvm.nvvm.add.rn[.ftz][.sat].f intrinsic, reinsert).
+llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rn(<2 x float> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
+ // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rn.f(float %7, float %8)
+ // CHECK-NEXT: %10 = insertelement <2 x float> %6, float %9, i32 1
+ // CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rn.f(float %15, float %16)
+ // CHECK-NEXT: %18 = insertelement <2 x float> %14, float %17, i32 1
+ // CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %21 = call float @llvm.nvvm.add.rn.sat.f(float %19, float %20)
+ // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %25 = call float @llvm.nvvm.add.rn.sat.f(float %23, float %24)
+ // CHECK-NEXT: %26 = insertelement <2 x float> %22, float %25, i32 1
+ // CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %29 = call float @llvm.nvvm.add.rn.ftz.f(float %27, float %28)
+ // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rn.ftz.f(float %31, float %32)
+ // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
+ // CHECK-NEXT: %35 = extractelement <2 x float> %34, i32 0
+ // CHECK-NEXT: %36 = extractelement <2 x float> %34, i32 0
+ // CHECK-NEXT: %37 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %35, float %36)
+ // CHECK-NEXT: %38 = insertelement <2 x float> undef, float %37, i32 0
+ // CHECK-NEXT: %39 = extractelement <2 x float> %34, i32 1
+ // CHECK-NEXT: %40 = extractelement <2 x float> %34, i32 1
+ // CHECK-NEXT: %41 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %39, float %40)
+ // CHECK-NEXT: %42 = insertelement <2 x float> %38, float %41, i32 1
+ // CHECK-NEXT: ret <2 x float> %42
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f5 : vector<2xf32> // return the last sum so the sat+ftz add is not dead
+}
+
+llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { // rm rounding chained as plain, sat, ftz, sat+ftz; each op is expected scalarized per lane
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rm(<2 x float> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
+ // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rm.f(float %7, float %8)
+ // CHECK-NEXT: %10 = insertelement <2 x float> %6, float %9, i32 1
+ // CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rm.sat.f(float %15, float %16)
+ // CHECK-NEXT: %18 = insertelement <2 x float> %14, float %17, i32 1
+ // CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %21 = call float @llvm.nvvm.add.rm.ftz.f(float %19, float %20)
+ // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %25 = call float @llvm.nvvm.add.rm.ftz.f(float %23, float %24)
+ // CHECK-NEXT: %26 = insertelement <2 x float> %22, float %25, i32 1
+ // CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %29 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %27, float %28)
+ // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %31, float %32)
+ // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
+ // CHECK-NEXT: ret <2 x float> %34
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f4 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { // rp rounding chained as plain, sat, ftz, sat+ftz; each op is expected scalarized per lane
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rp(<2 x float> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
+ // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rp.f(float %7, float %8)
+ // CHECK-NEXT: %10 = insertelement <2 x float> %6, float %9, i32 1
+ // CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rp.sat.f(float %15, float %16)
+ // CHECK-NEXT: %18 = insertelement <2 x float> %14, float %17, i32 1
+ // CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %21 = call float @llvm.nvvm.add.rp.ftz.f(float %19, float %20)
+ // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %25 = call float @llvm.nvvm.add.rp.ftz.f(float %23, float %24)
+ // CHECK-NEXT: %26 = insertelement <2 x float> %22, float %25, i32 1
+ // CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %29 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %27, float %28)
+ // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %31, float %32)
+ // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
+ // CHECK-NEXT: ret <2 x float> %34
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f4 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { // rz rounding chained as plain, sat, ftz, sat+ftz; each op is expected scalarized per lane
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rz(<2 x float> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
+ // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rz.f(float %7, float %8)
+ // CHECK-NEXT: %10 = insertelement <2 x float> %6, float %9, i32 1
+ // CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rz.sat.f(float %15, float %16)
+ // CHECK-NEXT: %18 = insertelement <2 x float> %14, float %17, i32 1
+ // CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
+ // CHECK-NEXT: %21 = call float @llvm.nvvm.add.rz.ftz.f(float %19, float %20)
+ // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
+ // CHECK-NEXT: %25 = call float @llvm.nvvm.add.rz.ftz.f(float %23, float %24)
+ // CHECK-NEXT: %26 = insertelement <2 x float> %22, float %25, i32 1
+ // CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
+ // CHECK-NEXT: %29 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %27, float %28)
+ // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
+ // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %31, float %32)
+ // CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
+ // CHECK-NEXT: ret <2 x float> %34
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f4 : vector<2xf32>
+}
+
+// vector<2xf64> + vector<2xf64> -> vector<2xf64>: expected scalarized per lane to llvm.nvvm.add.<rnd>.d; the f64 tests in this file use only the rounding-mode attribute.
+llvm.func @fadd_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rn(<2 x double> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
+ // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %9 = call double @llvm.nvvm.add.rn.d(double %7, double %8)
+ // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
+ // CHECK-NEXT: %11 = extractelement <2 x double> %10, i32 0
+ // CHECK-NEXT: %12 = extractelement <2 x double> %10, i32 0
+ // CHECK-NEXT: %13 = call double @llvm.nvvm.add.rn.d(double %11, double %12)
+ // CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+ // CHECK-NEXT: %15 = extractelement <2 x double> %10, i32 1
+ // CHECK-NEXT: %16 = extractelement <2 x double> %10, i32 1
+ // CHECK-NEXT: %17 = call double @llvm.nvvm.add.rn.d(double %15, double %16)
+ // CHECK-NEXT: %18 = insertelement <2 x double> %14, double %17, i32 1
+ // CHECK-NEXT: ret <2 x double> %18
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f2 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { // rm rounding: expected as one llvm.nvvm.add.rm.d call per lane
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rm(<2 x double> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
+ // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %9 = call double @llvm.nvvm.add.rm.d(double %7, double %8)
+ // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
+ // CHECK-NEXT: ret <2 x double> %10
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { // rp rounding: expected as one llvm.nvvm.add.rp.d call per lane
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rp(<2 x double> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
+ // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %9 = call double @llvm.nvvm.add.rp.d(double %7, double %8)
+ // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
+ // CHECK-NEXT: ret <2 x double> %10
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { // rz rounding: expected as one llvm.nvvm.add.rz.d call per lane
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rz(<2 x double> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
+ // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
+ // CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %9 = call double @llvm.nvvm.add.rz.d(double %7, double %8)
+ // CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
+ // CHECK-NEXT: ret <2 x double> %10
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
new file mode 100644
index 0000000000000..c5f70aed32c8b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
@@ -0,0 +1,1229 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// vector<2xf16> + vector<2xbf16> -> vector<2xf32>: per lane, both operands are expected to be fpext'ed to float and added with a scalar f32 add intrinsic (add.rn.f when no attributes are given).
+llvm.func @fadd_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // explicit rn: lanes are fpext'ed to float, then llvm.nvvm.add.rn.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rn + sat: lanes are fpext'ed to float, then llvm.nvvm.add.rn.sat.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_sat(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rn + ftz: lanes are fpext'ed to float, then llvm.nvvm.add.rn.ftz.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.ftz.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rn + sat + ftz: lanes are fpext'ed to float, then llvm.nvvm.add.rn.ftz.sat.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rm: lanes are fpext'ed to float, then llvm.nvvm.add.rm.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rm + sat: lanes are fpext'ed to float, then llvm.nvvm.add.rm.sat.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_sat(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rm + ftz: lanes are fpext'ed to float, then llvm.nvvm.add.rm.ftz.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.ftz.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rm + sat + ftz: lanes are fpext'ed to float, then llvm.nvvm.add.rm.ftz.sat.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rp: lanes are fpext'ed to float, then llvm.nvvm.add.rp.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> { // rp + sat: lanes are fpext'ed to float, then llvm.nvvm.add.rp.sat.f
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_sat(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rp and ftz=true.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rp.ftz.f.
+llvm.func @fadd_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.ftz.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rp, sat and ftz=true.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rp.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rz.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rz.f.
+llvm.func @fadd_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rz and sat.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rz.sat.f.
+llvm.func @fadd_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_sat(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rz and ftz=true.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rz.ftz.f.
+llvm.func @fadd_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.ftz.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xbf16> -> vector<2xf32> with rnd = rz, sat and ftz=true.
+// Expected IR, per lane: fpext both operands to float, then llvm.nvvm.add.rz.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = fpext bfloat %4 to float
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %6)
+ // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
+ // CHECK-NEXT: %11 = fpext half %9 to float
+ // CHECK-NEXT: %12 = fpext bfloat %10 to float
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %11, float %12)
+ // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
+ // CHECK-NEXT: ret <2 x float> %14
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// vector<2xf16> + vector<2xf32> -> vector<2xf32>
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with no rnd/sat/ftz attributes.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rn.f (same intrinsic as the explicit rn case).
+llvm.func @fadd_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rn.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rn.f.
+llvm.func @fadd_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rn and sat.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rn.sat.f.
+llvm.func @fadd_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_sat(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rn and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rn.ftz.f.
+llvm.func @fadd_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rn, sat and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rn.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_sat_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rm.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rm.f.
+llvm.func @fadd_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rm and sat.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rm.sat.f.
+llvm.func @fadd_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_sat(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rm and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rm.ftz.f.
+llvm.func @fadd_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rm, sat and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rm.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_sat_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rp.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rp.f.
+llvm.func @fadd_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rp and sat.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rp.sat.f.
+llvm.func @fadd_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_sat(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rp and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rp.ftz.f.
+llvm.func @fadd_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rp, sat and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rp.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_sat_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rz.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rz.f.
+llvm.func @fadd_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rz and sat.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rz.sat.f.
+llvm.func @fadd_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_sat(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rz and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rz.ftz.f.
+llvm.func @fadd_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf32> -> vector<2xf32> with rnd = rz, sat and ftz=true.
+// Expected IR, per lane: fpext only the f16 operand to float, then llvm.nvvm.add.rz.ftz.sat.f (ftz precedes sat in the name).
+llvm.func @fadd_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_sat_ftz(<2 x half> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// vector<2xf16> + vector<2xf64> -> vector<2xf64>
+// nvvm.fadd on vector<2xf16> + vector<2xf64> -> vector<2xf64> with no rnd attribute.
+// Expected IR, per lane: fpext only the f16 operand to double, then llvm.nvvm.add.rn.d (same intrinsic as the explicit rn case).
+llvm.func @fadd_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64(<2 x half> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf64> -> vector<2xf64> with rnd = rn.
+// Expected IR, per lane: fpext only the f16 operand to double, then llvm.nvvm.add.rn.d.
+llvm.func @fadd_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rn(<2 x half> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf64> -> vector<2xf64> with rnd = rm.
+// Expected IR, per lane: fpext only the f16 operand to double, then llvm.nvvm.add.rm.d.
+llvm.func @fadd_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rm(<2 x half> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf64> -> vector<2xf64> with rnd = rp.
+// Expected IR, per lane: fpext only the f16 operand to double, then llvm.nvvm.add.rp.d.
+llvm.func @fadd_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rp(<2 x half> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// nvvm.fadd on vector<2xf16> + vector<2xf64> -> vector<2xf64> with rnd = rz.
+// Expected IR, per lane: fpext only the f16 operand to double, then llvm.nvvm.add.rz.d.
+llvm.func @fadd_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rz(<2 x half> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext half %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext half %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// vector<2xbf16> + vector<2xf32> -> vector<2xf32>
+// nvvm.fadd on vector<2xbf16> + vector<2xf32> -> vector<2xf32> with no rnd/sat/ftz attributes.
+// Expected IR, per lane: fpext only the bf16 operand to float, then llvm.nvvm.add.rn.f (same intrinsic as the explicit rn case).
+llvm.func @fadd_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xbf16> + vector<2xf32> -> vector<2xf32> with rnd = rn.
+// Expected IR, per lane: fpext only the bf16 operand to float, then llvm.nvvm.add.rn.f.
+llvm.func @fadd_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// nvvm.fadd on vector<2xbf16> + vector<2xf32> -> vector<2xf32> with rnd = rn and sat.
+// Expected IR, per lane: fpext only the bf16 operand to float, then llvm.nvvm.add.rn.sat.f.
+llvm.func @fadd_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_sat(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_sat(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_sat(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_sat(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+llvm.func @fadd_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to float
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
+ // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to float
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %10, float %9)
+ // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
+ // CHECK-NEXT: ret <2 x float> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ llvm.return %f1 : vector<2xf32>
+}
+
+// vector<2xbf16> + vector<2xf64> -> vector<2xf64>
+llvm.func @fadd_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64(<2 x bfloat> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rn(<2 x bfloat> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rm(<2 x bfloat> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rp(<2 x bfloat> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rz(<2 x bfloat> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext bfloat %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext bfloat %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+// vector<2xf32> + vector<2xf64> -> vector<2xf64>
+llvm.func @fadd_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64(<2 x float> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext float %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext float %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rn(<2 x float> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext float %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext float %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rm(<2 x float> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext float %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext float %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rp(<2 x float> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext float %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext float %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
+
+llvm.func @fadd_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rz(<2 x float> %0, <2 x double> %1) {
+ // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
+ // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
+ // CHECK-NEXT: %5 = fpext float %3 to double
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
+ // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
+ // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
+ // CHECK-NEXT: %10 = fpext float %8 to double
+ // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
+ // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
+ // CHECK-NEXT: ret <2 x double> %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ llvm.return %f1 : vector<2xf64>
+}
>From f753954c49b9a575320cc845a3f0ca89122c5429 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Tue, 3 Feb 2026 12:54:57 +0000
Subject: [PATCH 6/9] replace undef with poison
---
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 4 +-
.../nvvm/fadd/fadd_vector_all_same_types.mlir | 44 +++---
.../fadd/fadd_vector_mixed_arg_types.mlir | 132 +++++++++---------
3 files changed, 90 insertions(+), 90 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 0ed6726523926..2623b0ea4a2f4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -569,7 +569,7 @@ void NVVM::FAddOp::lowerFAddToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index], lhsF64, rhsF64));
return;
} else if (isVectorF64Add) {
- llvm::Value *result = llvm::UndefValue::get(
+ llvm::Value *result = llvm::PoisonValue::get(
llvm::FixedVectorType::get(builder.getDoubleTy(), 2));
unsigned index = static_cast<unsigned>(rndMode);
result = addVector(builder.getDoubleTy(), f64IDs[index], result);
@@ -590,7 +590,7 @@ void NVVM::FAddOp::lowerFAddToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index], lhsF32, rhsF32));
return;
} else if (isVectorF32Add) {
- llvm::Value *result = llvm::UndefValue::get(
+ llvm::Value *result = llvm::PoisonValue::get(
llvm::FixedVectorType::get(builder.getFloatTy(), 2));
unsigned index =
((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
index 2bac44df92d0d..b55cf92405a8e 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
@@ -34,7 +34,7 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
- // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x float> poison, float %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %9 = call float @llvm.nvvm.add.rn.f(float %7, float %8)
@@ -42,7 +42,7 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %14 = insertelement <2 x float> poison, float %13, i32 0
// CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %17 = call float @llvm.nvvm.add.rn.f(float %15, float %16)
@@ -50,7 +50,7 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %21 = call float @llvm.nvvm.add.rn.sat.f(float %19, float %20)
- // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %22 = insertelement <2 x float> poison, float %21, i32 0
// CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %25 = call float @llvm.nvvm.add.rn.sat.f(float %23, float %24)
@@ -58,7 +58,7 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %29 = call float @llvm.nvvm.add.rn.ftz.f(float %27, float %28)
- // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %30 = insertelement <2 x float> poison, float %29, i32 0
// CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %33 = call float @llvm.nvvm.add.rn.ftz.f(float %31, float %32)
@@ -66,7 +66,7 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %35 = extractelement <2 x float> %34, i32 0
// CHECK-NEXT: %36 = extractelement <2 x float> %34, i32 0
// CHECK-NEXT: %37 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %35, float %36)
- // CHECK-NEXT: %38 = insertelement <2 x float> undef, float %37, i32 0
+ // CHECK-NEXT: %38 = insertelement <2 x float> poison, float %37, i32 0
// CHECK-NEXT: %39 = extractelement <2 x float> %34, i32 1
// CHECK-NEXT: %40 = extractelement <2 x float> %34, i32 1
// CHECK-NEXT: %41 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %39, float %40)
@@ -86,7 +86,7 @@ llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
- // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x float> poison, float %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %9 = call float @llvm.nvvm.add.rm.f(float %7, float %8)
@@ -94,7 +94,7 @@ llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %14 = insertelement <2 x float> poison, float %13, i32 0
// CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %17 = call float @llvm.nvvm.add.rm.sat.f(float %15, float %16)
@@ -102,7 +102,7 @@ llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %21 = call float @llvm.nvvm.add.rm.ftz.f(float %19, float %20)
- // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %22 = insertelement <2 x float> poison, float %21, i32 0
// CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %25 = call float @llvm.nvvm.add.rm.ftz.f(float %23, float %24)
@@ -110,7 +110,7 @@ llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %29 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %27, float %28)
- // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %30 = insertelement <2 x float> poison, float %29, i32 0
// CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %33 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %31, float %32)
@@ -129,7 +129,7 @@ llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
- // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x float> poison, float %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %9 = call float @llvm.nvvm.add.rp.f(float %7, float %8)
@@ -137,7 +137,7 @@ llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %14 = insertelement <2 x float> poison, float %13, i32 0
// CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %17 = call float @llvm.nvvm.add.rp.sat.f(float %15, float %16)
@@ -145,7 +145,7 @@ llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %21 = call float @llvm.nvvm.add.rp.ftz.f(float %19, float %20)
- // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %22 = insertelement <2 x float> poison, float %21, i32 0
// CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %25 = call float @llvm.nvvm.add.rp.ftz.f(float %23, float %24)
@@ -153,7 +153,7 @@ llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %29 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %27, float %28)
- // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %30 = insertelement <2 x float> poison, float %29, i32 0
// CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %33 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %31, float %32)
@@ -172,7 +172,7 @@ llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
- // CHECK-NEXT: %6 = insertelement <2 x float> undef, float %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x float> poison, float %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %9 = call float @llvm.nvvm.add.rz.f(float %7, float %8)
@@ -180,7 +180,7 @@ llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %11 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %12 = extractelement <2 x float> %10, i32 0
// CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> undef, float %13, i32 0
+ // CHECK-NEXT: %14 = insertelement <2 x float> poison, float %13, i32 0
// CHECK-NEXT: %15 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %16 = extractelement <2 x float> %10, i32 1
// CHECK-NEXT: %17 = call float @llvm.nvvm.add.rz.sat.f(float %15, float %16)
@@ -188,7 +188,7 @@ llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %19 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %20 = extractelement <2 x float> %18, i32 0
// CHECK-NEXT: %21 = call float @llvm.nvvm.add.rz.ftz.f(float %19, float %20)
- // CHECK-NEXT: %22 = insertelement <2 x float> undef, float %21, i32 0
+ // CHECK-NEXT: %22 = insertelement <2 x float> poison, float %21, i32 0
// CHECK-NEXT: %23 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %24 = extractelement <2 x float> %18, i32 1
// CHECK-NEXT: %25 = call float @llvm.nvvm.add.rz.ftz.f(float %23, float %24)
@@ -196,7 +196,7 @@ llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %27 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %28 = extractelement <2 x float> %26, i32 0
// CHECK-NEXT: %29 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %27, float %28)
- // CHECK-NEXT: %30 = insertelement <2 x float> undef, float %29, i32 0
+ // CHECK-NEXT: %30 = insertelement <2 x float> poison, float %29, i32 0
// CHECK-NEXT: %31 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %32 = extractelement <2 x float> %26, i32 1
// CHECK-NEXT: %33 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %31, float %32)
@@ -216,7 +216,7 @@ llvm.func @fadd_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
- // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x double> poison, double %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %9 = call double @llvm.nvvm.add.rn.d(double %7, double %8)
@@ -224,7 +224,7 @@ llvm.func @fadd_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %11 = extractelement <2 x double> %10, i32 0
// CHECK-NEXT: %12 = extractelement <2 x double> %10, i32 0
// CHECK-NEXT: %13 = call double @llvm.nvvm.add.rn.d(double %11, double %12)
- // CHECK-NEXT: %14 = insertelement <2 x double> undef, double %13, i32 0
+ // CHECK-NEXT: %14 = insertelement <2 x double> poison, double %13, i32 0
// CHECK-NEXT: %15 = extractelement <2 x double> %10, i32 1
// CHECK-NEXT: %16 = extractelement <2 x double> %10, i32 1
// CHECK-NEXT: %17 = call double @llvm.nvvm.add.rn.d(double %15, double %16)
@@ -241,7 +241,7 @@ llvm.func @fadd_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
- // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x double> poison, double %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %9 = call double @llvm.nvvm.add.rm.d(double %7, double %8)
@@ -257,7 +257,7 @@ llvm.func @fadd_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
- // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x double> poison, double %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %9 = call double @llvm.nvvm.add.rp.d(double %7, double %8)
@@ -273,7 +273,7 @@ llvm.func @fadd_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
- // CHECK-NEXT: %6 = insertelement <2 x double> undef, double %5, i32 0
+ // CHECK-NEXT: %6 = insertelement <2 x double> poison, double %5, i32 0
// CHECK-NEXT: %7 = extractelement <2 x double> %0, i32 1
// CHECK-NEXT: %8 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %9 = call double @llvm.nvvm.add.rz.d(double %7, double %8)
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
index c5f70aed32c8b..d534d4b09d182 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
@@ -8,7 +8,7 @@ llvm.func @fadd_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) ->
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -28,7 +28,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -48,7 +48,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -68,7 +68,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -88,7 +88,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -108,7 +108,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -128,7 +128,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -148,7 +148,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -168,7 +168,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -188,7 +188,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -208,7 +208,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -228,7 +228,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -248,7 +248,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -268,7 +268,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -288,7 +288,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -308,7 +308,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -328,7 +328,7 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = fpext bfloat %4 to float
// CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> undef, float %7, i32 0
+ // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
// CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
// CHECK-NEXT: %11 = fpext half %9 to float
@@ -348,7 +348,7 @@ llvm.func @fadd_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> ve
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -366,7 +366,7 @@ llvm.func @fadd_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -384,7 +384,7 @@ llvm.func @fadd_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -402,7 +402,7 @@ llvm.func @fadd_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -420,7 +420,7 @@ llvm.func @fadd_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -438,7 +438,7 @@ llvm.func @fadd_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -456,7 +456,7 @@ llvm.func @fadd_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -474,7 +474,7 @@ llvm.func @fadd_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -492,7 +492,7 @@ llvm.func @fadd_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -510,7 +510,7 @@ llvm.func @fadd_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -528,7 +528,7 @@ llvm.func @fadd_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -546,7 +546,7 @@ llvm.func @fadd_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -564,7 +564,7 @@ llvm.func @fadd_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -582,7 +582,7 @@ llvm.func @fadd_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -600,7 +600,7 @@ llvm.func @fadd_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -618,7 +618,7 @@ llvm.func @fadd_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -636,7 +636,7 @@ llvm.func @fadd_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to float
@@ -655,7 +655,7 @@ llvm.func @fadd_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> ve
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to double
@@ -673,7 +673,7 @@ llvm.func @fadd_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to double
@@ -691,7 +691,7 @@ llvm.func @fadd_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to double
@@ -709,7 +709,7 @@ llvm.func @fadd_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to double
@@ -727,7 +727,7 @@ llvm.func @fadd_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext half %8 to double
@@ -746,7 +746,7 @@ llvm.func @fadd_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -764,7 +764,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -782,7 +782,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -800,7 +800,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -818,7 +818,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -836,7 +836,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -854,7 +854,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -872,7 +872,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -890,7 +890,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -908,7 +908,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -926,7 +926,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -944,7 +944,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -962,7 +962,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -980,7 +980,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -998,7 +998,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -1016,7 +1016,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -1034,7 +1034,7 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
// CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> undef, float %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to float
@@ -1053,7 +1053,7 @@ llvm.func @fadd_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to double
@@ -1071,7 +1071,7 @@ llvm.func @fadd_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to double
@@ -1089,7 +1089,7 @@ llvm.func @fadd_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to double
@@ -1107,7 +1107,7 @@ llvm.func @fadd_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to double
@@ -1125,7 +1125,7 @@ llvm.func @fadd_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext bfloat %8 to double
@@ -1144,7 +1144,7 @@ llvm.func @fadd_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> ve
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext float %8 to double
@@ -1162,7 +1162,7 @@ llvm.func @fadd_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext float %8 to double
@@ -1180,7 +1180,7 @@ llvm.func @fadd_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext float %8 to double
@@ -1198,7 +1198,7 @@ llvm.func @fadd_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext float %8 to double
@@ -1216,7 +1216,7 @@ llvm.func @fadd_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
// CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> undef, double %6, i32 0
+ // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
// CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
// CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
// CHECK-NEXT: %10 = fpext float %8 to double
>From c730315d5a27e7138084d75b10a36e98b399776b Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Wed, 4 Feb 2026 08:23:03 +0000
Subject: [PATCH 7/9] address comments
---
.../include/mlir/Dialect/LLVMIR/NVVMDialect.h | 17 +
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 96 +++--
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 33 +-
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 4 +-
.../Dialect/LLVMIR/nvvm-canonicalize.mlir | 8 +-
.../addf_all_same_types.mlir} | 56 +--
.../addf_different_return_type.mlir} | 216 +++++-----
.../addf_invalid.mlir} | 68 +--
.../addf_mixed_arg_types.mlir} | 396 +++++++++---------
.../addf_vector_all_same_types.mlir} | 96 ++---
.../addf_vector_mixed_arg_types.mlir} | 396 +++++++++---------
11 files changed, 702 insertions(+), 684 deletions(-)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_all_same_types.mlir => addf/addf_all_same_types.mlir} (67%)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_different_return_type.mlir => addf/addf_different_return_type.mlir} (60%)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_invalid.mlir => addf/addf_invalid.mlir} (55%)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_mixed_arg_types.mlir => addf/addf_mixed_arg_types.mlir} (57%)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_vector_all_same_types.mlir => addf/addf_vector_all_same_types.mlir} (83%)
rename mlir/test/Target/LLVMIR/nvvm/{fadd/fadd_vector_mixed_arg_types.mlir => addf/addf_vector_mixed_arg_types.mlir} (82%)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index 6bd582d66ed25..19f5bd05c8510 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -60,6 +60,23 @@ std::pair<mlir::Type, unsigned> inferMMAType(mlir::NVVM::MMATypes type,
mlir::NVVM::MMAFrag frag, int nRow,
int nCol,
mlir::MLIRContext *context);
+
+/// Width check between an operand type and a result type: yields true when
+/// the bitwidth of `resType` is >= the bitwidth of `opType`. A VectorType on
+/// either side is represented by its element type for the comparison.
+inline bool isResultTypeAtLeastAsWideAsOperand(Type opType, Type resType) {
+  // Strip one level of VectorType; any other type is returned unchanged.
+  auto elementOrSelf = [](Type ty) -> Type {
+    if (auto vecTy = dyn_cast<VectorType>(ty))
+      return vecTy.getElementType();
+    return ty;
+  };
+  unsigned operandWidth = elementOrSelf(opType).getIntOrFloatBitWidth();
+  unsigned resultWidth = elementOrSelf(resType).getIntOrFloatBitWidth();
+  return resultWidth >= operandWidth;
+}
+
+/// Returns true when `type1` and `type2` agree on vector-ness, i.e. both are
+/// VectorType or neither is; returns false when exactly one is a VectorType.
+inline bool areBothScalarsOrBothVectors(Type type1, Type type2) {
+  const bool firstIsVector = isa<VectorType>(type1);
+  const bool secondIsVector = isa<VectorType>(type2);
+  return firstIsVector == secondIsVector;
+}
} // namespace NVVM
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 175516cdc4c65..c3e6c5ad11832 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6178,71 +6178,93 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
def SIMTFloatType : AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16, F32, F64]>]>;
-def NVVM_FAddOp : NVVM_Op<"fadd", [Pure, Commutative]> {
+// Op constraint tying the operand named `operandArg` to the `res` result. It
+// is built on TypesMatchWith and uses
+// ::mlir::NVVM::isResultTypeAtLeastAsWideAsOperand as the comparator; the
+// quoted string (with the operand name spliced in) is the constraint summary.
+// NOTE(review): presumably the comparator is invoked as
+// (operand type, result type) with `$_self` passing the operand type through
+// unchanged - confirm against the TypesMatchWith definition in OpBase.td.
+class ResultAtLeastAsWideAs<string operandArg> :
+ TypesMatchWith<"result type must be at least as wide as " # operandArg # " operand",
+ operandArg, "res", "$_self",
+ "::mlir::NVVM::isResultTypeAtLeastAsWideAsOperand">;
+
+// Op constraint over the two values named `lhsArg` and `rhsArg`. It is built
+// on TypesMatchWith and uses ::mlir::NVVM::areBothScalarsOrBothVectors as the
+// comparator, so it only relates the two named values to each other; any
+// other operand or result must be covered by a separate instantiation.
+// NOTE(review): `$_self` presumably forwards the `lhsArg` type unchanged to
+// the comparator - confirm against the TypesMatchWith definition in OpBase.td.
+class AllScalarsOrAllVectors<string lhsArg, string rhsArg> :
+ TypesMatchWith<"cannot mix vector and scalar operands",
+ lhsArg, rhsArg, "$_self",
+ "::mlir::NVVM::areBothScalarsOrBothVectors">;
+
+// Common base for NVVM binary floating point ops (`lhs`, `rhs` -> `res`).
+//
+// Every derived op gets:
+//   - the `Pure` and `ResultsAreFloatLike` traits, plus any extra `traits`
+//     supplied by the derived op (e.g. `Commutative`);
+//   - `rnd`, `sat` and `ftz` attributes defaulting to FPRoundingMode::NONE,
+//     SaturationMode::NONE and `false` respectively;
+//   - constraints that `res` is at least as wide as each operand;
+//   - constraints that `lhs`, `rhs` and `res` are all scalars or all vectors.
+//
+// The scalar/vector constraint is instantiated twice. `lhs` vs `rhs` alone
+// says nothing about `res`, and the width constraints compare element types
+// only, so without the `lhs` vs `res` instance a form such as
+// `f32, f32 -> vector<2xf32>` would satisfy every trait listed here.
+// NOTE(review): the `lhs` vs `res` instance reuses the
+// "cannot mix vector and scalar operands" summary; adjust the wording and the
+// expected diagnostics in the *_invalid.mlir tests if a result-specific
+// message is preferred.
+class NVVM_FloatBinaryOp<string mnemonic, list<Trait> traits = []> :
+    NVVM_Op<mnemonic, traits # [Pure, ResultsAreFloatLike,
+                                ResultAtLeastAsWideAs<"lhs">,
+                                ResultAtLeastAsWideAs<"rhs">,
+                                AllScalarsOrAllVectors<"lhs", "rhs">,
+                                AllScalarsOrAllVectors<"lhs", "res">]>,
+    Arguments<(ins SIMTFloatType:$lhs, SIMTFloatType:$rhs,
+      DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+      DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+      DefaultValuedAttr<BoolAttr, "false">:$ftz)>,
+    Results<(outs SIMTFloatType:$res)> {
+  let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+}
+
+def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> {
let summary = [{
Performs floating point addition operation with support for mixed precision
operands
}];
let description = [{
- The `nvvm.fadd` operation performs floating point addition of two operands.
-
- The rounding mode to be used is specified by the `rnd` attribute,
- saturation mode by the `sat` attribute, and FTZ by the `ftz` unit attribute.
-
- The result type must be at least as wide as the operands. When the type of
- the `res` is wider than the type of the operands, the operands are first
- converted to the result type, and then the addition is performed.
-
- For more information, see PTX ISA - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add),
- [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add),
- [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
+ The `nvvm.addf` operation performs floating point addition of two operands.
+
+ The rounding mode is specified by the `rnd` attribute, saturation mode by
+ the `sat` attribute, and flush-to-zero by the `ftz` attribute.
+
+ **Type constraints:**
+ - The result type must be at least as wide as both operands.
+ - Operands and result must be all scalars or all vectors (no mixing).
+ - When operands are narrower than the result, they are extended to the
+ result type before addition. When this occurs, the modifiers that are
+ supported for the addition will depend upon the result type.
+
+ **Supported type combinations:**
+
+ | Result Type | Allowed Operand Types |
+ |--------------------|------------------------------------------------------------|
+ | `f16` | `f16` |
+ | `bf16` | `bf16` |
+ | `f32` | `f32`, `f16`, `bf16` |
+ | `f64` | `f64`, `f32`, `f16`, `bf16` |
+ | `vector<2xf16>` | `vector<2xf16>` |
+ | `vector<2xbf16>` | `vector<2xbf16>` |
+ | `vector<2xf32>` | `vector<2xf32>`, `vector<2xf16>`, `vector<2xbf16>` |
+ | `vector<2xf64>` | `vector<2xf64>`, `vector<2xf32>`, `vector<2xf16>`, `vector<2xbf16>` |
+
+ For more information, see PTX ISA:
+ - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add)
+ - [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add)
+ - [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add)
}];
- let arguments = (ins
- SIMTFloatType:$lhs,
- SIMTFloatType:$rhs,
- DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
- DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
- DefaultValuedAttr<BoolAttr, "false">:$ftz
- );
- let results = (outs SIMTFloatType:$res);
- let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasVerifier = 1;
let extraClassDeclaration = [{
- static void lowerFAddToLLVMIR(
+ static void lowerAddFToLLVMIR(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
}];
let llvmBuilder = [{
- NVVM::FAddOp::lowerFAddToLLVMIR(*op, moduleTranslation, builder);
+ NVVM::AddFOp::lowerAddFToLLVMIR(*op, moduleTranslation, builder);
}];
}
-def NVVM_FSubOp : NVVM_Op<"fsub", [Pure]> {
+def NVVM_SubFOp : NVVM_FloatBinaryOp<"subf"> {
let summary = [{
Performs floating point subtraction operation with support for mixed
precision operands
}];
let description = [{
- The `nvvm.fsub` operation performs floating point subtraction of two
+ The `nvvm.subf` operation performs floating point subtraction of two
operands.
- It supports the same type combinations and modifiers as `nvvm.fadd`.
- This is equivalent to `nvvm.fadd(lhs, -rhs)`.
+ It supports the same type combinations and modifiers as `nvvm.addf`.
+ This is equivalent to `nvvm.addf(lhs, -rhs)`.
For more information, see PTX ISA - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub),
[half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub),
[mixed precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-sub).
}];
- let arguments = (ins
- SIMTFloatType:$lhs,
- SIMTFloatType:$rhs,
- DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
- DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
- UnitAttr:$ftz
- );
- let results = (outs SIMTFloatType:$res);
- let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
let hasCanonicalizer = 1;
}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 5b31e1bbc717a..b5084eb647987 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3072,7 +3072,7 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
-LogicalResult NVVM::FAddOp::verify() {
+LogicalResult NVVM::AddFOp::verify() {
mlir::Type resFType = getRes().getType();
mlir::Type lhsFType = getLhs().getType();
mlir::Type rhsFType = getRhs().getType();
@@ -3090,30 +3090,9 @@ LogicalResult NVVM::FAddOp::verify() {
auto lhsBaseFType = getBaseFType(lhsFType);
auto rhsBaseFType = getBaseFType(rhsFType);
- // Supported operand types based on result types are:
- // Result Type : Operand Type(s)
- // f64 : f64, f32, f16, bf16
- // f32 : f32, f16, bf16
- // f16 : f16
- // bf16 : bf16
- // vector<2xf64> : vector<2x{f64, f32, f16, bf16}
- // vector<2xf32> : vector<2x{f32, f16, bf16}
- // vector<2xf16> : vector<2xf16>
- // vector<2xbf16> : vector<2xbf16>
-
bool sameTypeOperation =
llvm::all_equal({lhsBaseFType, rhsBaseFType, resBaseFType});
- if (!llvm::all_equal({isa<VectorType>(resFType), isa<VectorType>(lhsFType),
- isa<VectorType>(rhsFType)}))
- return emitOpError("cannot mix vector and scalar types for floating point "
- "addition operation");
-
- if (resBaseFType.getIntOrFloatBitWidth() <
- std::max(lhsBaseFType.getIntOrFloatBitWidth(),
- rhsBaseFType.getIntOrFloatBitWidth()))
- return emitOpError("result type must be at least as wide as the operands");
-
if ((resBaseFType.isF16() || resBaseFType.isBF16()) && !sameTypeOperation) {
return emitOpError(
"only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type");
@@ -3237,24 +3216,24 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() {
// Canonicalization patterns
//===----------------------------------------------------------------------===//
-struct ConvertFsubToFnegFadd : public OpRewritePattern<FSubOp> {
- using OpRewritePattern<FSubOp>::OpRewritePattern;
+struct ConvertFsubToFnegFadd : public OpRewritePattern<SubFOp> {
+ using OpRewritePattern<SubFOp>::OpRewritePattern;
- LogicalResult matchAndRewrite(FSubOp op,
+ LogicalResult matchAndRewrite(SubFOp op,
PatternRewriter &rewriter) const override {
Location loc = op.getLoc();
Value negRhs =
LLVM::FNegOp::create(rewriter, loc, op.getRhs().getType(), op.getRhs());
- rewriter.replaceOpWithNewOp<FAddOp>(op, op.getType(), op.getLhs(), negRhs,
+ rewriter.replaceOpWithNewOp<AddFOp>(op, op.getType(), op.getLhs(), negRhs,
op.getRnd(), op.getSat(), op.getFtz());
return success();
}
};
-void FSubOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
+void SubFOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
MLIRContext *context) {
patterns.add<ConvertFsubToFnegFadd>(context);
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 2623b0ea4a2f4..458ce77be615a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -446,9 +446,9 @@ getFenceProxySyncRestrictID(NVVM::MemOrderKind order) {
nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster;
}
-void NVVM::FAddOp::lowerFAddToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
+void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase &builder) {
- auto thisOp = cast<NVVM::FAddOp>(op);
+ auto thisOp = cast<NVVM::AddFOp>(op);
NVVM::FPRoundingMode rndMode = thisOp.getRnd();
NVVM::SaturationMode satMode = thisOp.getSat();
bool isFTZ = thisOp.getFtz();
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
index 76d0a1453edf9..fa2e2cc0ed564 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
@@ -1,9 +1,9 @@
// RUN: mlir-opt %s -split-input-file --canonicalize | FileCheck %s
-// CHECK-LABEL: @fsub_canonicalize
-llvm.func @fsub_canonicalize(%arg0 : f32, %arg1 : f32) -> f32 {
+// CHECK-LABEL: @subf_canonicalize
+llvm.func @subf_canonicalize(%arg0 : f32, %arg1 : f32) -> f32 {
// CHECK: %[[NEG_ARG1:.*]] = llvm.fneg %arg1 : f32
- // CHECK: %[[ADD_RESULT:.*]] = nvvm.fadd %arg0, %[[NEG_ARG1]] : f32, f32 -> f32
- %0 = nvvm.fsub %arg0, %arg1 : f32, f32 -> f32
+ // CHECK: %[[ADD_RESULT:.*]] = nvvm.addf %arg0, %[[NEG_ARG1]] : f32, f32 -> f32
+ %0 = nvvm.subf %arg0, %arg1 : f32, f32 -> f32
llvm.return %0 : f32
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
similarity index 67%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
index 6233c8cb62bf9..39ec7d50e50b4 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
@@ -9,10 +9,10 @@ llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
// CHECK-NEXT: %6 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %5, half %5)
// CHECK-NEXT: ret half %6
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f16, f16 -> f16
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b : f16, f16 -> f16
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f16
llvm.return %f4 : f16
}
@@ -23,8 +23,8 @@ llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
// CHECK-NEXT: %4 = fadd bfloat %3, %3
// CHECK-NEXT: ret bfloat %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : bf16, bf16 -> bf16
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
+ %f1 = nvvm.addf %a, %b : bf16, bf16 -> bf16
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
llvm.return %f2 : bf16
}
@@ -50,23 +50,23 @@ llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
// CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %18, float %18)
// CHECK-NEXT: ret float %19
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f32, f32 -> f32
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32, f32 -> f32
- %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f6 = nvvm.fadd %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
- %f7 = nvvm.fadd %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32, f32 -> f32
- %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f10 = nvvm.fadd %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
- %f11 = nvvm.fadd %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32, f32 -> f32
- %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f14 = nvvm.fadd %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
- %f15 = nvvm.fadd %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32, f32 -> f32
- %f17 = nvvm.fadd %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
+ %f1 = nvvm.addf %a, %b : f32, f32 -> f32
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32, f32 -> f32
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
+ %f6 = nvvm.addf %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
+ %f7 = nvvm.addf %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f8 = nvvm.addf %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32, f32 -> f32
+ %f9 = nvvm.addf %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
+ %f10 = nvvm.addf %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
+ %f11 = nvvm.addf %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f12 = nvvm.addf %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32, f32 -> f32
+ %f13 = nvvm.addf %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
+ %f14 = nvvm.addf %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
+ %f15 = nvvm.addf %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f16 = nvvm.addf %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32, f32 -> f32
+ %f17 = nvvm.addf %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
llvm.return %f17 : f32
}
@@ -80,10 +80,10 @@ llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 {
// CHECK-NEXT: %7 = call double @llvm.nvvm.add.rz.d(double %6, double %6)
// CHECK-NEXT: ret double %7
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f64, f64 -> f64
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64, f64 -> f64
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64, f64 -> f64
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
- %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
+ %f1 = nvvm.addf %a, %b : f64, f64 -> f64
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64, f64 -> f64
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64, f64 -> f64
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
llvm.return %f5 : f64
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
similarity index 60%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
index daac2ccbeca12..46776c529b8ab 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_different_return_type.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
@@ -1,400 +1,400 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// f16 + f16 -> f32
-llvm.func @fadd_f16_f16_rn(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rn(half %0, half %1) {
+llvm.func @addf_f16_f16_rn(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rn(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rn_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rn_sat(half %0, half %1) {
+llvm.func @addf_f16_f16_rn_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rn_sat(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rn_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rn_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rn_sat_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rn_sat_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rm(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rm(half %0, half %1) {
+llvm.func @addf_f16_f16_rm(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rm(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rm_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rm_sat(half %0, half %1) {
+llvm.func @addf_f16_f16_rm_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rm_sat(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rm_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rm_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rm_sat_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rm_sat_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rp(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rp(half %0, half %1) {
+llvm.func @addf_f16_f16_rp(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rp(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rp_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rp_sat(half %0, half %1) {
+llvm.func @addf_f16_f16_rp_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rp_sat(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rp_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rp_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rp_sat_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rp_sat_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rz(half %0, half %1) {
+llvm.func @addf_f16_f16_rz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rz_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rz_sat(half %0, half %1) {
+llvm.func @addf_f16_f16_rz_sat(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rz_sat(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rz_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rz_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f16_rz_sat_ftz(half %0, half %1) {
+llvm.func @addf_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f16_rz_sat_ftz(half %0, half %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext half %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
llvm.return %f1 : f32
}
// bf16 + bf16 -> f32
-llvm.func @fadd_bf16_bf16_rn(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rn(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rn(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rn(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rn_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rn_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rn_sat(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rn_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rn_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rn_sat_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rn_sat_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rm(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rm(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rm(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rm(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rm_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rm_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rm_sat(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rm_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rm_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rm_sat_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rm_sat_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rp(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rp(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rp(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rp(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rp_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rp_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rp_sat(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rp_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rp_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rp_sat_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rp_sat_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rz_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rz_sat(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rz_sat(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rz_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rz_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_bf16_rz_sat_ftz(bfloat %0, bfloat %1) {
+llvm.func @addf_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_bf16_rz_sat_ftz(bfloat %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
llvm.return %f1 : f32
}
// f32 + f32 -> f64
-llvm.func @fadd_f32_f32_rn(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f32_rn(float %0, float %1) {
+llvm.func @addf_f32_f32_rn(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f32_rn(float %0, float %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = fpext float %1 to double
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
// CHECK-NEXT: ret double %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f32_rm(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f32_rm(float %0, float %1) {
+llvm.func @addf_f32_f32_rm(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f32_rm(float %0, float %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = fpext float %1 to double
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
// CHECK-NEXT: ret double %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f32_rp(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f32_rp(float %0, float %1) {
+llvm.func @addf_f32_f32_rp(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f32_rp(float %0, float %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = fpext float %1 to double
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
// CHECK-NEXT: ret double %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f32_rz(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f32_rz(float %0, float %1) {
+llvm.func @addf_f32_f32_rz(%a : f32, %b : f32) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f32_rz(float %0, float %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = fpext float %1 to double
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
// CHECK-NEXT: ret double %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f64
llvm.return %f1 : f64
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
similarity index 55%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
index 6a287d06f14c5..c4b15ef2ae075 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
@@ -2,113 +2,113 @@
// -----
-llvm.func @fadd_invalid_sat_mode(%a : f16, %b : f16) -> f32 {
+llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f32 {
// expected-error @+1 {{SATFINITE saturation mode is not supported for floating point addition operation}}
- %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16, f16 -> f32
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16, f16 -> f32
llvm.return %f1 : f32
}
// -----
-llvm.func @fadd_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> f32 {
- // expected-error @+1 {{cannot mix vector and scalar types for floating point addition operation}}
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, f16 -> f32
- llvm.return %f1 : f32
+llvm.func @addf_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> vector<2xf16> {
+ // expected-error @+1 {{cannot mix vector and scalar operands}}
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, f16 -> vector<2xf16>
+ llvm.return %f1 : vector<2xf16>
}
// -----
-llvm.func @fadd_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
+llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
// expected-error @+1 {{FTZ and saturation are not supported for additions involving f64 type}}
- %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64, f64 -> f64
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64, f64 -> f64
llvm.return %f1 : f64
}
// -----
-llvm.func @fadd_invalid_result_width(%a : f64, %b : f64) -> f32 {
- // expected-error @+1 {{result type must be at least as wide as the operands}}
- %f1 = nvvm.fadd %a, %b : f64, f64 -> f32
+llvm.func @addf_invalid_result_width(%a : f64, %b : f64) -> f32 {
+ // expected-error @+1 {{result type must be at least as wide as lhs operand}}
+ %f1 = nvvm.addf %a, %b : f64, f64 -> f32
llvm.return %f1 : f32
}
// -----
-llvm.func @fadd_invalid_result_width_vector(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf32> {
- // expected-error @+1 {{result type must be at least as wide as the operands}}
- %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf32>
+llvm.func @addf_invalid_result_width_vector(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf32> {
+ // expected-error @+1 {{result type must be at least as wide as rhs operand}}
+ %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
// -----
-llvm.func @fadd_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
+llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
// expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f16
llvm.return %f1 : f16
}
// -----
-llvm.func @fadd_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
// expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
// -----
-llvm.func @fadd_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
+llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
// expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> bf16
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> bf16
llvm.return %f1 : bf16
}
// -----
-llvm.func @fadd_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
// expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
llvm.return %f1 : vector<2xbf16>
}
// -----
-llvm.func @fadd_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
+llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
// expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.fadd %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> bf16
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> bf16
llvm.return %f1 : bf16
}
// -----
-llvm.func @fadd_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
+llvm.func @addf_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
// expected-error@+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.fadd %a, %b : f16, bf16 -> f16
+ %f1 = nvvm.addf %a, %b : f16, bf16 -> f16
llvm.return %f1 : f16
}
// -----
-llvm.func @fadd_invalid_f16_result_type_vector(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf16> {
+llvm.func @addf_invalid_f16_result_type_vector(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf16> {
// expected-error@+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf16>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
// -----
-llvm.func @fadd_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
+llvm.func @addf_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
// expected-error@+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.fadd %a, %b : bf16, f16 -> bf16
+ %f1 = nvvm.addf %a, %b : bf16, f16 -> bf16
llvm.return %f1 : bf16
}
// -----
-llvm.func @fadd_invalid_bf16_result_type_vector(%a : vector<2xbf16>, %b : vector<2xf16>) -> vector<2xbf16> {
+llvm.func @addf_invalid_bf16_result_type_vector(%a : vector<2xbf16>, %b : vector<2xf16>) -> vector<2xbf16> {
// expected-error@+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf16> -> vector<2xbf16>
+ %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf16> -> vector<2xbf16>
llvm.return %f1 : vector<2xbf16>
}
@@ -116,8 +116,8 @@ llvm.func @fadd_invalid_bf16_result_type_vector(%a : vector<2xbf16>, %b : vector
// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
// available.
-llvm.func @fadd_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
+llvm.func @addf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
// expected-error@+1 {{FTZ with no saturation is not supported for f16 result type}}
- %f1 = nvvm.fadd %a, %b {ftz=true} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b {ftz=true} : f16, f16 -> f16
llvm.return %f1 : f16
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
similarity index 57%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
index 7da148b4bc2b6..13675b9db5279 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_mixed_arg_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
@@ -1,684 +1,684 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// f16 + bf16 -> f32
-llvm.func @fadd_f16_bf16(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rn(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rn(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rn(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rn(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rn_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rn_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rn_sat(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rn_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rn_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rn_sat_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rn_sat_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rm(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rm(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rm(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rm(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rm_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rm_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rm_sat(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rm_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rm_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rm_sat_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rm_sat_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rp(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rp(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rp(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rp(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rp_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rp_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rp_sat(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rp_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rp_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rp_sat_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rp_sat_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rz_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rz_sat(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rz_sat(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rz_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rz_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_bf16_rz_sat_ftz(half %0, bfloat %1) {
+llvm.func @addf_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_bf16_rz_sat_ftz(half %0, bfloat %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = fpext bfloat %1 to float
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
// CHECK-NEXT: ret float %5
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
llvm.return %f1 : f32
}
// f16 + f32 -> f32
-llvm.func @fadd_f16_f32(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32(half %0, float %1) {
+llvm.func @addf_f16_f32(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rn(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rn(half %0, float %1) {
+llvm.func @addf_f16_f32_rn(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rn(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rn_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rn_sat(half %0, float %1) {
+llvm.func @addf_f16_f32_rn_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rn_sat(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rn_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rn_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rn_sat_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rn_sat_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rm(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rm(half %0, float %1) {
+llvm.func @addf_f16_f32_rm(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rm(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rm_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rm_sat(half %0, float %1) {
+llvm.func @addf_f16_f32_rm_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rm_sat(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rm_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rm_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rm_sat_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rm_sat_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rp(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rp(half %0, float %1) {
+llvm.func @addf_f16_f32_rp(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rp(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rp_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rp_sat(half %0, float %1) {
+llvm.func @addf_f16_f32_rp_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rp_sat(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rp_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rp_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rp_sat_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rp_sat_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rz(half %0, float %1) {
+llvm.func @addf_f16_f32_rz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rz_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rz_sat(half %0, float %1) {
+llvm.func @addf_f16_f32_rz_sat(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rz_sat(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rz_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rz_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_f16_f32_rz_sat_ftz(half %0, float %1) {
+llvm.func @addf_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_f16_f32_rz_sat_ftz(half %0, float %1) {
// CHECK-NEXT: %3 = fpext half %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
llvm.return %f1 : f32
}
// f16 + f64 -> f64
-llvm.func @fadd_f16_f64(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f16_f64(half %0, double %1) {
+llvm.func @addf_f16_f64(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f16_f64(half %0, double %1) {
// CHECK-NEXT: %3 = fpext half %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f16, f64 -> f64
+ %f1 = nvvm.addf %a, %b : f16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f16_f64_rn(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f16_f64_rn(half %0, double %1) {
+llvm.func @addf_f16_f64_rn(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f16_f64_rn(half %0, double %1) {
// CHECK-NEXT: %3 = fpext half %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f16_f64_rm(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f16_f64_rm(half %0, double %1) {
+llvm.func @addf_f16_f64_rm(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f16_f64_rm(half %0, double %1) {
// CHECK-NEXT: %3 = fpext half %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f16_f64_rp(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f16_f64_rp(half %0, double %1) {
+llvm.func @addf_f16_f64_rp(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f16_f64_rp(half %0, double %1) {
// CHECK-NEXT: %3 = fpext half %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f16_f64_rz(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f16_f64_rz(half %0, double %1) {
+llvm.func @addf_f16_f64_rz(%a : f16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f16_f64_rz(half %0, double %1) {
// CHECK-NEXT: %3 = fpext half %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f64 -> f64
llvm.return %f1 : f64
}
// bf16 + f32 -> f32
-llvm.func @fadd_bf16_f32(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rn(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rn(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rn(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rn(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rn_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rn_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rn_sat(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rn_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rn_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rn_sat_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rn_sat_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rm(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rm(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rm(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rm(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rm_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rm_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rm_sat(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rm_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rm_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rm_sat_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rm_sat_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rp(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rp(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rp(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rp(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rp_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rp_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rp_sat(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rp_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rp_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rp_sat_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rp_sat_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rz_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rz_sat(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rz_sat(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rz_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rz_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
-llvm.func @fadd_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @fadd_bf16_f32_rz_sat_ftz(bfloat %0, float %1) {
+llvm.func @addf_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @addf_bf16_f32_rz_sat_ftz(bfloat %0, float %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to float
// CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
// CHECK-NEXT: ret float %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
llvm.return %f1 : f32
}
// bf16 + f64 -> f64
-llvm.func @fadd_bf16_f64(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_bf16_f64(bfloat %0, double %1) {
+llvm.func @addf_bf16_f64(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_bf16_f64(bfloat %0, double %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : bf16, f64 -> f64
+ %f1 = nvvm.addf %a, %b : bf16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_bf16_f64_rn(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_bf16_f64_rn(bfloat %0, double %1) {
+llvm.func @addf_bf16_f64_rn(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_bf16_f64_rn(bfloat %0, double %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_bf16_f64_rm(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_bf16_f64_rm(bfloat %0, double %1) {
+llvm.func @addf_bf16_f64_rm(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_bf16_f64_rm(bfloat %0, double %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_bf16_f64_rp(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_bf16_f64_rp(bfloat %0, double %1) {
+llvm.func @addf_bf16_f64_rp(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_bf16_f64_rp(bfloat %0, double %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_bf16_f64_rz(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_bf16_f64_rz(bfloat %0, double %1) {
+llvm.func @addf_bf16_f64_rz(%a : bf16, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_bf16_f64_rz(bfloat %0, double %1) {
// CHECK-NEXT: %3 = fpext bfloat %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f64 -> f64
llvm.return %f1 : f64
}
// f32 + f64 -> f64
-llvm.func @fadd_f32_f64(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f64(float %0, double %1) {
+llvm.func @addf_f32_f64(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f64(float %0, double %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : f32, f64 -> f64
+ %f1 = nvvm.addf %a, %b : f32, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f64_rn(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f64_rn(float %0, double %1) {
+llvm.func @addf_f32_f64_rn(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f64_rn(float %0, double %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f64_rm(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f64_rm(float %0, double %1) {
+llvm.func @addf_f32_f64_rm(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f64_rm(float %0, double %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f64_rp(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f64_rp(float %0, double %1) {
+llvm.func @addf_f32_f64_rp(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f64_rp(float %0, double %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f64 -> f64
llvm.return %f1 : f64
}
-llvm.func @fadd_f32_f64_rz(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @fadd_f32_f64_rz(float %0, double %1) {
+llvm.func @addf_f32_f64_rz(%a : f32, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @addf_f32_f64_rz(float %0, double %1) {
// CHECK-NEXT: %3 = fpext float %0 to double
// CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
// CHECK-NEXT: ret double %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f64 -> f64
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f64 -> f64
llvm.return %f1 : f64
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
similarity index 83%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
index b55cf92405a8e..2f88a92e86b92 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
@@ -1,36 +1,36 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// vector<2xf16> + vector<2xf16> -> vector<2xf16>
-llvm.func @fadd_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
- // CHECK-LABEL: define <2 x half> @fadd_vector_f16_f16(<2 x half> %0, <2 x half> %1) {
+llvm.func @addf_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+ // CHECK-LABEL: define <2 x half> @addf_vector_f16_f16(<2 x half> %0, <2 x half> %1) {
// CHECK-NEXT: %3 = fadd <2 x half> %0, %1
// CHECK-NEXT: %4 = fadd <2 x half> %3, %3
// CHECK-NEXT: %5 = call <2 x half> @llvm.nvvm.add.rn.sat.v2f16(<2 x half> %4, <2 x half> %4)
// CHECK-NEXT: %6 = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %5, <2 x half> %5)
// CHECK-NEXT: ret <2 x half> %3
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
// vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
-llvm.func @fadd_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
- // CHECK-LABEL: define <2 x bfloat> @fadd_vector_bf16_bf16(<2 x bfloat> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
+ // CHECK-LABEL: define <2 x bfloat> @addf_vector_bf16_bf16(<2 x bfloat> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = fadd <2 x bfloat> %0, %1
// CHECK-NEXT: %4 = fadd <2 x bfloat> %3, %3
// CHECK-NEXT: ret <2 x bfloat> %4
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
llvm.return %f2 : vector<2xbf16>
}
// vector<2xf32> + vector<2xf32> -> vector<2xf32>
-llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rn(<2 x float> %0, <2 x float> %1) {
+llvm.func @addf_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f32_f32_rn(<2 x float> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
@@ -73,16 +73,16 @@ llvm.func @fadd_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %42 = insertelement <2 x float> %38, float %41, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
-llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rm(<2 x float> %0, <2 x float> %1) {
+llvm.func @addf_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f32_f32_rm(<2 x float> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
@@ -117,15 +117,15 @@ llvm.func @fadd_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
-llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rp(<2 x float> %0, <2 x float> %1) {
+llvm.func @addf_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f32_f32_rp(<2 x float> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
@@ -160,15 +160,15 @@ llvm.func @fadd_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
-llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f32_f32_rz(<2 x float> %0, <2 x float> %1) {
+llvm.func @addf_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f32_f32_rz(<2 x float> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
@@ -203,16 +203,16 @@ llvm.func @fadd_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
// vector<2xf64> + vector<2xf64> -> vector<2xf64>
-llvm.func @fadd_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rn(<2 x double> %0, <2 x double> %1) {
+llvm.func @addf_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f64_f64_rn(<2 x double> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
@@ -231,13 +231,13 @@ llvm.func @fadd_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %18 = insertelement <2 x double> %14, double %17, i32 1
// CHECK-NEXT: ret <2 x double> %18
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf64>, vector<2xf64> -> vector<2xf64>
- %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
llvm.return %f2 : vector<2xf64>
}
-llvm.func @fadd_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rm(<2 x double> %0, <2 x double> %1) {
+llvm.func @addf_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f64_f64_rm(<2 x double> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
@@ -248,12 +248,12 @@ llvm.func @fadd_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rp(<2 x double> %0, <2 x double> %1) {
+llvm.func @addf_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f64_f64_rp(<2 x double> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
@@ -264,12 +264,12 @@ llvm.func @fadd_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f64_f64_rz(<2 x double> %0, <2 x double> %1) {
+llvm.func @addf_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f64_f64_rz(<2 x double> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x double> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
@@ -280,6 +280,6 @@ llvm.func @fadd_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
similarity index 82%
rename from mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
rename to mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
index d534d4b09d182..f7230937faebd 100644
--- a/mlir/test/Target/LLVMIR/nvvm/fadd/fadd_vector_mixed_arg_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
@@ -1,8 +1,8 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// vector<2xf16> + vector<2xbf16> -> vector<2xf32>
-llvm.func @fadd_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -17,12 +17,12 @@ llvm.func @fadd_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) ->
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -37,12 +37,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_sat(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_sat(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -57,12 +57,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -77,12 +77,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rn_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -97,12 +97,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -117,12 +117,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_sat(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_sat(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -137,12 +137,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -157,12 +157,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rm_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -177,12 +177,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -197,12 +197,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_sat(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_sat(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -217,12 +217,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -237,12 +237,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rp_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -257,12 +257,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -277,12 +277,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>)
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_sat(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_sat(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -297,12 +297,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -317,12 +317,12 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf1
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_bf16_f32_rz_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
+llvm.func @addf_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -337,13 +337,13 @@ llvm.func @fadd_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2
// CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
// CHECK-NEXT: ret <2 x float> %14
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
// vector<2xf16> + vector<2xf32> -> vector<2xf32>
-llvm.func @fadd_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -356,12 +356,12 @@ llvm.func @fadd_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> ve
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -374,12 +374,12 @@ llvm.func @fadd_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_sat(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_sat(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -392,12 +392,12 @@ llvm.func @fadd_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -410,12 +410,12 @@ llvm.func @fadd_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rn_sat_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_sat_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -428,12 +428,12 @@ llvm.func @fadd_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -446,12 +446,12 @@ llvm.func @fadd_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_sat(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_sat(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -464,12 +464,12 @@ llvm.func @fadd_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -482,12 +482,12 @@ llvm.func @fadd_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rm_sat_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_sat_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -500,12 +500,12 @@ llvm.func @fadd_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -518,12 +518,12 @@ llvm.func @fadd_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_sat(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_sat(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -536,12 +536,12 @@ llvm.func @fadd_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -554,12 +554,12 @@ llvm.func @fadd_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rp_sat_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_sat_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -572,12 +572,12 @@ llvm.func @fadd_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -590,12 +590,12 @@ llvm.func @fadd_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_sat(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_sat(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -608,12 +608,12 @@ llvm.func @fadd_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -626,12 +626,12 @@ llvm.func @fadd_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_f16_f32_f32_rz_sat_ftz(<2 x half> %0, <2 x float> %1) {
+llvm.func @addf_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_sat_ftz(<2 x half> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to float
@@ -644,13 +644,13 @@ llvm.func @fadd_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2x
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
// vector<2xf16> + vector<2xf64> -> vector<2xf64>
-llvm.func @fadd_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64(<2 x half> %0, <2 x double> %1) {
+llvm.func @addf_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64(<2 x half> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
@@ -663,12 +663,12 @@ llvm.func @fadd_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> ve
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rn(<2 x half> %0, <2 x double> %1) {
+llvm.func @addf_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rn(<2 x half> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
@@ -681,12 +681,12 @@ llvm.func @fadd_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rm(<2 x half> %0, <2 x double> %1) {
+llvm.func @addf_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rm(<2 x half> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
@@ -699,12 +699,12 @@ llvm.func @fadd_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rp(<2 x half> %0, <2 x double> %1) {
+llvm.func @addf_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rp(<2 x half> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
@@ -717,12 +717,12 @@ llvm.func @fadd_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f16_f64_f64_rz(<2 x half> %0, <2 x double> %1) {
+llvm.func @addf_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rz(<2 x half> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext half %3 to double
@@ -735,13 +735,13 @@ llvm.func @fadd_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
// vector<2xbf16> + vector<2xf32> -> vector<2xf32>
-llvm.func @fadd_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -754,12 +754,12 @@ llvm.func @fadd_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) ->
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -772,12 +772,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_sat(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_sat(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -790,12 +790,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -808,12 +808,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rn_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -826,12 +826,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -844,12 +844,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_sat(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_sat(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -862,12 +862,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -880,12 +880,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rm_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -898,12 +898,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -916,12 +916,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_sat(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_sat(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -934,12 +934,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -952,12 +952,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rp_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -970,12 +970,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -988,12 +988,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>)
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_sat(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_sat(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -1006,12 +1006,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -1024,12 +1024,12 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf3
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
-llvm.func @fadd_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @fadd_vector_bf16_f32_f32_rz_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
+llvm.func @addf_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
+ // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to float
@@ -1042,13 +1042,13 @@ llvm.func @fadd_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<
// CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
// CHECK-NEXT: ret <2 x float> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
llvm.return %f1 : vector<2xf32>
}
// vector<2xbf16> + vector<2xf64> -> vector<2xf64>
-llvm.func @fadd_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64(<2 x bfloat> %0, <2 x double> %1) {
+llvm.func @addf_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64(<2 x bfloat> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
@@ -1061,12 +1061,12 @@ llvm.func @fadd_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rn(<2 x bfloat> %0, <2 x double> %1) {
+llvm.func @addf_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rn(<2 x bfloat> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
@@ -1079,12 +1079,12 @@ llvm.func @fadd_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rm(<2 x bfloat> %0, <2 x double> %1) {
+llvm.func @addf_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rm(<2 x bfloat> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
@@ -1097,12 +1097,12 @@ llvm.func @fadd_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rp(<2 x bfloat> %0, <2 x double> %1) {
+llvm.func @addf_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rp(<2 x bfloat> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
@@ -1115,12 +1115,12 @@ llvm.func @fadd_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_bf16_f64_f64_rz(<2 x bfloat> %0, <2 x double> %1) {
+llvm.func @addf_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rz(<2 x bfloat> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext bfloat %3 to double
@@ -1133,13 +1133,13 @@ llvm.func @fadd_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>)
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
// vector<2xf32> + vector<2xf64> -> vector<2xf64>
-llvm.func @fadd_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64(<2 x float> %0, <2 x double> %1) {
+llvm.func @addf_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64(<2 x float> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
@@ -1152,12 +1152,12 @@ llvm.func @fadd_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> ve
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rn(<2 x float> %0, <2 x double> %1) {
+llvm.func @addf_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rn(<2 x float> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
@@ -1170,12 +1170,12 @@ llvm.func @fadd_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rm(<2 x float> %0, <2 x double> %1) {
+llvm.func @addf_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rm(<2 x float> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
@@ -1188,12 +1188,12 @@ llvm.func @fadd_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rp(<2 x float> %0, <2 x double> %1) {
+llvm.func @addf_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rp(<2 x float> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
@@ -1206,12 +1206,12 @@ llvm.func @fadd_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
-llvm.func @fadd_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @fadd_vector_f32_f64_f64_rz(<2 x float> %0, <2 x double> %1) {
+llvm.func @addf_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
+ // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rz(<2 x float> %0, <2 x double> %1) {
// CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
// CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
// CHECK-NEXT: %5 = fpext float %3 to double
@@ -1224,6 +1224,6 @@ llvm.func @fadd_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) ->
// CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
// CHECK-NEXT: ret <2 x double> %12
// CHECK-NEXT: }
- %f1 = nvvm.fadd %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
>From 07c1d404a9526c7fe15d5ed958c8c335346241cd Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Wed, 4 Feb 2026 10:13:32 +0000
Subject: [PATCH 8/9] update docs
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c3e6c5ad11832..4dfe8bf1a9380 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6216,8 +6216,8 @@ def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> {
- The result type must be at least as wide as both operands.
- Operands and result must be all scalars or all vectors (no mixing).
- When operands are narrower than the result, they are extended to the
- result type before addition. When this occurs, the modifiers that are
- supported for the addition will depend upon the result type.
+ result type before addition. When this occurs, the modifiers
+ supported for the addition are determined by the result type.
**Supported type combinations:**
>From 47a71e14fbffc3682e11bfbf682a655839cc58a0 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 9 Feb 2026 08:23:18 +0000
Subject: [PATCH 9/9] remove mixed precision support
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 51 +-
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 34 +-
.../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 62 +-
.../LLVMIR/nvvm/addf/addf_all_same_types.mlir | 56 +-
.../nvvm/addf/addf_different_return_type.mlir | 400 ------
.../Target/LLVMIR/nvvm/addf/addf_invalid.mlir | 76 +-
.../nvvm/addf/addf_mixed_arg_types.mlir | 684 ---------
.../nvvm/addf/addf_vector_all_same_types.mlir | 56 +-
.../addf/addf_vector_mixed_arg_types.mlir | 1229 -----------------
9 files changed, 102 insertions(+), 2546 deletions(-)
delete mode 100644 mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
delete mode 100644 mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
delete mode 100644 mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 4dfe8bf1a9380..e74c0fb8b026f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -18,6 +18,7 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Dialect/LLVMIR/NVVMRequiresSMTraits.td"
include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
@@ -6178,64 +6179,30 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
def SIMTFloatType : AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16, F32, F64]>]>;
-class ResultAtLeastAsWideAs<string operandArg> :
- TypesMatchWith<"result type must be at least as wide as " # operandArg # " operand",
- operandArg, "res", "$_self",
- "::mlir::NVVM::isResultTypeAtLeastAsWideAsOperand">;
-
-class AllScalarsOrAllVectors<string lhsArg, string rhsArg> :
- TypesMatchWith<"cannot mix vector and scalar operands",
- lhsArg, rhsArg, "$_self",
- "::mlir::NVVM::areBothScalarsOrBothVectors">;
-
class NVVM_FloatBinaryOp<string mnemonic, list<Trait> traits = []> :
- NVVM_Op<mnemonic, traits # [Pure, ResultsAreFloatLike,
- ResultAtLeastAsWideAs<"lhs">,
- ResultAtLeastAsWideAs<"rhs">,
- AllScalarsOrAllVectors<"lhs", "rhs">]>,
+ NVVM_Op<mnemonic, traits # [Pure, SameOperandsAndResultType]>,
Arguments<(ins SIMTFloatType:$lhs, SIMTFloatType:$rhs,
DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
DefaultValuedAttr<BoolAttr, "false">:$ftz)>,
Results<(outs SIMTFloatType:$res)> {
- let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($res)";
}
def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> {
let summary = [{
- Performs floating point addition operation with support for mixed precision
- operands
+ Performs floating point addition operation
}];
let description = [{
- The `nvvm.addf` operation performs floating point addition of two operands.
+ The `nvvm.addf` operation performs floating point addition of two floating
+ point operands of the same type.
The rounding mode is specified by the `rnd` attribute, saturation mode by
the `sat` attribute, and flush-to-zero by the `ftz` attribute.
- **Type constraints:**
- - The result type must be at least as wide as both operands.
- - Operands and result must be all scalars or all vectors (no mixing).
- - When operands are narrower than the result, they are extended to the
- result type before addition. When this occurs, the modifiers
- supported for the addition are determined by the result type.
-
- **Supported type combinations:**
-
- | Result Type | Allowed Operand Types |
- |--------------------|------------------------------------------------------------|
- | `f16` | `f16` |
- | `bf16` | `bf16` |
- | `f32` | `f32`, `f16`, `bf16` |
- | `f64` | `f64`, `f32`, `f16`, `bf16` |
- | `vector<2xf16>` | `vector<2xf16>` |
- | `vector<2xbf16>` | `vector<2xbf16>` |
- | `vector<2xf32>` | `vector<2xf32>`, `vector<2xf16>`, `vector<2xbf16>` |
- | `vector<2xf64>` | `vector<2xf64>`, `vector<2xf32>`, `vector<2xf16>`, `vector<2xbf16>` |
-
For more information, see PTX ISA:
- [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add)
- [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add)
- - [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add)
}];
let hasVerifier = 1;
@@ -6260,9 +6227,9 @@ def NVVM_SubFOp : NVVM_FloatBinaryOp<"subf"> {
It supports the same type combinations and modifiers as `nvvm.addf`.
This is equivalent to `nvvm.addf(lhs, -rhs)`.
- For more information, see PTX ISA - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub),
- [half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub),
- [mixed precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-sub).
+ For more information, see PTX ISA:
+ - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub)
+ - [half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub)
}];
let hasCanonicalizer = 1;
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index b5084eb647987..6abb394f738c8 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3073,49 +3073,31 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
}
LogicalResult NVVM::AddFOp::verify() {
- mlir::Type resFType = getRes().getType();
- mlir::Type lhsFType = getLhs().getType();
- mlir::Type rhsFType = getRhs().getType();
mlir::NVVM::FPRoundingMode rndMode = getRnd();
mlir::NVVM::SaturationMode satMode = getSat();
bool isFTZ = getFtz();
- auto getBaseFType = [](Type type) -> Type {
- if (isa<VectorType>(type))
- return cast<VectorType>(type).getElementType();
- return type;
- };
-
- auto resBaseFType = getBaseFType(resFType);
- auto lhsBaseFType = getBaseFType(lhsFType);
- auto rhsBaseFType = getBaseFType(rhsFType);
-
- bool sameTypeOperation =
- llvm::all_equal({lhsBaseFType, rhsBaseFType, resBaseFType});
-
- if ((resBaseFType.isF16() || resBaseFType.isBF16()) && !sameTypeOperation) {
- return emitOpError(
- "only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type");
- }
-
- // Modifier constraints
+ mlir::Type opType = getRes().getType();
+ mlir::Type opBaseType = isa<VectorType>(opType)
+ ? cast<VectorType>(opType).getElementType()
+ : opType;
if (satMode == NVVM::SaturationMode::SATFINITE)
return emitOpError("SATFINITE saturation mode is not supported for "
"floating point addition operation");
- if (resBaseFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
+ if (opBaseType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
return emitOpError("FTZ and saturation are not supported for additions "
"involving f64 type");
- if (resBaseFType.isF16()) {
+ if (opBaseType.isF16()) {
if (!(rndMode == NVVM::FPRoundingMode::RN ||
rndMode == NVVM::FPRoundingMode::NONE))
return emitOpError("only RN rounding mode is supported for f16 and "
"vector<2xf16> additions");
}
- if (resBaseFType.isBF16()) {
+ if (opBaseType.isBF16()) {
if (rndMode != NVVM::FPRoundingMode::RN &&
rndMode != NVVM::FPRoundingMode::NONE)
return emitOpError("only RN rounding mode is supported for bf16 and "
@@ -3129,7 +3111,7 @@ LogicalResult NVVM::AddFOp::verify() {
// PTX instructions since the corresponding LLVM intrinsic is missing. This
// should be removed once the intrinsics for f16 addition (with FTZ only) are
// available.
- if (resBaseFType.isF16() && isFTZ && satMode == NVVM::SaturationMode::NONE)
+ if (opBaseType.isF16() && isFTZ && satMode == NVVM::SaturationMode::NONE)
return emitOpError(
"FTZ with no saturation is not supported for f16 result type");
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 458ce77be615a..0ff5c0eb19961 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -457,9 +457,7 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
- mlir::Type lhsType = thisOp.getLhs().getType();
- mlir::Type rhsType = thisOp.getRhs().getType();
- mlir::Type resType = thisOp.getRes().getType();
+ mlir::Type opType = thisOp.getLhs().getType();
// FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
// they are available.
@@ -509,9 +507,9 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
// f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
// FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
// intrinsics are available.
- bool isVectorF16Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isF16();
- if (resType.isF16() || isVectorF16Add) {
+ bool isVectorF16Add = isa<VectorType>(opType) &&
+ cast<VectorType>(opType).getElementType().isF16();
+ if (opType.isF16() || isVectorF16Add) {
if (isSat) {
unsigned index = (isVectorF16Add << 1) | isFTZ;
mt.mapValue(thisOp.getRes(), addIntrinsic(f16IDs[index]));
@@ -523,50 +521,33 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
}
// bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
- bool isVectorBF16Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isBF16();
- if (resType.isBF16() || isVectorBF16Add) {
+ bool isVectorBF16Add = isa<VectorType>(opType) &&
+ cast<VectorType>(opType).getElementType().isBF16();
+ if (opType.isBF16() || isVectorBF16Add) {
mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
return;
}
- // Helper functions for casting and adding vectors
- auto getCastedFloat = [&](mlir::Type elemType, llvm::Value *value,
- llvm::Type *targetType) -> llvm::Value * {
- return (mt.convertType(elemType) == targetType)
- ? value
- : builder.CreateFPExt(value, targetType);
- };
+ // Helper function for adding vectors
auto addVector = [&](llvm::Type *targetType, llvm::Intrinsic::ID intrinsicID,
llvm::Value *result) -> llvm::Value * {
- auto lhsElemType = cast<VectorType>(lhsType).getElementType();
- auto rhsElemType = cast<VectorType>(rhsType).getElementType();
for (int64_t i = 0; i < 2; ++i) {
llvm::Value *lhsElemi =
builder.CreateExtractElement(argLHS, builder.getInt32(i));
llvm::Value *rhsElemi =
builder.CreateExtractElement(argRHS, builder.getInt32(i));
- llvm::Value *lhsCasted =
- getCastedFloat(lhsElemType, lhsElemi, targetType);
- llvm::Value *rhsCasted =
- getCastedFloat(rhsElemType, rhsElemi, targetType);
- llvm::Value *sum = addIntrinsic(intrinsicID, lhsCasted, rhsCasted);
+ llvm::Value *sum = addIntrinsic(intrinsicID, lhsElemi, rhsElemi);
result = builder.CreateInsertElement(result, sum, builder.getInt32(i));
};
return result;
};
- // f64 + f64/f32/f16/bf16
- bool isVectorF64Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isF64();
-
- if (resType.isF64()) {
- llvm::Value *lhsF64 =
- getCastedFloat(lhsType, argLHS, builder.getDoubleTy());
- llvm::Value *rhsF64 =
- getCastedFloat(rhsType, argRHS, builder.getDoubleTy());
+ // f64 + f64 -> f64 / vector<2xf64> + vector<2xf64> -> vector<2xf64>
+ bool isVectorF64Add = isa<VectorType>(opType) &&
+ cast<VectorType>(opType).getElementType().isF64();
+ if (opType.isF64()) {
unsigned index = static_cast<unsigned>(rndMode);
- mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index], lhsF64, rhsF64));
+ mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index], argLHS, argRHS));
return;
} else if (isVectorF64Add) {
llvm::Value *result = llvm::PoisonValue::get(
@@ -577,24 +558,19 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
return;
}
- // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 +
- // f32/f16/bf16
- bool isVectorF32Add = isa<VectorType>(resType) &&
- cast<VectorType>(resType).getElementType().isF32();
-
- if (resType.isF32()) {
- llvm::Value *lhsF32 = getCastedFloat(lhsType, argLHS, builder.getFloatTy());
- llvm::Value *rhsF32 = getCastedFloat(rhsType, argRHS, builder.getFloatTy());
+ // f32 + f32 -> f32 / vector<2xf32> + vector<2xf32> -> vector<2xf32>
+ bool isVectorF32Add = isa<VectorType>(opType) &&
+ cast<VectorType>(opType).getElementType().isF32();
+ if (opType.isF32()) {
unsigned index =
((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
- mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index], lhsF32, rhsF32));
+ mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index], argLHS, argRHS));
return;
} else if (isVectorF32Add) {
llvm::Value *result = llvm::PoisonValue::get(
llvm::FixedVectorType::get(builder.getFloatTy(), 2));
unsigned index =
((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
-
result = addVector(builder.getFloatTy(), f32IDs[index], result);
mt.mapValue(thisOp.getRes(), result);
return;
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
index 39ec7d50e50b4..fd05c85ae441f 100644
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_all_same_types.mlir
@@ -9,10 +9,10 @@ llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
// CHECK-NEXT: %6 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %5, half %5)
// CHECK-NEXT: ret half %6
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f16, f16 -> f16
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b : f16
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16
llvm.return %f4 : f16
}
@@ -23,8 +23,8 @@ llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
// CHECK-NEXT: %4 = fadd bfloat %3, %3
// CHECK-NEXT: ret bfloat %4
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : bf16, bf16 -> bf16
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
+ %f1 = nvvm.addf %a, %b : bf16
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16
llvm.return %f2 : bf16
}
@@ -50,23 +50,23 @@ llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
// CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %18, float %18)
// CHECK-NEXT: ret float %19
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f32, f32 -> f32
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32, f32 -> f32
- %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f6 = nvvm.addf %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
- %f7 = nvvm.addf %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f8 = nvvm.addf %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32, f32 -> f32
- %f9 = nvvm.addf %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f10 = nvvm.addf %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
- %f11 = nvvm.addf %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f12 = nvvm.addf %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32, f32 -> f32
- %f13 = nvvm.addf %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
- %f14 = nvvm.addf %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
- %f15 = nvvm.addf %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
- %f16 = nvvm.addf %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32, f32 -> f32
- %f17 = nvvm.addf %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32, f32 -> f32
+ %f1 = nvvm.addf %a, %b : f32
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f6 = nvvm.addf %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32
+ %f7 = nvvm.addf %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32
+ %f8 = nvvm.addf %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32
+ %f9 = nvvm.addf %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f10 = nvvm.addf %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32
+ %f11 = nvvm.addf %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32
+ %f12 = nvvm.addf %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32
+ %f13 = nvvm.addf %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f14 = nvvm.addf %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32
+ %f15 = nvvm.addf %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32
+ %f16 = nvvm.addf %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32
+ %f17 = nvvm.addf %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
llvm.return %f17 : f32
}
@@ -80,10 +80,10 @@ llvm.func @fadd_f64_f64(%a : f64, %b : f64) -> f64 {
// CHECK-NEXT: %7 = call double @llvm.nvvm.add.rz.d(double %6, double %6)
// CHECK-NEXT: ret double %7
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f64, f64 -> f64
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64, f64 -> f64
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64, f64 -> f64
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64, f64 -> f64
- %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64, f64 -> f64
+ %f1 = nvvm.addf %a, %b : f64
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64
llvm.return %f5 : f64
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
deleted file mode 100644
index 46776c529b8ab..0000000000000
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_different_return_type.mlir
+++ /dev/null
@@ -1,400 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// f16 + f16 -> f32
-llvm.func @addf_f16_f16_rn(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rn(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rn_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rn_sat(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rn_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rn_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rn_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rn_sat_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rm(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rm(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rm_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rm_sat(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rm_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rm_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rm_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rm_sat_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rp(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rp(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rp_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rp_sat(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rp_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rp_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rp_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rp_sat_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rz_sat(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rz_sat(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rz_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rz_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f16_rz_sat_ftz(%a : f16, %b : f16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f16_rz_sat_ftz(half %0, half %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext half %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-// bf16 + bf16 -> f32
-llvm.func @addf_bf16_bf16_rn(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rn(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rn_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rn_sat(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rn_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rn_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rn_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rn_sat_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rm(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rm(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rm_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rm_sat(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rm_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rm_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rm_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rm_sat_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rp(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rp(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rp_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rp_sat(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rp_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rp_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rp_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rp_sat_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rz_sat(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rz_sat(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rz_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rz_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_bf16_rz_sat_ftz(%a : bf16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_bf16_rz_sat_ftz(bfloat %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-// f32 + f32 -> f64
-llvm.func @addf_f32_f32_rn(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f32_rn(float %0, float %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = fpext float %1 to double
- // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rn.d(double %3, double %4)
- // CHECK-NEXT: ret double %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f32_rm(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f32_rm(float %0, float %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = fpext float %1 to double
- // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rm.d(double %3, double %4)
- // CHECK-NEXT: ret double %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f32_rp(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f32_rp(float %0, float %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = fpext float %1 to double
- // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rp.d(double %3, double %4)
- // CHECK-NEXT: ret double %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f32_rz(%a : f32, %b : f32) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f32_rz(float %0, float %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = fpext float %1 to double
- // CHECK-NEXT: %5 = call double @llvm.nvvm.add.rz.d(double %3, double %4)
- // CHECK-NEXT: ret double %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f64
- llvm.return %f1 : f64
-}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
index c4b15ef2ae075..8757912feb9df 100644
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
@@ -2,49 +2,25 @@
// -----
-llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f32 {
+llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f16 {
// expected-error @+1 {{SATFINITE saturation mode is not supported for floating point addition operation}}
- %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16, f16 -> f32
- llvm.return %f1 : f32
-}
-
-// -----
-
-llvm.func @addf_invalid_vector_scalar_mix(%a : vector<2xf16>, %b : f16) -> vector<2xf16> {
- // expected-error @+1 {{cannot mix vector and scalar operands}}
- %f1 = nvvm.addf %a, %b : vector<2xf16>, f16 -> vector<2xf16>
- llvm.return %f1 : vector<2xf16>
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16
+ llvm.return %f1 : f16
}
// -----
llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
// expected-error @+1 {{FTZ and saturation are not supported for additions involving f64 type}}
- %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64, f64 -> f64
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64
llvm.return %f1 : f64
}
// -----
-llvm.func @addf_invalid_result_width(%a : f64, %b : f64) -> f32 {
- // expected-error @+1 {{result type must be at least as wide as lhs operand}}
- %f1 = nvvm.addf %a, %b : f64, f64 -> f32
- llvm.return %f1 : f32
-}
-
-// -----
-
-llvm.func @addf_invalid_result_width_vector(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf32> {
- // expected-error @+1 {{result type must be at least as wide as rhs operand}}
- %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-// -----
-
llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
// expected-error @+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16
llvm.return %f1 : f16
}
@@ -52,7 +28,7 @@ llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
// expected-error @+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
@@ -60,7 +36,7 @@ llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -
llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
// expected-error @+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, bf16 -> bf16
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16
llvm.return %f1 : bf16
}
@@ -68,7 +44,7 @@ llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
// expected-error @+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>
llvm.return %f1 : vector<2xbf16>
}
@@ -76,48 +52,16 @@ llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>
llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
// expected-error @+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
- %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, bf16 -> bf16
- llvm.return %f1 : bf16
-}
-
-// -----
-
-llvm.func @addf_invalid_f16_result_type(%a : f16, %b : bf16) -> f16 {
- // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.addf %a, %b : f16, bf16 -> f16
- llvm.return %f1 : f16
-}
-
-// -----
-
-llvm.func @addf_invalid_f16_result_type_vector(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf16> {
- // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf16>
- llvm.return %f1 : vector<2xf16>
-}
-
-// -----
-
-llvm.func @addf_invalid_bf16_result_type(%a : bf16, %b : f16) -> bf16 {
- // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.addf %a, %b : bf16, f16 -> bf16
+ %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16
llvm.return %f1 : bf16
}
// -----
-llvm.func @addf_invalid_bf16_result_type_vector(%a : vector<2xbf16>, %b : vector<2xf16>) -> vector<2xbf16> {
- // expected-error @+1 {{only f16 + f16 (bf16 + bf16) is supported for f16 (bf16) result type}}
- %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf16> -> vector<2xbf16>
- llvm.return %f1 : vector<2xbf16>
-}
-
-// -----
-
// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
// available.
llvm.func @addf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
// expected-error @+1 {{FTZ with no saturation is not supported for f16 result type}}
- %f1 = nvvm.addf %a, %b {ftz=true} : f16, f16 -> f16
+ %f1 = nvvm.addf %a, %b {ftz=true} : f16
llvm.return %f1 : f16
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
deleted file mode 100644
index 13675b9db5279..0000000000000
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_mixed_arg_types.mlir
+++ /dev/null
@@ -1,684 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// f16 + bf16 -> f32
-llvm.func @addf_f16_bf16(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rn(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rn(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rn_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rn_sat(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rn_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rn_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rn_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rn_sat_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rm(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rm(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rm_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rm_sat(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rm_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rm_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rm_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rm_sat_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rp(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rp(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rp_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rp_sat(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rp_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rp_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rp_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rp_sat_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rz_sat(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rz_sat(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rz_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rz_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_bf16_rz_sat_ftz(%a : f16, %b : bf16) -> f32 {
- // CHECK-LABEL: define float @addf_f16_bf16_rz_sat_ftz(half %0, bfloat %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = fpext bfloat %1 to float
- // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %4)
- // CHECK-NEXT: ret float %5
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, bf16 -> f32
- llvm.return %f1 : f32
-}
-
-// f16 + f32 -> f32
-llvm.func @addf_f16_f32(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rn(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rn(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rn_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rn_sat(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rn_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rn_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rn_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rn_sat_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rm(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rm(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rm_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rm_sat(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rm_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rm_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rm_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rm_sat_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rp(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rp(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rp_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rp_sat(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rp_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rp_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rp_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rp_sat_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rz_sat(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rz_sat(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rz_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rz_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_f16_f32_rz_sat_ftz(%a : f16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_f16_f32_rz_sat_ftz(half %0, float %1) {
- // CHECK-NEXT: %3 = fpext half %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-// f16 + f64 -> f64
-llvm.func @addf_f16_f64(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f16_f64(half %0, double %1) {
- // CHECK-NEXT: %3 = fpext half %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f16_f64_rn(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f16_f64_rn(half %0, double %1) {
- // CHECK-NEXT: %3 = fpext half %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f16_f64_rm(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f16_f64_rm(half %0, double %1) {
- // CHECK-NEXT: %3 = fpext half %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f16_f64_rp(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f16_f64_rp(half %0, double %1) {
- // CHECK-NEXT: %3 = fpext half %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f16_f64_rz(%a : f16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f16_f64_rz(half %0, double %1) {
- // CHECK-NEXT: %3 = fpext half %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-// bf16 + f32 -> f32
-llvm.func @addf_bf16_f32(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rn(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rn(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rn_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rn_sat(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rn_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rn_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rn_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rn_sat_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rm(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rm(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rm_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rm_sat(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rm_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rm_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rm_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rm_sat_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rp(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rp(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rp_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rp_sat(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rp_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rp_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rp_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rp_sat_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rz_sat(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rz_sat(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rz_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rz_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-llvm.func @addf_bf16_f32_rz_sat_ftz(%a : bf16, %b : f32) -> f32 {
- // CHECK-LABEL: define float @addf_bf16_f32_rz_sat_ftz(bfloat %0, float %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to float
- // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %3, float %1)
- // CHECK-NEXT: ret float %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : bf16, f32 -> f32
- llvm.return %f1 : f32
-}
-
-// bf16 + f64 -> f64
-llvm.func @addf_bf16_f64(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_bf16_f64(bfloat %0, double %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : bf16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_bf16_f64_rn(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_bf16_f64_rn(bfloat %0, double %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_bf16_f64_rm(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_bf16_f64_rm(bfloat %0, double %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_bf16_f64_rp(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_bf16_f64_rp(bfloat %0, double %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : bf16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_bf16_f64_rz(%a : bf16, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_bf16_f64_rz(bfloat %0, double %1) {
- // CHECK-NEXT: %3 = fpext bfloat %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : bf16, f64 -> f64
- llvm.return %f1 : f64
-}
-
-// f32 + f64 -> f64
-llvm.func @addf_f32_f64(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f64(float %0, double %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : f32, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f64_rn(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f64_rn(float %0, double %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f64_rm(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f64_rm(float %0, double %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rm.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f64_rp(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f64_rp(float %0, double %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rp.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f64 -> f64
- llvm.return %f1 : f64
-}
-
-llvm.func @addf_f32_f64_rz(%a : f32, %b : f64) -> f64 {
- // CHECK-LABEL: define double @addf_f32_f64_rz(float %0, double %1) {
- // CHECK-NEXT: %3 = fpext float %0 to double
- // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rz.d(double %3, double %1)
- // CHECK-NEXT: ret double %4
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f64 -> f64
- llvm.return %f1 : f64
-}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
index 2f88a92e86b92..b472de739c92a 100644
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_all_same_types.mlir
@@ -9,10 +9,10 @@ llvm.func @addf_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector
// CHECK-NEXT: %6 = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %5, <2 x half> %5)
// CHECK-NEXT: ret <2 x half> %3
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf16> -> vector<2xf16>
+ %f1 = nvvm.addf %a, %b : vector<2xf16>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
@@ -23,8 +23,8 @@ llvm.func @addf_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> ve
// CHECK-NEXT: %4 = fadd <2 x bfloat> %3, %3
// CHECK-NEXT: ret <2 x bfloat> %4
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xbf16> -> vector<2xbf16>
+ %f1 = nvvm.addf %a, %b : vector<2xbf16>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>
llvm.return %f2 : vector<2xbf16>
}
@@ -73,11 +73,11 @@ llvm.func @addf_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %42 = insertelement <2 x float> %38, float %41, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b : vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf32>
+ %f5 = nvvm.addf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
@@ -117,10 +117,10 @@ llvm.func @addf_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
@@ -160,10 +160,10 @@ llvm.func @addf_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
@@ -203,10 +203,10 @@ llvm.func @addf_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vec
// CHECK-NEXT: %34 = insertelement <2 x float> %30, float %33, i32 1
// CHECK-NEXT: ret <2 x float> %34
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
- %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>, vector<2xf32> -> vector<2xf32>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf32>
+ %f3 = nvvm.addf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf32>
+ %f4 = nvvm.addf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf32>
llvm.return %f4 : vector<2xf32>
}
@@ -231,8 +231,8 @@ llvm.func @addf_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %18 = insertelement <2 x double> %14, double %17, i32 1
// CHECK-NEXT: ret <2 x double> %18
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf64>, vector<2xf64> -> vector<2xf64>
- %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b : vector<2xf64>
+ %f2 = nvvm.addf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf64>
llvm.return %f2 : vector<2xf64>
}
@@ -248,7 +248,7 @@ llvm.func @addf_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
@@ -264,7 +264,7 @@ llvm.func @addf_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
@@ -280,6 +280,6 @@ llvm.func @addf_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vec
// CHECK-NEXT: %10 = insertelement <2 x double> %6, double %9, i32 1
// CHECK-NEXT: ret <2 x double> %10
// CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf64>, vector<2xf64> -> vector<2xf64>
+ %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf64>
llvm.return %f1 : vector<2xf64>
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
deleted file mode 100644
index f7230937faebd..0000000000000
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_vector_mixed_arg_types.mlir
+++ /dev/null
@@ -1,1229 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
-// vector<2xf16> + vector<2xbf16> -> vector<2xf32>
-llvm.func @addf_vector_f16_bf16_f32(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rn(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_sat(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.ftz.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rn_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rm(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_sat(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.ftz.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rm_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rp(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_sat(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.ftz.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rp_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_sat(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.ftz.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_bf16_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xbf16>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_bf16_f32_rz_sat_ftz(<2 x half> %0, <2 x bfloat> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x bfloat> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = fpext bfloat %4 to float
- // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %6)
- // CHECK-NEXT: %8 = insertelement <2 x float> poison, float %7, i32 0
- // CHECK-NEXT: %9 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %10 = extractelement <2 x bfloat> %1, i32 1
- // CHECK-NEXT: %11 = fpext half %9 to float
- // CHECK-NEXT: %12 = fpext bfloat %10 to float
- // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %11, float %12)
- // CHECK-NEXT: %14 = insertelement <2 x float> %8, float %13, i32 1
- // CHECK-NEXT: ret <2 x float> %14
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xbf16> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-// vector<2xf16> + vector<2xf32> -> vector<2xf32>
-llvm.func @addf_vector_f16_f32_f32(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rn(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rn_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_sat(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rn_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rn_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rn_sat_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rm(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rm_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_sat(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rm_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rm_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rm_sat_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rp(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rp_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_sat(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rp_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rp_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rp_sat_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rz_sat(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_sat(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rz_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_f16_f32_f32_rz_sat_ftz(%a : vector<2xf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_f16_f32_f32_rz_sat_ftz(<2 x half> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-// vector<2xf16> + vector<2xf64> -> vector<2xf64>
-llvm.func @addf_vector_f16_f64_f64(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64(<2 x half> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f16_f64_f64_rn(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rn(<2 x half> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f16_f64_f64_rm(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rm(<2 x half> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f16_f64_f64_rp(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rp(<2 x half> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f16_f64_f64_rz(%a : vector<2xf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f16_f64_f64_rz(<2 x half> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x half> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext half %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x half> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext half %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-// vector<2xbf16> + vector<2xf32> -> vector<2xf32>
-llvm.func @addf_vector_bf16_f32_f32(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rn(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rn_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_sat(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rn_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rn_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rn_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rm(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rm_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_sat(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rm_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rm_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rm_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rp(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rp_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_sat(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rp_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rp_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rp_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rz_sat(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_sat(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rz_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-llvm.func @addf_vector_bf16_f32_f32_rz_sat_ftz(%a : vector<2xbf16>, %b : vector<2xf32>) -> vector<2xf32> {
- // CHECK-LABEL: define <2 x float> @addf_vector_bf16_f32_f32_rz_sat_ftz(<2 x bfloat> %0, <2 x float> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x float> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to float
- // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %5, float %4)
- // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x float> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to float
- // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %10, float %9)
- // CHECK-NEXT: %12 = insertelement <2 x float> %7, float %11, i32 1
- // CHECK-NEXT: ret <2 x float> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : vector<2xbf16>, vector<2xf32> -> vector<2xf32>
- llvm.return %f1 : vector<2xf32>
-}
-
-// vector<2xbf16> + vector<2xf64> -> vector<2xf64>
-llvm.func @addf_vector_bf16_f64_f64(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64(<2 x bfloat> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_bf16_f64_f64_rn(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rn(<2 x bfloat> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_bf16_f64_f64_rm(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rm(<2 x bfloat> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_bf16_f64_f64_rp(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rp(<2 x bfloat> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_bf16_f64_f64_rz(%a : vector<2xbf16>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_bf16_f64_f64_rz(<2 x bfloat> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x bfloat> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext bfloat %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x bfloat> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext bfloat %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-// vector<2xf32> + vector<2xf64> -> vector<2xf64>
-llvm.func @addf_vector_f32_f64_f64(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64(<2 x float> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext float %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext float %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b : vector<2xf32>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f32_f64_f64_rn(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rn(<2 x float> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext float %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext float %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rn.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f32_f64_f64_rm(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rm(<2 x float> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext float %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext float %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rm.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f32_f64_f64_rp(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rp(<2 x float> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext float %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext float %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rp.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
-
-llvm.func @addf_vector_f32_f64_f64_rz(%a : vector<2xf32>, %b : vector<2xf64>) -> vector<2xf64> {
- // CHECK-LABEL: define <2 x double> @addf_vector_f32_f64_f64_rz(<2 x float> %0, <2 x double> %1) {
- // CHECK-NEXT: %3 = extractelement <2 x float> %0, i32 0
- // CHECK-NEXT: %4 = extractelement <2 x double> %1, i32 0
- // CHECK-NEXT: %5 = fpext float %3 to double
- // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %5, double %4)
- // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0
- // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1
- // CHECK-NEXT: %9 = extractelement <2 x double> %1, i32 1
- // CHECK-NEXT: %10 = fpext float %8 to double
- // CHECK-NEXT: %11 = call double @llvm.nvvm.add.rz.d(double %10, double %9)
- // CHECK-NEXT: %12 = insertelement <2 x double> %7, double %11, i32 1
- // CHECK-NEXT: ret <2 x double> %12
- // CHECK-NEXT: }
- %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xf32>, vector<2xf64> -> vector<2xf64>
- llvm.return %f1 : vector<2xf64>
-}
More information about the Mlir-commits
mailing list