[Mlir-commits] [mlir] [MLIR][NVVM] Add nvvm.fadd and nvvm.fsub Ops (PR #179162)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Feb 1 20:54:49 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Srinivasa Ravi (Wolfram70)
<details>
<summary>Changes</summary>
This change adds the `nvvm.fadd` and `nvvm.fsub` Ops to the NVVM dialect. `nvvm.fadd` performs floating-point addition of two operands, extending narrower operands to the result type where necessary.
`nvvm.fsub` performs floating-point subtraction of two operands; it is canonicalized into an `llvm.fneg` of the right-hand operand followed by an `nvvm.fadd` operation.
PTX ISA Reference:
1. https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add
2. https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add
3. https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add
---
Patch is 71.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/179162.diff
7 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+76-5)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+212)
- (added) mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir (+9)
- (added) mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir (+89)
- (added) mlir/test/Target/LLVMIR/nvvm/fadd_different_return_type.mlir (+400)
- (added) mlir/test/Target/LLVMIR/nvvm/fadd_invalid.mlir (+107)
- (added) mlir/test/Target/LLVMIR/nvvm/fadd_mixed_arg_types.mlir (+684)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 64a52acbb2278..0dce63c4e5a74 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -1860,12 +1860,12 @@ def FPRoundingModeAttr : EnumAttr<NVVM_Dialect, FPRoundingMode, "fp_rnd_mode"> {
let assemblyFormat = "`<` $value `>`";
}
-def SaturationModeNone : I32EnumAttrCase<"NONE", 0, "none">;
-def SaturationModeFinite : I32EnumAttrCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeNone : I32EnumCase<"NONE", 0, "none">;
+def SaturationModeFinite : I32EnumCase<"SATFINITE", 1, "satfinite">;
+def SaturationModeSat : I32EnumCase<"SAT", 2, "sat">;
-def SaturationMode : I32EnumAttr<"SaturationMode", "NVVM SaturationMode kind",
- [SaturationModeNone, SaturationModeFinite]> {
- let genSpecializedAttr = 0;
+def SaturationMode : I32Enum<"SaturationMode", "NVVM SaturationMode kind",
+ [SaturationModeNone, SaturationModeFinite, SaturationModeSat]> {
let cppNamespace = "::mlir::NVVM";
}
def SaturationModeAttr : EnumAttr<NVVM_Dialect, SaturationMode, "sat_mode"> {
@@ -6155,6 +6155,77 @@ def NVVM_Tcgen05MMAWsSparseOp : NVVM_Op<"tcgen05.mma.ws.sp",
}];
}
+// `nvvm.fadd` op definition: declared with the Pure and Commutative traits,
+// it takes two floating point operands plus the `rnd`, `sat` and `ftz`
+// modifiers and produces a single result.
+def NVVM_FloatAdditionOp :
+ NVVM_SingleResultIntrinsicOp<"fadd", [Pure, Commutative]> {
+ let summary = [{
+ Performs floating point addition operation with support for mixed precision
+ operands
+ }];
+ let description = [{
+ The `nvvm.fadd` operation performs floating point addition of two operands.
+
+ The rounding mode to be used is specified by the `rnd` attribute,
+ saturation mode by the `sat` attribute, and FTZ by the `ftz` unit attribute.
+
+ The result type must be at least as wide as the operands. The operands are
+ converted to the result type before addition if it is wider.
+
+ For more information, see PTX ISA - [floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-add),
+ [half-precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-add),
+ [mixed precision floating point addition](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-add).
+ }];
+ // Operands and result each accept a scalar f16/bf16/f32/f64 or a 2-element
+ // vector of f16/bf16. `rnd` and `sat` default to NONE; `ftz` is a unit
+ // attribute (present or absent).
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ // Which type/modifier combinations are legal is decided by
+ // FloatAdditionOp::verify().
+ let hasVerifier = 1;
+
+ // Translation to LLVM IR: getIntrinsicIDAndArgs either returns an intrinsic
+ // ID together with its arguments, or returns `not_intrinsic` after mapping
+ // the result value itself, in which case nothing more is emitted here. When
+ // an intrinsic is called, its result is fpext'ed if the op's result type is
+ // wider than the call's type.
+ let llvmBuilder = [{
+ auto [ID, args] = NVVM::FloatAdditionOp::getIntrinsicIDAndArgs(*op, moduleTranslation, builder);
+ if(ID != llvm::Intrinsic::not_intrinsic) {
+ llvm::Value *addResult = createIntrinsicCall(builder, ID, args);
+ $res = ($_resultType->getScalarSizeInBits() >
+ addResult->getType()->getScalarSizeInBits())
+ ? builder.CreateFPExt(addResult, $_resultType) : addResult;
+ }
+ }];
+}
+
+// `nvvm.fsub` op definition: a Pure op with the same operand, attribute and
+// result constraints as `nvvm.fadd`. This definition declares neither a
+// verifier nor an `llvmBuilder`; it only enables a canonicalizer.
+// NOTE(review): presumably the op must therefore be canonicalized into
+// `llvm.fneg` + `nvvm.fadd` before translation to LLVM IR, and illegal
+// type/modifier combinations are only diagnosed on the resulting `nvvm.fadd`
+// -- confirm.
+def NVVM_FloatSubtractionOp :
+ NVVM_Op<"fsub", [Pure]> {
+ let summary = [{
+ Performs floating point subtraction operation with support for mixed
+ precision operands
+ }];
+ let description = [{
+ The `nvvm.fsub` operation performs floating point subtraction of two
+ operands.
+
+ It supports the same type combinations and modifiers as `nvvm.fadd`.
+ This is equivalent to `nvvm.fadd(lhs, -rhs)`.
+
+ For more information, see PTX ISA - [floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub),
+ [half-precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub),
+ [mixed precision floating point subtraction](https://docs.nvidia.com/cuda/parallel-thread-execution/#mixed-precision-floating-point-instructions-sub).
+
+ }];
+ // Same operand/attribute list as `nvvm.fadd`: two floating point operands
+ // (scalar f16/bf16/f32/f64 or 2-element f16/bf16 vectors), `rnd` and `sat`
+ // defaulting to NONE, and the `ftz` unit attribute.
+ let arguments = (ins
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$lhs,
+ AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$rhs,
+ DefaultValuedAttr<FPRoundingModeAttr, "FPRoundingMode::NONE">:$rnd,
+ DefaultValuedAttr<SaturationModeAttr, "SaturationMode::NONE">:$sat,
+ UnitAttr:$ftz
+ );
+ let results = (outs AnyTypeOf<[F16, BF16, F32, F64, VectorOfLengthAndType<[2], [F16, BF16]>]>:$res);
+ let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type(operands) `->` type($res)";
+ // The canonicalization patterns are registered in
+ // FloatSubtractionOp::getCanonicalizationPatterns.
+ let hasCanonicalizer = 1;
+}
+
//===----------------------------------------------------------------------===//
// NVVM tensormap.replace Op
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 76ec8b8b7cfd2..033b420d0faee 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3072,6 +3072,85 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
+/// Verifies the operand/result types and the `rnd`, `sat` and `ftz` modifiers
+/// of an `nvvm.fadd` op.
+///
+/// Returns success() when the combination is supported; otherwise emits an op
+/// error describing the first violated rule and returns failure. The
+/// rounding-mode range check is deliberately performed last so that all other
+/// diagnostics keep their priority.
+LogicalResult NVVM::FloatAdditionOp::verify() {
+  auto resFType = getRes().getType();
+  auto lhsFType = getLhs().getType();
+  auto rhsFType = getRhs().getType();
+  auto rndMode = getRnd();
+  auto satMode = getSat();
+  auto isFTZ = getFtz();
+
+  if (satMode == NVVM::SaturationMode::SATFINITE)
+    return emitOpError("SATFINITE saturation mode is not supported for "
+                       "floating point addition operation");
+
+  // Either all of lhs, rhs and result are vectors, or none of them is.
+  if (isa<VectorType>(resFType) != isa<VectorType>(lhsFType) ||
+      isa<VectorType>(resFType) != isa<VectorType>(rhsFType))
+    return emitOpError("cannot mix vector and scalar types for floating point "
+                       "addition operation");
+
+  // Vector additions require one common element type (the previous check
+  // guarantees rhs and result are vectors whenever lhs is).
+  if (isa<VectorType>(lhsFType) &&
+      ((cast<VectorType>(lhsFType).getElementType() !=
+        cast<VectorType>(rhsFType).getElementType()) ||
+       (cast<VectorType>(lhsFType).getElementType() !=
+        cast<VectorType>(resFType).getElementType())))
+    return emitOpError(
+        "cannot mix different element types for vector floating point "
+        "addition operation");
+
+  if (resFType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
+    return emitOpError("FTZ and saturation are not supported for additions "
+                       "involving f64 type");
+
+  // Element type for vectors, the type itself for scalars.
+  auto getBaseFType = [](Type type) -> Type {
+    if (isa<VectorType>(type))
+      return cast<VectorType>(type).getElementType();
+    return type;
+  };
+
+  auto resBaseFType = getBaseFType(resFType);
+  auto lhsBaseFType = getBaseFType(lhsFType);
+  auto rhsBaseFType = getBaseFType(rhsFType);
+
+  if (resBaseFType.getIntOrFloatBitWidth() <
+      std::max(lhsBaseFType.getIntOrFloatBitWidth(),
+               rhsBaseFType.getIntOrFloatBitWidth()))
+    return emitOpError("result type must be at least as wide as the operands");
+
+  if (resBaseFType.isF16() && rndMode != NVVM::FPRoundingMode::RN &&
+      rndMode != NVVM::FPRoundingMode::NONE)
+    return emitOpError("only RN rounding mode is supported for f16 and "
+                       "vector<2xf16> additions");
+
+  if (resBaseFType.isBF16()) {
+    if (rndMode != NVVM::FPRoundingMode::RN &&
+        rndMode != NVVM::FPRoundingMode::NONE)
+      return emitOpError("only RN rounding mode is supported for bf16 and "
+                         "vector<2xbf16> additions");
+    if (satMode != NVVM::SaturationMode::NONE || isFTZ)
+      return emitOpError("FTZ and saturation are not supported for bf16 and "
+                         "vector<2xbf16> additions");
+  }
+
+  // f16 and bf16 have the same bit width, so the width check above does not
+  // exclude mixing them; a 16-bit result requires both operands to have
+  // exactly the result's element type.
+  if (resBaseFType.isF16() && !(lhsBaseFType.isF16() && rhsBaseFType.isF16()))
+    return emitOpError("only f16 + f16 is supported for f16 result type");
+
+  if (resBaseFType.isBF16() &&
+      !(lhsBaseFType.isBF16() && rhsBaseFType.isBF16()))
+    return emitOpError("only bf16 + bf16 is supported for bf16 result type");
+
+  // FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
+  // PTX instructions since the corresponding LLVM intrinsic is missing. This
+  // should be removed once the intrinsics for f16 addition (with FTZ only) are
+  // available.
+  if ((isa<VectorType>(resFType) || resBaseFType.isF16()) && isFTZ &&
+      satMode == NVVM::SaturationMode::NONE)
+    return emitOpError(
+        "FTZ with no saturation is not supported for f16 additions");
+
+  // The LLVM IR lowering indexes its f32/f64 intrinsic tables directly with
+  // the rounding-mode enum value, and those tables only have entries for
+  // NONE, RN, RM, RP and RZ. Reject every other rounding mode here so the
+  // lowering can never read past the end of a table or select an unrelated
+  // intrinsic.
+  switch (rndMode) {
+  case NVVM::FPRoundingMode::NONE:
+  case NVVM::FPRoundingMode::RN:
+  case NVVM::FPRoundingMode::RM:
+  case NVVM::FPRoundingMode::RP:
+  case NVVM::FPRoundingMode::RZ:
+    break;
+  default:
+    return emitOpError("only RN, RM, RP and RZ rounding modes are supported "
+                       "for floating point addition operation");
+  }
+
+  return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -3148,6 +3227,33 @@ std::string NVVM::MBarrierTryWaitParityOp::getPtx() {
space);
}
+//===----------------------------------------------------------------------===//
+// Canonicalization patterns
+//===----------------------------------------------------------------------===//
+
+/// Rewrites `nvvm.fsub lhs, rhs` into `nvvm.fadd lhs, (llvm.fneg rhs)`. The
+/// result type and the rounding, saturation and FTZ modifiers of the
+/// subtraction are carried over to the addition unchanged. The pattern always
+/// succeeds.
+struct ConvertFsubToFnegFadd : public OpRewritePattern<FloatSubtractionOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(FloatSubtractionOp op,
+                                PatternRewriter &rewriter) const override {
+    // Negate the subtrahend, keeping its original type.
+    Value subtrahend = op.getRhs();
+    Value negated = LLVM::FNegOp::create(rewriter, op.getLoc(),
+                                         subtrahend.getType(), subtrahend);
+
+    // Swap the subtraction for an addition of the negated value.
+    rewriter.replaceOpWithNewOp<FloatAdditionOp>(
+        op, op.getType(), op.getLhs(), negated, op.getRnd(), op.getSat(),
+        op.getFtz());
+    return success();
+  }
+};
+
+/// Registers the canonicalization patterns of `nvvm.fsub`: the single pattern
+/// ConvertFsubToFnegFadd, which replaces the op with `llvm.fneg` + `nvvm.fadd`.
+void FloatSubtractionOp::getCanonicalizationPatterns(
+ RewritePatternSet &patterns, MLIRContext *context) {
+ patterns.add<ConvertFsubToFnegFadd>(context);
+}
+
//===----------------------------------------------------------------------===//
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//
@@ -4887,6 +4993,112 @@ mlir::NVVM::IDArgPair TensormapReplaceOp::getIntrinsicIDAndArgs(
return {IDs[fieldIndex], args};
}
+/// Selects the LLVM lowering for an `nvvm.fadd` op.
+///
+/// \param op      the operation; must be an NVVM::FloatAdditionOp.
+/// \param mt      module translation used to look up the already-translated
+///                operands and, for the plain-`fadd` cases, to map the result.
+/// \param builder IR builder used for the `fpext`/`fadd` instructions that
+///                are created here.
+/// \returns the intrinsic ID and its two arguments. For f16/bf16 (scalar or
+///          2-element vector) results without saturation, a plain `fadd` is
+///          emitted and mapped to the op result directly, and
+///          `llvm::Intrinsic::not_intrinsic` is returned with empty arguments.
+///
+/// NOTE(review): the f32/f64 table lookups use the raw rounding-mode enum
+/// value as an index and only have entries for values 0..4 (NONE, RN, RM, RP,
+/// RZ per the table comments). A rounding mode with a larger value would index
+/// out of bounds unless the verifier rejects it -- confirm.
+mlir::NVVM::IDArgPair FloatAdditionOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::FloatAdditionOp>(op);
+  llvm::SmallVector<llvm::Value *> args;
+  auto rndMode = thisOp.getRnd();
+  bool isSat = thisOp.getSat() == NVVM::SaturationMode::SAT;
+  bool isFTZ = thisOp.getFtz();
+
+  llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
+  llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
+
+  mlir::Type lhsType = thisOp.getLhs().getType();
+  mlir::Type rhsType = thisOp.getRhs().getType();
+  mlir::Type resType = thisOp.getRes().getType();
+
+  // Indexed by (isVector << 1) | isFTZ; all entries are saturating.
+  // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
+  // they are available.
+  static constexpr llvm::Intrinsic::ID f16IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f16,
+      llvm::Intrinsic::nvvm_add_rn_sat_v2f16,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_v2f16,
+  };
+
+  // Four groups of five entries, indexed by
+  // ((isFTZ << 1) | isSat) * 5 + rounding mode.
+  static constexpr llvm::Intrinsic::ID f32IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_f,
+      llvm::Intrinsic::nvvm_add_rm_f,
+      llvm::Intrinsic::nvvm_add_rp_f,
+      llvm::Intrinsic::nvvm_add_rz_f,
+      llvm::Intrinsic::nvvm_add_rn_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_sat_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_f,
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rm_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rp_ftz_sat_f,
+      llvm::Intrinsic::nvvm_add_rz_ftz_sat_f,
+  };
+
+  // Indexed by rounding mode only.
+  static constexpr llvm::Intrinsic::ID f64IDs[] = {
+      llvm::Intrinsic::nvvm_add_rn_d, // default rounding mode RN
+      llvm::Intrinsic::nvvm_add_rn_d, llvm::Intrinsic::nvvm_add_rm_d,
+      llvm::Intrinsic::nvvm_add_rp_d, llvm::Intrinsic::nvvm_add_rz_d};
+
+  // Builds the return value for an intrinsic lowering; a null LHS/RHS means
+  // "use the translated operand as is".
+  auto addIntrinsic = [&](llvm::Intrinsic::ID IID, llvm::Value *LHS = nullptr,
+                          llvm::Value *RHS = nullptr) -> NVVM::IDArgPair {
+    args.push_back(LHS ? LHS : argLHS);
+    args.push_back(RHS ? RHS : argRHS);
+    return {IID, args};
+  };
+
+  // f16 + f16 -> f16 / vector<2xf16> + vector<2xf16> -> vector<2xf16>
+  // FIXME: Allow lowering to add.rn.ftz.f16x2 and add.rn.ftz.f16 here when the
+  // intrinsics are available.
+  bool isVectorF16Add = isa<VectorType>(resType) &&
+                        cast<VectorType>(resType).getElementType().isF16();
+  if (resType.isF16() || isVectorF16Add) {
+    if (isSat) {
+      unsigned index = (isVectorF16Add << 1) | isFTZ;
+      return addIntrinsic(f16IDs[index]);
+    }
+    // Without saturation a plain `fadd` is emitted and mapped to the result
+    // here; the caller is told that no intrinsic call is needed.
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return {llvm::Intrinsic::not_intrinsic, args};
+  }
+
+  // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
+  bool isVectorBF16Add = isa<VectorType>(resType) &&
+                         cast<VectorType>(resType).getElementType().isBF16();
+  if (resType.isBF16() || isVectorBF16Add) {
+    mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+    return {llvm::Intrinsic::not_intrinsic, args};
+  }
+
+  // f64 + f64/f32/f16/bf16: extend any narrower operand to double first.
+  if (resType.isF64()) {
+    llvm::Value *lhsF64 =
+        lhsType.isF64() ? argLHS
+                        : builder.CreateFPExt(argLHS, builder.getDoubleTy());
+    llvm::Value *rhsF64 =
+        rhsType.isF64() ? argRHS
+                        : builder.CreateFPExt(argRHS, builder.getDoubleTy());
+    unsigned index = static_cast<unsigned>(rndMode);
+    return addIntrinsic(f64IDs[index], lhsF64, rhsF64);
+  }
+
+  // f16 + f16 -> !f16 / bf16 + bf16 -> !bf16 / f16 + bf16 / f32 + f32/f16/bf16
+  // All remaining cases are computed in f32: extend any narrower operand.
+  llvm::Value *lhsF32 = lhsType.isF32()
+                            ? argLHS
+                            : builder.CreateFPExt(argLHS, builder.getFloatTy());
+  llvm::Value *rhsF32 = rhsType.isF32()
+                            ? argRHS
+                            : builder.CreateFPExt(argRHS, builder.getFloatTy());
+  unsigned index = ((isFTZ << 1) | isSat) * 5 + static_cast<unsigned>(rndMode);
+  return addIntrinsic(f32IDs[index], lhsF32, rhsF32);
+}
+
//===----------------------------------------------------------------------===//
// NVVM tcgen05.mma functions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
new file mode 100644
index 0000000000000..76d0a1453edf9
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm-canonicalize.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-opt %s -split-input-file --canonicalize | FileCheck %s
+
+// Canonicalization is expected to replace `nvvm.fsub %a, %b` with an
+// `llvm.fneg` of the right-hand operand followed by an `nvvm.fadd`.
+// CHECK-LABEL: @fsub_canonicalize
+llvm.func @fsub_canonicalize(%arg0 : f32, %arg1 : f32) -> f32 {
+ // CHECK: %[[NEG_ARG1:.*]] = llvm.fneg %arg1 : f32
+ // CHECK: %[[ADD_RESULT:.*]] = nvvm.fadd %arg0, %[[NEG_ARG1]] : f32, f32 -> f32
+ %0 = nvvm.fsub %arg0, %arg1 : f32, f32 -> f32
+ llvm.return %0 : f32
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
new file mode 100644
index 0000000000000..2aa2bf3a4906b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/fadd_all_same_types.mlir
@@ -0,0 +1,89 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 + f16 -> f16
+// Without `sat` (default or `rn` rounding) the op is expected to become a
+// plain `fadd half`; with `sat` it becomes a call to the
+// llvm.nvvm.add.rn.sat.f16 intrinsic, or the `.ftz.sat` variant when `ftz` is
+// also set.
+llvm.func @fadd_f16_f16(%a : f16, %b : f16) -> f16 {
+ // CHECK-LABEL: define half @fadd_f16_f16(half %0, half %1) {
+ // CHECK-NEXT: %3 = fadd half %0, %1
+ // CHECK-NEXT: %4 = fadd half %3, %3
+ // CHECK-NEXT: %5 = call half @llvm.nvvm.add.rn.sat.f16(half %4, half %4)
+ // CHECK-NEXT: %6 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %5, half %5)
+ // CHECK-NEXT: ret half %6
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f16, f16 -> f16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16, f16 -> f16
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16, f16 -> f16
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f16, f16 -> f16
+ llvm.return %f4 : f16
+}
+
+// bf16 + bf16 -> bf16
+// Both the default and the explicit `rn` rounding mode are expected to become
+// a plain `fadd bfloat`; no intrinsic call is emitted.
+llvm.func @fadd_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
+ // CHECK-LABEL: define bfloat @fadd_bf16_bf16(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fadd bfloat %0, %1
+ // CHECK-NEXT: %4 = fadd bfloat %3, %3
+ // CHECK-NEXT: ret bfloat %4
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : bf16, bf16 -> bf16
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16, bf16 -> bf16
+ llvm.return %f2 : bf16
+}
+
+// f32 + f32 -> f32
+llvm.func @fadd_f32_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fadd_f32_f32(float %0, float %1) {
+ // CHECK-NEXT: %3 = call float @llvm.nvvm.add.rn.f(float %0, float %1)
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %3, float %3)
+ // CHECK-NEXT: %5 = call float @llvm.nvvm.add.rn.sat.f(float %4, float %4)
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.ftz.f(float %5, float %5)
+ // CHECK-NEXT: %7 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %6, float %6)
+ // CHECK-NEXT: %8 = call float @llvm.nvvm.add.rm.f(float %7, float %7)
+ // CHECK-NEXT: %9 = call float @llvm.nvvm.add.rm.sat.f(float %8, float %8)
+ // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rm.ftz.f(float %9, float %9)
+ // CHECK-NEXT: %11 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %10, float %10)
+ // CHECK-NEXT: %12 = call float @llvm.nvvm.add.rp.f(float %11, float %11)
+ // CHECK-NEXT: %13 = call float @llvm.nvvm.add.rp.sat.f(float %12, float %12)
+ // CHECK-NEXT: %14 = call float @llvm.nvvm.add.rp.ftz.f(float %13, float %13)
+ // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %14, float %14)
+ // CHECK-NEXT: %16 = call float @llvm.nvvm.add.rz.f(float %15, float %15)
+ // CHECK-NEXT: %17 = call float @llvm.nvvm.add.rz.sat.f(float %16, float %16)
+ // CHECK-NEXT: %18 = call float @llvm.nvvm.add.rz.ftz.f(float %17, float %17)
+ // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %18, float %18)
+ // CHECK-NEXT: ret float %19
+ // CHECK-NEXT: }
+ %f1 = nvvm.fadd %a, %b : f32, f32 -> f32
+ %f2 = nvvm.fadd %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32, f32 -> f32
+ %f3 = nvvm.fadd %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f4 = nvvm.fadd %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz} : f32, f32 -> f32
+ %f5 = nvvm.fadd %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f6 = nvvm.fadd %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32, f32 -> f32
+ %f7 = nvvm.fadd %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f8 = nvvm.fadd %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz} : f32, f32 -> f32
+ %f9 = nvvm.fadd %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f10 = nvvm.fadd %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32, f32 -> f32
+ %f11 = nvvm.fadd %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f12 = nvvm.fadd %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz} : f32, f32 -> f32
+ %f13 = nvvm.fadd %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz} : f32, f32 -> f32
+ %f14 = nvvm.fadd %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32, f32 -> f32
+ %f15 = nvvm.fadd %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32, f32 -> f32
+ %f16 = nvvm.fadd %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz} : f32, f32 -> f32
+ %f17 = nvvm.fadd %f16, %f16 {rnd...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/179162
More information about the Mlir-commits
mailing list