[Mlir-commits] [mlir] [MLIR][NVVM] Add LLVMIR lowering for nvvm.subf (PR #184968)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Mar 6 01:17:11 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Srinivasa Ravi (Wolfram70)
<details>
<summary>Changes</summary>
This change adds a direct LLVMIR lowering for the `nvvm.subf` operation (added in #<!-- -->179162) to prevent translation failures when canonicalization is not run. It also adds `mlir-translate` tests for `nvvm.subf`.
PTX ISA Reference:
1. https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sub
2. https://docs.nvidia.com/cuda/parallel-thread-execution/#half-precision-floating-point-instructions-sub
---
Patch is 40.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184968.diff
7 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+14-2)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+20-15)
- (modified) mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp (+13-19)
- (modified) mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir (+7-7)
- (added) mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir (+117)
- (added) mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir (+67)
- (added) mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir (+313)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 43c7b3df73efe..f8e1ab38e80d4 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -6341,11 +6341,15 @@ def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> {
let hasVerifier = 1;
let extraClassDeclaration = [{
+ // Shared lowering for nvvm.addf and nvvm.subf (via fneg+add)
static void lowerAddFToLLVMIR(
- Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+ llvm::Value *lhs, llvm::Value *rhs, Value res,
+ NVVM::FPRoundingMode rnd, NVVM::SaturationMode sat, bool ftz,
+ LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
}];
let llvmBuilder = [{
- NVVM::AddFOp::lowerAddFToLLVMIR(*op, moduleTranslation, builder);
+ NVVM::AddFOp::lowerAddFToLLVMIR($lhs, $rhs, op.getRes(), $rnd, $sat, $ftz,
+ moduleTranslation, builder);
}];
}
@@ -6366,6 +6370,14 @@ def NVVM_SubFOp : NVVM_FloatBinaryOp<"subf"> {
}];
let hasCanonicalizer = 1;
+ let hasVerifier = 1;
+
+ let llvmBuilder = [{
+ // sub(a, b) = add(a, -b)
+ llvm::Value *rhs = builder.CreateFNeg($rhs);
+ NVVM::AddFOp::lowerAddFToLLVMIR($lhs, rhs, op.getRes(), $rnd, $sat, $ftz,
+ moduleTranslation, builder);
+ }];
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 354a02f4a2aae..6ccd59cec65bc 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -3059,33 +3059,34 @@ LogicalResult NVVM::TensormapReplaceOp::verify() {
return success();
}
-LogicalResult NVVM::AddFOp::verify() {
- mlir::NVVM::FPRoundingMode rndMode = getRnd();
- mlir::NVVM::SaturationMode satMode = getSat();
- bool isFTZ = getFtz();
+template <typename OpType>
+static LogicalResult verifyAddSubFOp(OpType op) {
+ mlir::NVVM::FPRoundingMode rndMode = op.getRnd();
+ mlir::NVVM::SaturationMode satMode = op.getSat();
+ bool isFTZ = op.getFtz();
- mlir::Type opType = getRes().getType();
+ mlir::Type opType = op.getRes().getType();
mlir::Type opBaseType = isa<VectorType>(opType)
? cast<VectorType>(opType).getElementType()
: opType;
if (opBaseType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ))
- return emitOpError("FTZ and saturation are not supported for additions "
- "involving f64 type");
+ return op.emitOpError("FTZ and saturation are not supported for "
+ "additions/subtractions involving f64 type");
if (opBaseType.isF16() && !(rndMode == NVVM::FPRoundingMode::RN ||
rndMode == NVVM::FPRoundingMode::NONE))
- return emitOpError("only RN rounding mode is supported for f16 and "
- "vector<2xf16> additions");
+ return op.emitOpError("only RN rounding mode is supported for f16 and "
+ "vector<2xf16> additions/subtractions");
if (opBaseType.isBF16()) {
if (rndMode != NVVM::FPRoundingMode::RN &&
rndMode != NVVM::FPRoundingMode::NONE)
- return emitOpError("only RN rounding mode is supported for bf16 and "
- "vector<2xbf16> additions");
+ return op.emitOpError("only RN rounding mode is supported for bf16 and "
+ "vector<2xbf16> additions/subtractions");
if (satMode != NVVM::SaturationMode::NONE || isFTZ)
- return emitOpError("FTZ and saturation are not supported for bf16 and "
- "vector<2xbf16> additions");
+ return op.emitOpError("FTZ and saturation are not supported for bf16 and "
+ "vector<2xbf16> additions/subtractions");
}
// FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2)
@@ -3093,12 +3094,16 @@ LogicalResult NVVM::AddFOp::verify() {
// should be removed once the intrinsics for f16 addition (with FTZ only) are
// available.
if (opBaseType.isF16() && isFTZ && satMode == NVVM::SaturationMode::NONE)
- return emitOpError("FTZ with no saturation is not supported for f16 and "
- "vector<2xf16> additions");
+ return op.emitOpError("FTZ with no saturation is not supported for f16 and "
+ "vector<2xf16> additions/subtractions");
return success();
}
+LogicalResult NVVM::AddFOp::verify() { return verifyAddSubFOp<AddFOp>(*this); }
+
+LogicalResult NVVM::SubFOp::verify() { return verifyAddSubFOp<SubFOp>(*this); }
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 02067bb456b25..092643f408ce6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -446,21 +446,15 @@ getFenceProxySyncRestrictID(NVVM::MemOrderKind order) {
nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster;
}
-void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
+void NVVM::AddFOp::lowerAddFToLLVMIR(llvm::Value *argLHS, llvm::Value *argRHS,
+ Value res, NVVM::FPRoundingMode rndMode,
+ NVVM::SaturationMode satMode, bool isFTZ,
+ LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase &builder) {
- auto thisOp = cast<NVVM::AddFOp>(op);
- NVVM::FPRoundingMode rndMode = thisOp.getRnd();
- NVVM::SaturationMode satMode = thisOp.getSat();
- bool isFTZ = thisOp.getFtz();
+ llvm::Type *opTypeLLVM = argLHS->getType();
+ bool isVectorOp = opTypeLLVM->isVectorTy();
bool isSat = satMode != NVVM::SaturationMode::NONE;
- llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs());
- llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs());
-
- mlir::Type opType = thisOp.getLhs().getType();
- llvm::Type *opTypeLLVM = mt.convertType(opType);
- bool isVectorAdd = opTypeLLVM->isVectorTy();
-
// FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when
// they are available.
static constexpr llvm::Intrinsic::ID f16IDs[] = {
@@ -507,8 +501,8 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
return createIntrinsicCall(builder, IID, callArgs);
};
- if (isVectorAdd && (opTypeLLVM->getScalarType()->isFloatTy() ||
- opTypeLLVM->getScalarType()->isDoubleTy())) {
+ if (isVectorOp && (opTypeLLVM->getScalarType()->isFloatTy() ||
+ opTypeLLVM->getScalarType()->isDoubleTy())) {
llvm::Value *result = llvm::PoisonValue::get(
llvm::FixedVectorType::get(opTypeLLVM->getScalarType(), 2));
for (int64_t i = 0; i < 2; ++i) {
@@ -531,25 +525,25 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
if (opTypeLLVM->getScalarType()->isHalfTy()) {
llvm::Value *result;
if (isSat) {
- unsigned index = (isVectorAdd << 1) | isFTZ;
+ unsigned index = (isVectorOp << 1) | isFTZ;
result = addIntrinsic(f16IDs[index]);
} else {
result = builder.CreateFAdd(argLHS, argRHS);
}
- mt.mapValue(thisOp.getRes(), result);
+ mt.mapValue(res, result);
return;
}
// bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16>
if (opTypeLLVM->getScalarType()->isBFloatTy()) {
- mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS));
+ mt.mapValue(res, builder.CreateFAdd(argLHS, argRHS));
return;
}
// f64 + f64 -> f64 / vector<2xf64> + vector<2xf64> -> vector<2xf64>
if (opTypeLLVM->getScalarType()->isDoubleTy()) {
unsigned index = static_cast<unsigned>(rndMode);
- mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index]));
+ mt.mapValue(res, addIntrinsic(f64IDs[index]));
return;
}
@@ -558,7 +552,7 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt,
if (opTypeLLVM->getScalarType()->isFloatTy()) {
unsigned index =
((isFTZ << 1) | isSat) * numRndModes + static_cast<unsigned>(rndMode);
- mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index]));
+ mt.mapValue(res, addIntrinsic(f32IDs[index]));
return;
}
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
index b0b162357fe2f..23ba79ee3d8af 100644
--- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir
@@ -11,7 +11,7 @@ llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f16 {
// -----
llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
- // expected-error at +1 {{FTZ and saturation are not supported for additions involving f64 type}}
+ // expected-error at +1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}}
%f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64
llvm.return %f1 : f64
}
@@ -19,7 +19,7 @@ llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
// -----
llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
- // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16
llvm.return %f1 : f16
}
@@ -27,7 +27,7 @@ llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
// -----
llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
- // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}}
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xf16>
llvm.return %f1 : vector<2xf16>
}
@@ -35,7 +35,7 @@ llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -
// -----
llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
- // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : bf16
llvm.return %f1 : bf16
}
@@ -43,7 +43,7 @@ llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 {
// -----
llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> {
- // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}}
+ // expected-error at +1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : vector<2xbf16>
llvm.return %f1 : vector<2xbf16>
}
@@ -51,7 +51,7 @@ llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>
// -----
llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
- // expected-error at +1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}}
+ // expected-error at +1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : bf16
llvm.return %f1 : bf16
}
@@ -61,7 +61,7 @@ llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 {
// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are
// available.
llvm.func @addf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 {
- // expected-error at +1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions}}
+ // expected-error at +1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions/subtractions}}
%f1 = nvvm.addf %a, %b {ftz=true} : f16
llvm.return %f1 : f16
}
diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir
new file mode 100644
index 0000000000000..e21bcfb42023d
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir
@@ -0,0 +1,117 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// f16 - f16 -> f16
+llvm.func @fsub_f16_f16(%a : f16, %b : f16) -> f16 {
+ // CHECK-LABEL: define half @fsub_f16_f16(half %0, half %1) {
+ // CHECK-NEXT: %3 = fneg half %1
+ // CHECK-NEXT: %4 = fadd half %0, %3
+ // CHECK-NEXT: %5 = fneg half %4
+ // CHECK-NEXT: %6 = fadd half %4, %5
+ // CHECK-NEXT: %7 = fneg half %6
+ // CHECK-NEXT: %8 = call half @llvm.nvvm.add.rn.sat.f16(half %6, half %7)
+ // CHECK-NEXT: %9 = fneg half %8
+ // CHECK-NEXT: %10 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %8, half %9)
+ // CHECK-NEXT: ret half %10
+ // CHECK-NEXT: }
+ %f1 = nvvm.subf %a, %b : f16
+ %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f16
+ %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f16
+ %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f16
+ llvm.return %f4 : f16
+}
+
+// bf16 - bf16 -> bf16
+llvm.func @fsub_bf16_bf16(%a : bf16, %b : bf16) -> bf16 {
+ // CHECK-LABEL: define bfloat @fsub_bf16_bf16(bfloat %0, bfloat %1) {
+ // CHECK-NEXT: %3 = fneg bfloat %1
+ // CHECK-NEXT: %4 = fadd bfloat %0, %3
+ // CHECK-NEXT: %5 = fneg bfloat %4
+ // CHECK-NEXT: %6 = fadd bfloat %4, %5
+ // CHECK-NEXT: ret bfloat %6
+ // CHECK-NEXT: }
+ %f1 = nvvm.subf %a, %b : bf16
+ %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : bf16
+ llvm.return %f2 : bf16
+}
+
+// f32 - f32 -> f32
+llvm.func @fsub_f32_f32(%a : f32, %b : f32) -> f32 {
+ // CHECK-LABEL: define float @fsub_f32_f32(float %0, float %1) {
+ // CHECK-NEXT: %3 = fneg float %1
+ // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %0, float %3)
+ // CHECK-NEXT: %5 = fneg float %4
+ // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %4, float %5)
+ // CHECK-NEXT: %7 = fneg float %6
+ // CHECK-NEXT: %8 = call float @llvm.nvvm.add.rn.sat.f(float %6, float %7)
+ // CHECK-NEXT: %9 = fneg float %8
+ // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rn.ftz.f(float %8, float %9)
+ // CHECK-NEXT: %11 = fneg float %10
+ // CHECK-NEXT: %12 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %11)
+ // CHECK-NEXT: %13 = fneg float %12
+ // CHECK-NEXT: %14 = call float @llvm.nvvm.add.rm.f(float %12, float %13)
+ // CHECK-NEXT: %15 = fneg float %14
+ // CHECK-NEXT: %16 = call float @llvm.nvvm.add.rm.sat.f(float %14, float %15)
+ // CHECK-NEXT: %17 = fneg float %16
+ // CHECK-NEXT: %18 = call float @llvm.nvvm.add.rm.ftz.f(float %16, float %17)
+ // CHECK-NEXT: %19 = fneg float %18
+ // CHECK-NEXT: %20 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %18, float %19)
+ // CHECK-NEXT: %21 = fneg float %20
+ // CHECK-NEXT: %22 = call float @llvm.nvvm.add.rp.f(float %20, float %21)
+ // CHECK-NEXT: %23 = fneg float %22
+ // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rp.sat.f(float %22, float %23)
+ // CHECK-NEXT: %25 = fneg float %24
+ // CHECK-NEXT: %26 = call float @llvm.nvvm.add.rp.ftz.f(float %24, float %25)
+ // CHECK-NEXT: %27 = fneg float %26
+ // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %26, float %27)
+ // CHECK-NEXT: %29 = fneg float %28
+ // CHECK-NEXT: %30 = call float @llvm.nvvm.add.rz.f(float %28, float %29)
+ // CHECK-NEXT: %31 = fneg float %30
+ // CHECK-NEXT: %32 = call float @llvm.nvvm.add.rz.sat.f(float %30, float %31)
+ // CHECK-NEXT: %33 = fneg float %32
+ // CHECK-NEXT: %34 = call float @llvm.nvvm.add.rz.ftz.f(float %32, float %33)
+ // CHECK-NEXT: %35 = fneg float %34
+ // CHECK-NEXT: %36 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %34, float %35)
+ // CHECK-NEXT: ret float %36
+ // CHECK-NEXT: }
+ %f1 = nvvm.subf %a, %b : f32
+ %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f32
+ %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>} : f32
+ %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rn>, ftz=true} : f32
+ %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f6 = nvvm.subf %f5, %f5 {rnd = #nvvm.fp_rnd_mode<rm>} : f32
+ %f7 = nvvm.subf %f6, %f6 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>} : f32
+ %f8 = nvvm.subf %f7, %f7 {rnd = #nvvm.fp_rnd_mode<rm>, ftz=true} : f32
+ %f9 = nvvm.subf %f8, %f8 {rnd = #nvvm.fp_rnd_mode<rm>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f10 = nvvm.subf %f9, %f9 {rnd = #nvvm.fp_rnd_mode<rp>} : f32
+ %f11 = nvvm.subf %f10, %f10 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>} : f32
+ %f12 = nvvm.subf %f11, %f11 {rnd = #nvvm.fp_rnd_mode<rp>, ftz=true} : f32
+ %f13 = nvvm.subf %f12, %f12 {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ %f14 = nvvm.subf %f13, %f13 {rnd = #nvvm.fp_rnd_mode<rz>} : f32
+ %f15 = nvvm.subf %f14, %f14 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>} : f32
+ %f16 = nvvm.subf %f15, %f15 {rnd = #nvvm.fp_rnd_mode<rz>, ftz=true} : f32
+ %f17 = nvvm.subf %f16, %f16 {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<sat>, ftz=true} : f32
+ llvm.return %f17 : f32
+}
+
+// f64 - f64 -> f64
+llvm.func @fsub_f64_f64(%a : f64, %b : f64) -> f64 {
+ // CHECK-LABEL: define double @fsub_f64_f64(double %0, double %1) {
+ // CHECK-NEXT: %3 = fneg double %1
+ // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %0, double %3)
+ // CHECK-NEXT: %5 = fneg double %4
+ // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %4, double %5)
+ // CHECK-NEXT: %7 = fneg double %6
+ // CHECK-NEXT: %8 = call double @llvm.nvvm.add.rm.d(double %6, double %7)
+ // CHECK-NEXT: %9 = fneg double %8
+ // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rp.d(double %8, double %9)
+ // CHECK-NEXT: %11 = fneg double %10
+ // CHECK-NEXT: %12 = call double @llvm.nvvm.add.rz.d(double %10, double %11)
+ // CHECK-NEXT: ret double %12
+ // CHECK-NEXT: }
+ %f1 = nvvm.subf %a, %b : f64
+ %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode<rn>} : f64
+ %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode<rm>} : f64
+ %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode<rp>} : f64
+ %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode<rz>} : f64
+ llvm.return %f5 : f64
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir
new file mode 100644
index 0000000000000..bf4bbd19cb396
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s
+
+// -----
+
+llvm.func @subf_invalid_sat_mode(%a : f16, %b : f16) -> f16 {
+ // expected-error at +1 {{ attribute 'sat' failed to satisfy constraint: Describes the saturation mode whose value is one of {none, sat}}}
+ %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode<satfinite>} : f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @subf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 {
+ // expected-error at +1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}}
+ %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode<sat>, ftz=true} : f64
+ llvm.return %f1 : f64
+}
+
+// -----
+
+llvm.func @subf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 {
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}}
+ %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode<rm>} : f16
+ llvm.return %f1 : f16
+}
+
+// -----
+
+llvm.func @subf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> {
+ // expected-error at +1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}}
+ %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/184968
More information about the Mlir-commits mailing list