[Mlir-commits] [mlir] 79d8a34 - [mlir] add some FP classification ops and their lowering to libdevice (#127322)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Feb 16 05:51:24 PST 2025
Author: Oleksandr "Alex" Zinenko
Date: 2025-02-16T14:51:21+01:00
New Revision: 79d8a34bc5c0a261b9e9f77c1d4480ef135481ac
URL: https://github.com/llvm/llvm-project/commit/79d8a34bc5c0a261b9e9f77c1d4480ef135481ac
DIFF: https://github.com/llvm/llvm-project/commit/79d8a34bc5c0a261b9e9f77c1d4480ef135481ac.diff
LOG: [mlir] add some FP classification ops and their lowering to libdevice (#127322)
Introduce a subset of floating point classification ops to the Math
dialect. These ops mirror functions provided by the C math library and,
similarly to the existing `math.copysign`, belong to the math dialect.
Add a lowering of those ops to Nvidia libdevice calls when possible as
the first mechanism to exercise them.
Added:
Modified:
mlir/include/mlir/Dialect/Math/IR/MathOps.td
mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
mlir/lib/Dialect/Math/IR/MathOps.cpp
mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
mlir/test/Dialect/Math/ops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
index 5990a9f0d2e44..8a277320e2f91 100644
--- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td
+++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td
@@ -34,6 +34,23 @@ class Math_IntegerUnaryOp<string mnemonic, list<Trait> traits = []> :
let assemblyFormat = "$operand attr-dict `:` type($result)";
}
+// Base class for floating point classification ops. Require a floating point
+// operand (scalar, vector or tensor) and an i1-based result of the same shape,
+// as enforced by the TypesMatchWith constraint below.
+class Math_FloatClassificationOp<string mnemonic, list<Trait> traits = []> :
+ Math_Op<mnemonic,
+ traits # [DeclareOpInterfaceMethods<ArithFastMathInterface>,
+ TypesMatchWith<
+ "result type has i1 element type and same shape as operands",
+ "operand", "result", "::getI1SameShape($_self)">]> {
+ let arguments = (ins FloatLike:$operand,
+ DefaultValuedAttr<Arith_FastMathAttr,
+ "::mlir::arith::FastMathFlags::none">:$fastmath);
+ let results = (outs BoolLike:$result);
+
+ let assemblyFormat = "$operand attr-dict `:` type($operand)";
+}
+
// Base class for unary math operations on floating point types. Require an
// operand and result of the same type. This type can be a floating point type,
// vector or tensor thereof.
@@ -678,6 +695,79 @@ def Math_IPowIOp : Math_IntegerBinaryOp<"ipowi"> {
let hasFolder = 1;
}
+//===----------------------------------------------------------------------===//
+// IsFiniteOp
+//===----------------------------------------------------------------------===//
+
+def Math_IsFiniteOp : Math_FloatClassificationOp<"isfinite"> {
+ let summary = "returns true if the operand classifies as finite";
+ let description = [{
+ Determines if the given floating-point number has finite value i.e. it
+ is normal, subnormal or zero, but not infinite or NaN.
+
+ Example:
+
+ ```mlir
+ %f = math.isfinite %a : f32
+ ```
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// IsInfOp
+//===----------------------------------------------------------------------===//
+
+def Math_IsInfOp : Math_FloatClassificationOp<"isinf"> {
+ let summary = "returns true if the operand classifies as infinite";
+ let description = [{
+ Determines if the given floating-point number is positive or negative
+ infinity.
+
+ Example:
+
+ ```mlir
+ %f = math.isinf %a : f32
+ ```
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// IsNaNOp
+//===----------------------------------------------------------------------===//
+
+def Math_IsNaNOp : Math_FloatClassificationOp<"isnan"> {
+ let summary = "returns true if the operand classifies as NaN";
+ let description = [{
+ Determines if the given floating-point number is a not-a-number (NaN)
+ value.
+
+ Example:
+
+ ```mlir
+ %f = math.isnan %a : f32
+ ```
+ }];
+}
+
+
+//===----------------------------------------------------------------------===//
+// IsNormalOp
+//===----------------------------------------------------------------------===//
+
+def Math_IsNormalOp : Math_FloatClassificationOp<"isnormal"> {
+ let summary = "returns true if the operand classifies as normal";
+ let description = [{
+ Determines if the given floating-point number is normal, i.e. is neither
+ zero, subnormal, infinite, nor NaN.
+
+ Example:
+
+ ```mlir
+ %f = math.isnormal %a : f32
+ ```
+ }];
+}
+
//===----------------------------------------------------------------------===//
// LogOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 9f7ceb11752ba..0bc2f697a7662 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -71,11 +71,13 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
std::is_base_of<OpTrait::OneResult<SourceOp>, SourceOp>::value,
"expected single result op");
+ bool isResultBool = op->getResultTypes().front().isInteger(1);
if constexpr (!std::is_base_of<OpTrait::SameOperandsAndResultType<SourceOp>,
SourceOp>::value) {
assert(op->getNumOperands() > 0 &&
"expected op to take at least one operand");
- assert(op->getResultTypes().front() == op->getOperand(0).getType() &&
+ assert((op->getResultTypes().front() == op->getOperand(0).getType() ||
+ isResultBool) &&
"expected op with same operand and result types");
}
@@ -88,10 +90,13 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
for (Value operand : adaptor.getOperands())
castedOperands.push_back(maybeCast(operand, rewriter));
- Type resultType = castedOperands.front().getType();
+ Type castedOperandType = castedOperands.front().getType();
+
+ // At ABI level, booleans are treated as i32.
+ Type resultType =
+ isResultBool ? rewriter.getIntegerType(32) : castedOperandType;
Type funcType = getFunctionType(resultType, castedOperands);
- StringRef funcName = getFunctionName(
- cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
+ StringRef funcName = getFunctionName(castedOperandType, op);
if (funcName.empty())
return failure();
@@ -104,6 +109,20 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
return success();
}
+ // Boolean results are mapped to i32 at the ABI level with zero values being
+ // interpreted as false and non-zero values being interpreted as true. Since
+ // there is no guarantee of a specific value being used to indicate true,
+ // compare for inequality with zero (rather than truncate or shift).
+ if (isResultBool) {
+ Value zero = rewriter.create<LLVM::ConstantOp>(
+ op->getLoc(), rewriter.getIntegerType(32),
+ rewriter.getI32IntegerAttr(0));
+ Value truncated = rewriter.create<LLVM::ICmpOp>(
+ op->getLoc(), LLVM::ICmpPredicate::ne, callOp.getResult(), zero);
+ rewriter.replaceOp(op, {truncated});
+ return success();
+ }
+
assert(callOp.getResult().getType().isF32() &&
"only f32 types are supposed to be truncated back");
Value truncated = rewriter.create<LLVM::FPTruncOp>(
@@ -118,7 +137,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
if (!isa<Float16Type, BFloat16Type>(type))
return operand;
- // if there's a f16 function, no need to cast f16 values
+ // If there's an f16 function, no need to cast f16 values.
if (!f16Func.empty() && isa<Float16Type>(type))
return operand;
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index c1a4d31861d3b..9290279112715 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -595,6 +595,13 @@ void mlir::populateGpuToNVVMConversionPatterns(
populateOpPatterns<math::FloorOp>(converter, patterns, "__nv_floorf",
"__nv_floor");
populateOpPatterns<math::FmaOp>(converter, patterns, "__nv_fmaf", "__nv_fma");
+ // Note: libdevice does not provide `__nv_isfinitef` as of this writing.
+ populateOpPatterns<math::IsFiniteOp>(converter, patterns, "",
+ "__nv_isfinited");
+ populateOpPatterns<math::IsInfOp>(converter, patterns, "__nv_isinff",
+ "__nv_isinfd");
+ populateOpPatterns<math::IsNaNOp>(converter, patterns, "__nv_isnanf",
+ "__nv_isnand");
populateOpPatterns<math::LogOp>(converter, patterns, "__nv_logf", "__nv_log",
"__nv_fast_logf");
populateOpPatterns<math::Log10Op>(converter, patterns, "__nv_log10f",
diff --git a/mlir/lib/Dialect/Math/IR/MathOps.cpp b/mlir/lib/Dialect/Math/IR/MathOps.cpp
index 1690585e78c5d..42e357c012739 100644
--- a/mlir/lib/Dialect/Math/IR/MathOps.cpp
+++ b/mlir/lib/Dialect/Math/IR/MathOps.cpp
@@ -16,6 +16,20 @@
using namespace mlir;
using namespace mlir::math;
+//===----------------------------------------------------------------------===//
+// Common helpers
+//===----------------------------------------------------------------------===//
+
+/// Return the type of the same shape (scalar, vector or tensor) containing i1.
+static Type getI1SameShape(Type type) {
+ auto i1Type = IntegerType::get(type.getContext(), 1);
+ if (auto shapedType = llvm::dyn_cast<ShapedType>(type))
+ return shapedType.cloneWith(std::nullopt, i1Type);
+ if (llvm::isa<UnrankedTensorType>(type))
+ return UnrankedTensorType::get(i1Type);
+ return i1Type;
+}
+
//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 9f74e0c7947e6..664a0bb0c0d5b 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1058,3 +1058,40 @@ gpu.module @test_module_53 {
func.return %result32, %result64 : f32, f64
}
}
+
+gpu.module @test_module_54 {
+ // CHECK: llvm.func @__nv_isinff(f32) -> i32
+ // CHECK: llvm.func @__nv_isinfd(f64) -> i32
+ // CHECK: llvm.func @__nv_isnanf(f32) -> i32
+ // CHECK: llvm.func @__nv_isnand(f64) -> i32
+ // CHECK: llvm.func @__nv_isfinited(f64) -> i32
+ // CHECK-LABEL: @fpclassify
+ func.func @fpclassify(%f32: f32, %f64: f64) -> (i1, i1, i1, i1, i1, i1) {
+ // CHECK: %[[INFF:.+]] = llvm.call @__nv_isinff(%{{.*}}) : (f32) -> i32
+ // CHECK: %[[ZERO:.+]] = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: %[[R0:.+]] = llvm.icmp "ne" %[[INFF]], %[[ZERO]]
+ %0 = math.isinf %f32 : f32
+ // CHECK: llvm.call @__nv_isinfd(%{{.*}}) : (f64) -> i32
+ // CHECK: llvm.mlir.constant(0
+ // CHECK: llvm.icmp "ne"
+ %1 = math.isinf %f64 : f64
+ // CHECK: llvm.call @__nv_isnanf(%{{.*}}) : (f32) -> i32
+ // CHECK: llvm.mlir.constant(0
+ // CHECK: llvm.icmp "ne"
+ %2 = math.isnan %f32 : f32
+ // CHECK: llvm.call @__nv_isnand(%{{.*}}) : (f64) -> i32
+ // CHECK: llvm.mlir.constant(0
+ // CHECK: llvm.icmp "ne"
+ %3 = math.isnan %f64 : f64
+ // Note: for some reason, libdevice does not provide isfinite for f32, so
+ // this should fail to convert.
+ // CHECK: math.isfinite {{.*}} : f32
+ %4 = math.isfinite %f32 : f32
+ // CHECK: llvm.call @__nv_isfinited(%{{.*}}) : (f64) -> i32
+ // CHECK: llvm.mlir.constant(0
+ // CHECK: llvm.icmp "ne"
+ %5 = math.isfinite %f64 : f64
+ // CHECK: llvm.return %[[R0]]
+ return %0, %1, %2, %3, %4, %5 : i1, i1, i1, i1, i1, i1
+ }
+}
diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir
index 7e45d9bc6f74a..8feadedd1860e 100644
--- a/mlir/test/Dialect/Math/ops.mlir
+++ b/mlir/test/Dialect/Math/ops.mlir
@@ -298,3 +298,42 @@ func.func @fastmath(%f: f32, %i: i32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>)
%4 = math.fpowi %f, %i fastmath<fast> : f32, i32
return
}
+
+// CHECK-LABEL: func @fpclassify(
+// CHECK-SAME: %[[F:.+]]: f32, %[[D:.+]]: f64,
+// CHECK-SAME: %[[V:.+]]: vector<4xf32>, %[[T:.+]]: tensor<4x?xf32>
+func.func @fpclassify(%f: f32, %d: f64, %v: vector<4xf32>, %t: tensor<4x?xf32>) {
+ // CHECK: math.isfinite %[[F]] : f32
+ // CHECK: math.isfinite %[[D]] : f64
+ // CHECK: math.isfinite %[[V]] : vector<4xf32>
+ // CHECK: math.isfinite %[[T]] : tensor<4x?xf32>
+ math.isfinite %f : f32
+ math.isfinite %d : f64
+ math.isfinite %v : vector<4xf32>
+ math.isfinite %t : tensor<4x?xf32>
+ // CHECK: math.isinf %[[F]] : f32
+ // CHECK: math.isinf %[[D]] : f64
+ // CHECK: math.isinf %[[V]] : vector<4xf32>
+ // CHECK: math.isinf %[[T]] : tensor<4x?xf32>
+ math.isinf %f : f32
+ math.isinf %d : f64
+ math.isinf %v : vector<4xf32>
+ math.isinf %t : tensor<4x?xf32>
+ // CHECK: math.isnan %[[F]] : f32
+ // CHECK: math.isnan %[[D]] : f64
+ // CHECK: math.isnan %[[V]] : vector<4xf32>
+ // CHECK: math.isnan %[[T]] : tensor<4x?xf32>
+ math.isnan %f : f32
+ math.isnan %d : f64
+ math.isnan %v : vector<4xf32>
+ math.isnan %t : tensor<4x?xf32>
+ // CHECK: math.isnormal %[[F]] : f32
+ // CHECK: math.isnormal %[[D]] : f64
+ // CHECK: math.isnormal %[[V]] : vector<4xf32>
+ // CHECK: math.isnormal %[[T]] : tensor<4x?xf32>
+ math.isnormal %f : f32
+ math.isnormal %d : f64
+ math.isnormal %v : vector<4xf32>
+ math.isnormal %t : tensor<4x?xf32>
+ return
+}
More information about the Mlir-commits
mailing list