[Mlir-commits] [mlir] 3a3732c - [mlir][arith] wide integer emulation support for fpto*i ops (#132375)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Mar 27 17:58:59 PDT 2025
Author: egebeysel
Date: 2025-03-27T20:58:56-04:00
New Revision: 3a3732c2527056342051068c3dbaef4e822bd0a6
URL: https://github.com/llvm/llvm-project/commit/3a3732c2527056342051068c3dbaef4e822bd0a6
DIFF: https://github.com/llvm/llvm-project/commit/3a3732c2527056342051068c3dbaef4e822bd0a6.diff
LOG: [mlir][arith] wide integer emulation support for fpto*i ops (#132375)
Add wide integer emulation support for `arith.fpto*i` operations. As
with the other emulated operations, the upper and lower `N` bits of the
`i2N` integer result are emitted separately.
For the unsigned case, we use the following emulation:
```c
// example is 64 -> 32 bit emulation, but the implementation is generalized to any 2N -> N case
const double TWO_POW_N = (uint64_t(1) << N); // 2^N, N is the bitwidth of the widest int supported
// f is a floating-point value representing the input of the fptoui op.
uint32_t hi = (uint32_t)(f / TWO_POW_N); // Truncates the division result
uint32_t lo = (uint32_t)(f - hi * TWO_POW_N); // Subtracts to get the lower bits.
```
For the signed case, we defer the emulation of the absolute value to
`fptoui` and handle the sign:
```
fptosi(fp) = sign(fp) * fptoui(abs(fp))
```
The edge cases of NaNs, `+-inf` and overflows/underflows are undefined
behaviour, and the resulting numbers are a combination of the UB values
of the lower-bitwidth conversions. These operations also propagate poison values.
Signed-off-by: Ege Beysel <beysel at roofline.ai>
Added:
mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptosi-i64.mlir
mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptoui-i64.mlir
Modified:
mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
mlir/test/Dialect/Arith/emulate-wide-int.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
index 3226b5d99114a..22babe8123617 100644
--- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
@@ -17,6 +17,7 @@
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
@@ -1008,6 +1009,128 @@ struct ConvertUIToFP final : OpConversionPattern<arith::UIToFPOp> {
}
};
+//===----------------------------------------------------------------------===//
+// ConvertFPToSI
+//===----------------------------------------------------------------------===//
+
+struct ConvertFPToSI final : OpConversionPattern<arith::FPToSIOp> {
+ using OpConversionPattern::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(arith::FPToSIOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+ // Get the input float value and its type.
+ Value inFp = adaptor.getIn();
+ Type fpTy = inFp.getType();
+
+ Type intTy = op.getType();
+
+ auto newTy = getTypeConverter()->convertType<VectorType>(intTy);
+ if (!newTy)
+ return rewriter.notifyMatchFailure(
+ loc, llvm::formatv("unsupported type: {}", intTy));
+
+ // Convert the absolute value with fptoui, then restore the sign on the
+ // integer result. If minSInt < fp < maxSInt, i.e. if the fp is
+ // representable in signed i2N, this emits the correct result. Else, the
+ // result is UB.
+
+ TypedAttr zeroAttr = rewriter.getZeroAttr(fpTy);
+ Value zeroCst = rewriter.create<arith::ConstantOp>(loc, zeroAttr);
+ Value zeroCstInt = createScalarOrSplatConstant(rewriter, loc, intTy, 0);
+
+ // Compute the absolute value as select(fp < 0, -fp, fp). One could have
+ // used math.absf here, but that introduces an extra dependency.
+ Value isNeg = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::OLT,
+ inFp, zeroCst);
+ Value negInFp = rewriter.create<arith::NegFOp>(loc, inFp);
+
+ Value absVal = rewriter.create<arith::SelectOp>(loc, isNeg, negInFp, inFp);
+
+ // Convert the absolute value with a wide (i2N) fptoui.
+ Value res = rewriter.create<arith::FPToUIOp>(loc, intTy, absVal);
+
+ // Negate the integer result (0 - res) if the input was negative.
+ Value neg = rewriter.create<arith::SubIOp>(loc, zeroCstInt, res);
+
+ rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNeg, neg, res);
+ return success();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// ConvertFPToUI
+//===----------------------------------------------------------------------===//
+
+struct ConvertFPToUI final : OpConversionPattern<arith::FPToUIOp> {
+ using OpConversionPattern::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(arith::FPToUIOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+ // Get the input float value and its type.
+ Value inFp = adaptor.getIn();
+ Type fpTy = inFp.getType();
+
+ Type intTy = op.getType();
+ auto newTy = getTypeConverter()->convertType<VectorType>(intTy);
+ if (!newTy)
+ return rewriter.notifyMatchFailure(
+ loc, llvm::formatv("unsupported type: {}", intTy));
+ unsigned newBitWidth = newTy.getElementTypeBitWidth();
+
+ Type newHalfType = IntegerType::get(inFp.getContext(), newBitWidth);
+ if (auto vecType = dyn_cast<VectorType>(fpTy))
+ newHalfType = VectorType::get(vecType.getShape(), newHalfType);
+
+ // The resulting integer consists of an upper and a lower half and is
+ // interpreted as 2^N * high + low, where N is the half bitwidth. For the
+ // upper half, we emit resHigh = fptoui(fp / 2^N). For the lower half, we
+ // emit fptoui(remf(fp, 2^N)), i.e. fptoui(fp - resHigh * 2^N). Overflows,
+ // including +-inf, as well as NaNs and negative numbers are UB.
+
+ const llvm::fltSemantics &fSemantics =
+ cast<FloatType>(getElementTypeOrSelf(fpTy)).getFloatSemantics();
+
+ auto powBitwidth = llvm::APFloat(fSemantics);
+ // If 2^N does not fit the floating-point type, we set powBitwidth to inf.
+ // This ensures that the upper half is correctly set to 0. The opStatus
+ // inexact can only occur here when the conversion overflows, since 2^N is
+ // always a power of two.
+ if (powBitwidth.convertFromAPInt(APInt(newBitWidth * 2, 1).shl(newBitWidth),
+ false, llvm::RoundingMode::TowardZero) ==
+ llvm::detail::opStatus::opInexact)
+ powBitwidth = llvm::APFloat::getInf(fSemantics);
+
+ TypedAttr powBitwidthAttr =
+ FloatAttr::get(getElementTypeOrSelf(fpTy), powBitwidth);
+ if (auto vecType = dyn_cast<VectorType>(fpTy))
+ powBitwidthAttr = SplatElementsAttr::get(vecType, powBitwidthAttr);
+ Value powBitwidthFloatCst =
+ rewriter.create<arith::ConstantOp>(loc, powBitwidthAttr);
+
+ Value fpDivPowBitwidth =
+ rewriter.create<arith::DivFOp>(loc, inFp, powBitwidthFloatCst);
+ Value resHigh =
+ rewriter.create<arith::FPToUIOp>(loc, newHalfType, fpDivPowBitwidth);
+ // Calculate fp - resHigh * 2^N as the remainder of the division fp / 2^N.
+ Value remainder =
+ rewriter.create<arith::RemFOp>(loc, inFp, powBitwidthFloatCst);
+ Value resLow =
+ rewriter.create<arith::FPToUIOp>(loc, newHalfType, remainder);
+
+ Value high = appendX1Dim(rewriter, loc, resHigh);
+ Value low = appendX1Dim(rewriter, loc, resLow);
+
+ Value resultVec = constructResultVector(rewriter, loc, newTy, {low, high});
+
+ rewriter.replaceOp(op, resultVec);
+ return success();
+ }
+};
+
//===----------------------------------------------------------------------===//
// ConvertTruncI
//===----------------------------------------------------------------------===//
@@ -1184,5 +1307,6 @@ void arith::populateArithWideIntEmulationPatterns(
ConvertIndexCastIntToIndex<arith::IndexCastUIOp>,
ConvertIndexCastIndexToInt<arith::IndexCastOp, arith::ExtSIOp>,
ConvertIndexCastIndexToInt<arith::IndexCastUIOp, arith::ExtUIOp>,
- ConvertSIToFP, ConvertUIToFP>(typeConverter, patterns.getContext());
+ ConvertSIToFP, ConvertUIToFP, ConvertFPToUI, ConvertFPToSI>(
+ typeConverter, patterns.getContext());
}
diff --git a/mlir/test/Dialect/Arith/emulate-wide-int.mlir b/mlir/test/Dialect/Arith/emulate-wide-int.mlir
index 52da80ce26a73..936050cddb676 100644
--- a/mlir/test/Dialect/Arith/emulate-wide-int.mlir
+++ b/mlir/test/Dialect/Arith/emulate-wide-int.mlir
@@ -1046,3 +1046,112 @@ func.func @sitofp_i64_f64_vector(%a : vector<3xi64>) -> vector<3xf64> {
%r = arith.sitofp %a : vector<3xi64> to vector<3xf64>
return %r : vector<3xf64>
}
+
+// CHECK-LABEL: func @fptoui_i64_f64
+// CHECK-SAME: ([[ARG:%.+]]: f64) -> vector<2xi32>
+// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64
+// CHECK-NEXT: [[DIV:%.+]] = arith.divf [[ARG]], [[POW]] : f64
+// CHECK-NEXT: [[HIGHHALF:%.+]] = arith.fptoui [[DIV]] : f64 to i32
+// CHECK-NEXT: [[REM:%.+]] = arith.remf [[ARG]], [[POW]] : f64
+// CHECK-NEXT: [[LOWHALF:%.+]] = arith.fptoui [[REM]] : f64 to i32
+// CHECK: %{{.+}} = vector.insert [[LOWHALF]], %{{.+}} [0]
+// CHECK-NEXT: [[RESVEC:%.+]] = vector.insert [[HIGHHALF]], %{{.+}} [1]
+// CHECK: return [[RESVEC]] : vector<2xi32>
+func.func @fptoui_i64_f64(%a : f64) -> i64 {
+ %r = arith.fptoui %a : f64 to i64
+ return %r : i64
+}
+
+// CHECK-LABEL: func @fptoui_i64_f64_vector
+// CHECK-SAME: ([[ARG:%.+]]: vector<3xf64>) -> vector<3x2xi32>
+// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64>
+// CHECK-NEXT: [[DIV:%.+]] = arith.divf [[ARG]], [[POW]] : vector<3xf64>
+// CHECK-NEXT: [[HIGHHALF:%.+]] = arith.fptoui [[DIV]] : vector<3xf64> to vector<3xi32>
+// CHECK-NEXT: [[REM:%.+]] = arith.remf [[ARG]], [[POW]] : vector<3xf64>
+// CHECK-NEXT: [[LOWHALF:%.+]] = arith.fptoui [[REM]] : vector<3xf64> to vector<3xi32>
+// CHECK-DAG: [[HIGHHALFX1:%.+]] = vector.shape_cast [[HIGHHALF]] : vector<3xi32> to vector<3x1xi32>
+// CHECK-DAG: [[LOWHALFX1:%.+]] = vector.shape_cast [[LOWHALF]] : vector<3xi32> to vector<3x1xi32>
+// CHECK: %{{.+}} = vector.insert_strided_slice [[LOWHALFX1]], %{{.+}} {offsets = [0, 0], strides = [1, 1]}
+// CHECK-NEXT: [[RESVEC:%.+]] = vector.insert_strided_slice [[HIGHHALFX1]], %{{.+}} {offsets = [0, 1], strides = [1, 1]}
+// CHECK: return [[RESVEC]] : vector<3x2xi32>
+func.func @fptoui_i64_f64_vector(%a : vector<3xf64>) -> vector<3xi64> {
+ %r = arith.fptoui %a : vector<3xf64> to vector<3xi64>
+ return %r : vector<3xi64>
+}
+
+// This generates lines that are already verified by other patterns.
+// We do not re-verify these and just check for the wrapper around fptoui by following its low part.
+// CHECK-LABEL: func @fptosi_i64_f64
+// CHECK-SAME: ([[ARG:%.+]]: f64) -> vector<2xi32>
+// CHECK: [[ZEROCST:%.+]] = arith.constant 0.000000e+00 : f64
+// CHECK: [[ZEROCSTINT:%.+]] = arith.constant dense<0> : vector<2xi32>
+// CHECK-NEXT: [[ISNEGATIVE:%.+]] = arith.cmpf olt, [[ARG]], [[ZEROCST]] : f64
+// CHECK-NEXT: [[NEGATED:%.+]] = arith.negf [[ARG]] : f64
+// CHECK-NEXT: [[ABSVALUE:%.+]] = arith.select [[ISNEGATIVE]], [[NEGATED]], [[ARG]] : f64
+// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64
+// CHECK-NEXT: [[DIV:%.+]] = arith.divf [[ABSVALUE]], [[POW]] : f64
+// CHECK-NEXT: [[HIGHHALF:%.+]] = arith.fptoui [[DIV]] : f64 to i32
+// CHECK-NEXT: [[REM:%.+]] = arith.remf [[ABSVALUE]], [[POW]] : f64
+// CHECK-NEXT: [[LOWHALF:%.+]] = arith.fptoui [[REM]] : f64 to i32
+// CHECK: vector.insert [[LOWHALF]], %{{.+}} [0] : i32 into vector<2xi32>
+// CHECK-NEXT: [[FPTOUIRESVEC:%.+]] = vector.insert [[HIGHHALF]]
+// CHECK: [[ZEROCSTINTHALF:%.+]] = vector.extract [[ZEROCSTINT]][0] : i32 from vector<2xi32>
+// CHECK: [[SUB:%.+]] = arith.subi [[ZEROCSTINTHALF]], %{{.+}} : i32
+// CHECK-NEXT: arith.cmpi ult, [[ZEROCSTINTHALF]], %{{.+}} : i32
+// CHECK-NEXT: arith.extui
+// CHECK-NEXT: arith.subi
+// CHECK-NEXT: arith.subi
+// CHECK: vector.insert [[SUB]]
+// CHECK: [[SUBVEC:%.+]] = vector.insert
+// CHECK: [[SUB:%.+]] = vector.extract [[SUBVEC]][0] : i32 from vector<2xi32>
+// CHECK: [[LOWRES:%.+]] = vector.extract [[FPTOUIRESVEC]][0] : i32 from vector<2xi32>
+// CHECK: [[ABSRES:%.+]] = arith.select [[ISNEGATIVE]], [[SUB]], [[LOWRES]] : i32
+// CHECK-NEXT: arith.select [[ISNEGATIVE]]
+// CHECK: vector.insert [[ABSRES]]
+// CHECK-NEXT: [[ABSRESVEC:%.+]] = vector.insert
+// CHECK-NEXT: return [[ABSRESVEC]] : vector<2xi32>
+func.func @fptosi_i64_f64(%a : f64) -> i64 {
+ %r = arith.fptosi %a : f64 to i64
+ return %r : i64
+}
+
+// Same as the non-vector one, we don't re-verify.
+// CHECK-LABEL: func @fptosi_i64_f64_vector
+// CHECK-SAME: ([[ARG:%.+]]: vector<3xf64>) -> vector<3x2xi32>
+// CHECK-NEXT: [[ZEROCST:%.+]] = arith.constant dense<0.000000e+00> : vector<3xf64>
+// CHECK-NEXT: [[ZEROCSTINT:%.+]] = arith.constant dense<0> : vector<3x2xi32>
+// CHECK-NEXT: [[ISNEGATIVE:%.+]] = arith.cmpf olt, [[ARG]], [[ZEROCST]] : vector<3xf64>
+// CHECK-NEXT: [[NEGATED:%.+]] = arith.negf [[ARG]] : vector<3xf64>
+// CHECK-NEXT: [[ABSVALUE:%.+]] = arith.select [[ISNEGATIVE]], [[NEGATED]], [[ARG]] : vector<3xi1>, vector<3xf64>
+// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64>
+// CHECK-NEXT: [[DIV:%.+]] = arith.divf [[ABSVALUE]], [[POW]] : vector<3xf64>
+// CHECK-NEXT: [[HIGHHALF:%.+]] = arith.fptoui [[DIV]] : vector<3xf64> to vector<3xi32>
+// CHECK-NEXT: [[REM:%.+]] = arith.remf [[ABSVALUE]], [[POW]] : vector<3xf64>
+// CHECK-NEXT: [[LOWHALF:%.+]] = arith.fptoui [[REM]] : vector<3xf64> to vector<3xi32>
+// CHECK-NEXT: [[HIGHHALFX1:%.+]] = vector.shape_cast [[HIGHHALF]] : vector<3xi32> to vector<3x1xi32>
+// CHECK-NEXT: [[LOWHALFX1:%.+]] = vector.shape_cast [[LOWHALF]] : vector<3xi32> to vector<3x1xi32>
+// CHECK: vector.insert_strided_slice [[LOWHALFX1]], %{{.+}} {offsets = [0, 0], strides = [1, 1]} : vector<3x1xi32> into vector<3x2xi32>
+// CHECK-NEXT: [[FPTOUIRESVEC:%.+]] = vector.insert_strided_slice [[HIGHHALFX1]]
+// CHECK: [[ZEROCSTINTHALF:%.+]] = vector.extract_strided_slice [[ZEROCSTINT]]
+// CHECK-SAME: {offsets = [0, 0], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32>
+// CHECK: [[SUB:%.+]] = arith.subi [[ZEROCSTINTHALF]], %{{.+}} : vector<3x1xi32>
+// CHECK-NEXT: arith.cmpi ult, [[ZEROCSTINTHALF]], %{{.+}} : vector<3x1xi32>
+// CHECK-NEXT: arith.extui
+// CHECK-NEXT: arith.subi
+// CHECK-NEXT: arith.subi
+// CHECK: vector.insert_strided_slice [[SUB]]
+// CHECK-NEXT: [[SUBVEC:%.+]] = vector.insert_strided_slice
+// CHECK: [[SUB:%.+]] = vector.extract_strided_slice [[SUBVEC]]
+// CHECK-SAME: {offsets = [0, 0], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32>
+// CHECK: [[LOWRES:%.+]] = vector.extract_strided_slice [[FPTOUIRESVEC]]
+// CHECK-SAME: {offsets = [0, 0], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32>
+// CHECK: [[ISNEGATIVEX1:%.+]] = vector.shape_cast [[ISNEGATIVE]] : vector<3xi1> to vector<3x1xi1>
+// CHECK: [[ABSRES:%.+]] = arith.select [[ISNEGATIVEX1]], [[SUB]], [[LOWRES]] : vector<3x1xi1>, vector<3x1xi32>
+// CHECK-NEXT: arith.select [[ISNEGATIVEX1]]
+// CHECK: vector.insert_strided_slice [[ABSRES]]
+// CHECK-NEXT: [[ABSRESVEC:%.+]] = vector.insert_strided_slice
+// CHECK-NEXT: return [[ABSRESVEC]] : vector<3x2xi32>
+func.func @fptosi_i64_f64_vector(%a : vector<3xf64>) -> vector<3xi64> {
+ %r = arith.fptosi %a : vector<3xf64> to vector<3xi64>
+ return %r : vector<3xi64>
+}
diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptosi-i64.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptosi-i64.mlir
new file mode 100644
index 0000000000000..d93b834c8f919
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptosi-i64.mlir
@@ -0,0 +1,89 @@
+// Check that the wide integer `arith.fptosi` emulation produces the same result as wide
+// `arith.fptosi`. Emulate i64 ops with i32 ops.
+
+// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s --match-full-lines
+
+// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=32" \
+// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s --match-full-lines
+
+// Ops in this function *only* will be emulated using i32 types.
+func.func @emulate_fptosi(%arg: f64) -> i64 {
+ %res = arith.fptosi %arg : f64 to i64
+ return %res : i64
+}
+
+func.func @check_fptosi(%arg : f64) -> () {
+ %res = func.call @emulate_fptosi(%arg) : (f64) -> (i64)
+ vector.print %res : i64
+ return
+}
+
+func.func @entry() {
+ %cst0 = arith.constant 0.0 : f64
+ %cst_nzero = arith.constant 0x8000000000000000 : f64
+ %cst1 = arith.constant 1.0 : f64
+ %cst_n1 = arith.constant -1.0 : f64
+ %cst_n1_5 = arith.constant -1.5 : f64
+
+ %cstpow20 = arith.constant 1048576.0 : f64
+ %cstnpow20 = arith.constant -1048576.0 : f64
+
+ %cst_i32_max = arith.constant 4294967295.0 : f64
+ %cst_i32_min = arith.constant -4294967296.0 : f64
+ %cst_i32_overflow = arith.constant 4294967296.0 : f64
+ %cst_i32_noverflow = arith.constant -4294967297.0 : f64
+
+
+ %cstpow40 = arith.constant 1099511627776.0 : f64
+ %cstnpow40 = arith.constant -1099511627776.0 : f64
+ %cst_pow40ppow20 = arith.constant 1099512676352.0 : f64
+ %cst_npow40ppow20 = arith.constant -1099512676352.0 : f64
+
+ %cst_max = arith.constant 9007199254740992.0
+ %cst_min = arith.constant -9007199254740992.0
+
+ // CHECK: 0
+ func.call @check_fptosi(%cst0) : (f64) -> ()
+ // CHECK-NEXT: 0
+ func.call @check_fptosi(%cst_nzero) : (f64) -> ()
+ // CHECK-NEXT: 1
+ func.call @check_fptosi(%cst1) : (f64) -> ()
+ // CHECK-NEXT: -1
+ func.call @check_fptosi(%cst_n1) : (f64) -> ()
+ // CHECK-NEXT: -1
+ func.call @check_fptosi(%cst_n1_5) : (f64) -> ()
+ // CHECK-NEXT: 1048576
+ func.call @check_fptosi(%cstpow20) : (f64) -> ()
+ // CHECK-NEXT: -1048576
+ func.call @check_fptosi(%cstnpow20) : (f64) -> ()
+ // CHECK-NEXT: 4294967295
+ func.call @check_fptosi(%cst_i32_max) : (f64) -> ()
+ // CHECK-NEXT: -4294967296
+ func.call @check_fptosi(%cst_i32_min) : (f64) -> ()
+ // CHECK-NEXT: 4294967296
+ func.call @check_fptosi(%cst_i32_overflow) : (f64) -> ()
+ // CHECK-NEXT: -4294967297
+ func.call @check_fptosi(%cst_i32_noverflow) : (f64) -> ()
+ // CHECK-NEXT: 1099511627776
+ func.call @check_fptosi(%cstpow40) : (f64) -> ()
+ // CHECK-NEXT: -1099511627776
+ func.call @check_fptosi(%cstnpow40) : (f64) -> ()
+ // CHECK-NEXT: 1099512676352
+ func.call @check_fptosi(%cst_pow40ppow20) : (f64) -> ()
+ // CHECK-NEXT: -1099512676352
+ func.call @check_fptosi(%cst_npow40ppow20) : (f64) -> ()
+ // CHECK-NEXT: 9007199254740992
+ func.call @check_fptosi(%cst_max) : (f64) -> ()
+ // CHECK-NEXT: -9007199254740992
+ func.call @check_fptosi(%cst_min) : (f64) -> ()
+
+ return
+}
diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptoui-i64.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptoui-i64.mlir
new file mode 100644
index 0000000000000..81283ee9fdfd8
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-fptoui-i64.mlir
@@ -0,0 +1,64 @@
+// Check that the wide integer `arith.fptoui` emulation produces the same result as wide
+// `arith.fptoui`. Emulate i64 ops with i32 ops.
+
+// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s --match-full-lines
+
+// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=32" \
+// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \
+// RUN: mlir-runner -e entry -entry-point-result=void \
+// RUN: --shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s --match-full-lines
+
+// Ops in this function *only* will be emulated using i32 types.
+func.func @emulate_fptoui(%arg: f64) -> i64 {
+ %res = arith.fptoui %arg : f64 to i64
+ return %res : i64
+}
+
+func.func @check_fptoui(%arg : f64) -> () {
+ %res = func.call @emulate_fptoui(%arg) : (f64) -> (i64)
+ vector.print %res : i64
+ return
+}
+
+func.func @entry() {
+ %cst0 = arith.constant 0.0 : f64
+ %cst1 = arith.constant 1.0 : f64
+ %cst1_5 = arith.constant 1.5 : f64
+
+ %cstpow20 = arith.constant 1048576.0 : f64
+ %cst_i32_max = arith.constant 4294967295.0 : f64
+ %cst_i32_overflow = arith.constant 4294967296.0 : f64
+
+
+ %cstpow40 = arith.constant 1099511627776.0 : f64
+ %cst_pow40ppow20 = arith.constant 1099512676352.0 : f64
+
+ %cst_nzero = arith.constant 0x8000000000000000 : f64
+
+ // CHECK: 0
+ func.call @check_fptoui(%cst0) : (f64) -> ()
+ // CHECK-NEXT: 1
+ func.call @check_fptoui(%cst1) : (f64) -> ()
+ // CHECK-NEXT: 1
+ func.call @check_fptoui(%cst1_5) : (f64) -> ()
+ // CHECK-NEXT: 1048576
+ func.call @check_fptoui(%cstpow20) : (f64) -> ()
+ // CHECK-NEXT: 4294967295
+ func.call @check_fptoui(%cst_i32_max) : (f64) -> ()
+ // CHECK-NEXT: 4294967296
+ func.call @check_fptoui(%cst_i32_overflow) : (f64) -> ()
+ // CHECK-NEXT: 1099511627776
+ func.call @check_fptoui(%cstpow40) : (f64) -> ()
+ // CHECK-NEXT: 1099512676352
+ func.call @check_fptoui(%cst_pow40ppow20) : (f64) -> ()
+ // CHECK-NEXT: 0
+ func.call @check_fptoui(%cst_nzero) : (f64) -> ()
+
+ return
+}
More information about the Mlir-commits
mailing list