[llvm] 85395af - [DAGCombine] fp_to_sint isSaturatingMinMax
Samuel Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 26 04:45:51 PST 2023
Author: Samuel Parker
Date: 2023-01-26T12:37:43Z
New Revision: 85395af27241ab9c8d5763b8afcaa07f1bab26d5
URL: https://github.com/llvm/llvm-project/commit/85395af27241ab9c8d5763b8afcaa07f1bab26d5
DIFF: https://github.com/llvm/llvm-project/commit/85395af27241ab9c8d5763b8afcaa07f1bab26d5.diff
LOG: [DAGCombine] fp_to_sint isSaturatingMinMax
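In isSaturatingMinMax, also recognise a single smax against zero as a
saturating pattern when the fp_to_sint source is a float type small enough
that its maximum value fits in the integer result type, so the conversion
can never produce the upper bound and only one range check is needed.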
Differential Revision: https://reviews.llvm.org/D142481
Added:
Modified:
llvm/include/llvm/ADT/APFloat.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Support/APFloat.cpp
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index c0e2d13c29392..09ab9b36bb494 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -246,6 +246,8 @@ struct APFloatBase {
static ExponentType semanticsMinExponent(const fltSemantics &);
static ExponentType semanticsMaxExponent(const fltSemantics &);
static unsigned int semanticsSizeInBits(const fltSemantics &);
+ static unsigned int semanticsIntSizeInBits(const fltSemantics&,
+ bool isSigned);
/// Returns the size of the floating point number (in bits) in the given
/// semantics.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0a3ebd73d2722..0b7c511f0a1d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5108,7 +5108,7 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC, unsigned &BW,
- bool &Unsigned) {
+ bool &Unsigned, SelectionDAG &DAG) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
@@ -5132,6 +5132,26 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
if (!Opcode0)
return SDValue();
+ // We could only need one range check, if the fptosi could never produce
+ // the upper value.
+ if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
+ if (isNullOrNullSplat(N3)) {
+ EVT IntVT = N0.getValueType();
+ EVT FPVT = N0.getOperand(0).getValueType();
+ if (FPVT.isSimple()) {
+ Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext())->getScalarType();
+ const fltSemantics &Semantics = InputTy->getFltSemantics();
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
+ if (IntVT.getSizeInBits() >= MinBitWidth) {
+ Unsigned = true;
+ BW = PowerOf2Ceil(MinBitWidth);
+ return N0;
+ }
+ }
+ }
+ }
+
SDValue N00, N01, N02, N03;
ISD::CondCode N0CC;
switch (N0.getOpcode()) {
@@ -5194,7 +5214,7 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SelectionDAG &DAG) {
unsigned BW;
bool Unsigned;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index eae4fdb6c3d07..cdeed3e42ba94 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -251,6 +251,16 @@ namespace llvm {
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
return semantics.sizeInBits;
}
+ unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
+ bool isSigned) {
+ // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
+ // at least one more bit than the MaxExponent to hold the max FP value.
+ unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
+ // Extra sign bit needed.
+ if (isSigned)
+ ++MinBitWidth;
+ return MinBitWidth;
+ }
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
return Sem.sizeInBits;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3f851a2b2182f..70ec15fca808f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -501,16 +501,12 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
// If the integer type can hold the max FP value, it is safe to cast
// directly to that type. Otherwise, we may create poison via overflow
// that did not exist in the original code.
- //
- // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
- // at least one more bit than the MaxExponent to hold the max FP value.
Type *InputTy = I->getOperand(0)->getType()->getScalarType();
const fltSemantics &Semantics = InputTy->getFltSemantics();
- uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
- // Extra sign bit needed.
- if (I->getOpcode() == Instruction::FPToSI)
- ++MinBitWidth;
- return Ty->getScalarSizeInBits() > MinBitWidth;
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics,
+ I->getOpcode() == Instruction::FPToSI);
+ return Ty->getScalarSizeInBits() >= MinBitWidth;
}
default:
// TODO: Can handle more cases here.
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 4a1a9a2b3cf0e..3288c07b35c73 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -191,19 +191,11 @@ entry:
define i32 @ustest_f16i32_cse(half %x) {
; CHECK-LABEL: ustest_f16i32_cse:
; CHECK: .functype ustest_f16i32_cse (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i64
@@ -485,18 +477,11 @@ entry:
define i16 @ustest_f16i16_cse(half %x) {
; CHECK-LABEL: ustest_f16i16_cse:
; CHECK: .functype ustest_f16i16_cse (f32) -> (i32)
-; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i32.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.gt_s
-; CHECK-NEXT: i32.select
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i32
@@ -607,7 +592,6 @@ entry:
ret i64 %conv6
}
-
define i64 @ustest_f64i64(double %x) {
; CHECK-LABEL: ustest_f64i64:
; CHECK: .functype ustest_f64i64 (f64) -> (i64)
@@ -1214,19 +1198,11 @@ entry:
define i32 @ustest_f16i32_mm_cse(half %x) {
; CHECK-LABEL: ustest_f16i32_mm_cse:
; CHECK: .functype ustest_f16i32_mm_cse (f32) -> (i32)
-; CHECK-NEXT: .local i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i64.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i64.const 0
-; CHECK-NEXT: i64.gt_s
-; CHECK-NEXT: i64.select
-; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i64
@@ -1477,18 +1453,11 @@ entry:
define i16 @ustest_f16i16_mm_cse(half %x) {
; CHECK-LABEL: ustest_f16i16_mm_cse:
; CHECK: .functype ustest_f16i16_mm_cse (f32) -> (i32)
-; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
; CHECK-NEXT: call __extendhfsf2
-; CHECK-NEXT: i32.trunc_sat_f32_s
-; CHECK-NEXT: local.tee 1
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.gt_s
-; CHECK-NEXT: i32.select
+; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi half %x to i32
More information about the llvm-commits mailing list