[llvm] Optimize fptrunc(x)>=C1 --> x>=C2 (PR #99475)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 17:54:16 PDT 2024
https://github.com/kissholic updated https://github.com/llvm/llvm-project/pull/99475
>From a1547d2230cd503a1e25752092409df3175f7a3d Mon Sep 17 00:00:00 2001
From: kissholicma <kissholicma at tencent.com>
Date: Thu, 18 Jul 2024 19:31:56 +0800
Subject: [PATCH 1/2] Optimize fptrunc(x)>=C1 --> x>=C2
---
.../InstCombine/InstCombineCompares.cpp | 31 +++++++++++++++++++
.../Transforms/InstCombine/fold-fcmp-trunc.ll | 11 +++++++
2 files changed, 42 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index abadf54a96767..2af3e92213f13 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -22,10 +22,13 @@
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <bitset>
@@ -7882,6 +7885,30 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
}
+// Fold trunc(x) < constant --> x < constant if possible.
+static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC) {
+ //
+ FCmpInst::Predicate Pred = I.getPredicate();
+
+ // Check that predicates are valid.
+ if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+ (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+ return nullptr;
+
+ auto *LType = LHSI->getOperand(0)->getType();
+ auto *RType = RHSC->getType();
+
+ if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
+ LType->getTypeID() >= RType->getTypeID()))
+ return nullptr;
+
+ auto *ROperand = llvm::ConstantFP::get(
+ LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
+
+ return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
+}
+
/// Optimize fabs(X) compared with zero.
static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
Value *X;
@@ -8244,6 +8271,10 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
cast<LoadInst>(LHSI), GEP, GV, I))
return Res;
break;
+ case Instruction::FPTrunc:
+ if (Instruction *NV = foldFCmpFpTrunc(I, LHSI, RHSC))
+ return NV;
+ break;
}
}
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
new file mode 100644
index 0000000000000..446111a60dd6c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -0,0 +1,11 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+
+;CHECK-LABEL: @src(
+;CHECK: %result = fcmp oge double %0, 1.000000e+02
+;CHECK-NEXT: ret i1 %result
+define i1 @src(double %0) {
+ %trunc = fptrunc double %0 to float
+ %result = fcmp oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
\ No newline at end of file
>From 73b1f0b57b45534187103ccc8bb935708179ebf3 Mon Sep 17 00:00:00 2001
From: kissholic <kissholicovo at outlook.com>
Date: Wed, 24 Jul 2024 19:42:55 +0800
Subject: [PATCH 2/2] Optimize fptrunc(x)>=C1 --> x>=C2. Add test cases and
support for vector types.
---
.../InstCombine/InstCombineCompares.cpp | 47 +++++--
.../Transforms/InstCombine/fold-fcmp-trunc.ll | 130 ++++++++++++++++--
2 files changed, 160 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2af3e92213f13..37053a95638c0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
@@ -24,14 +26,18 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <bitset>
+#include <cstdint>
using namespace llvm;
using namespace PatternMatch;
@@ -7888,7 +7894,6 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
// Fold trunc(x) < constant --> x < constant if possible.
static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
Constant *RHSC) {
- //
FCmpInst::Predicate Pred = I.getPredicate();
// Check that predicates are valid.
@@ -7896,17 +7901,41 @@ static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
(Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
return nullptr;
- auto *LType = LHSI->getOperand(0)->getType();
- auto *RType = RHSC->getType();
+ if (ConstantFP *ConstRFp = dyn_cast<ConstantFP>(RHSC)) {
+ Type *LType = LHSI->getOperand(0)->getType();
+ bool lossInfo;
+ APFloat RValue = ConstRFp->getValue();
+ RValue.convert(LType->getFltSemantics(), APFloat::rmNearestTiesToEven,
+ &lossInfo);
- if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
- LType->getTypeID() >= RType->getTypeID()))
- return nullptr;
+ return new FCmpInst(Pred, LHSI->getOperand(0),
+ ConstantFP::get(LType, RValue), "", &I);
+ }
+
+ if (RHSC->getType()->isVectorTy()) {
+ Type *LVecType = LHSI->getOperand(0)->getType();
+ Type *LEleType = dyn_cast<VectorType>(LVecType)->getElementType();
+
+ FixedVectorType *VecType = dyn_cast<FixedVectorType>(RHSC->getType());
+ uint64_t EleNum = VecType->getNumElements();
- auto *ROperand = llvm::ConstantFP::get(
- LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
+ std::vector<Constant *> EleVec(EleNum);
+ for (uint64_t Idx = 0; Idx < EleNum; ++Idx) {
+ bool lossInfo;
+ APFloat EleValue =
+ dyn_cast<ConstantFP>(RHSC->getAggregateElement(Idx))->getValueAPF();
+ EleValue.convert(LEleType->getFltSemantics(),
+ APFloat::rmNearestTiesToEven, &lossInfo);
+ EleVec[Idx] = ConstantFP::get(LEleType, EleValue);
+ }
+
+ ArrayRef<Constant *> EleArr(EleVec);
- return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
+ return new FCmpInst(Pred, LHSI->getOperand(0), ConstantVector::get(EleArr),
+ "", &I);
+ }
+
+ return nullptr;
}
/// Optimize fabs(X) compared with zero.
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
index 446111a60dd6c..3fdf35f0e0db2 100644
--- a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -1,11 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
-;CHECK-LABEL: @src(
-;CHECK: %result = fcmp oge double %0, 1.000000e+02
-;CHECK-NEXT: ret i1 %result
-define i1 @src(double %0) {
- %trunc = fptrunc double %0 to float
- %result = fcmp oge float %trunc, 1.000000e+02
- ret i1 %result
-}
\ No newline at end of file
+define i1 @fcmp_trunc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nnan(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp nnan oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_ninf(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp ninf oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nsz(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp nsz oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_reassoc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp reassoc oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_fast(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_fast(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc double %0 to float
+ %result = fcmp fast oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %vec = fptrunc <4 x double> %0 to <4 x float>
+ %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @fcmp_vec_trunc_with_flag(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc_with_flag(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %vec = fptrunc <4 x double> %0 to <4 x float>
+ %cmp = fcmp fast olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+ ret <4 x i1> %cmp
+}
+
+define i1 @fcmp_trunc_fp128(fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_fp128(
+; CHECK-SAME: fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL00000000000000004005900000000000
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc fp128 %0 to float
+ %result = fcmp fast oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80(
+; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C800000000000000
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc x86_fp80 %0 to float
+ %result = fcmp fast oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
+define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128(
+; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM40590000000000000000000000000000
+; CHECK-NEXT: ret i1 [[RESULT]]
+;
+ %trunc = fptrunc ppc_fp128 %0 to float
+ %result = fcmp fast oge float %trunc, 1.000000e+02
+ ret i1 %result
+}
+
More information about the llvm-commits
mailing list