[llvm] Optimize fptrunc(x)>=C1 --> x>=C2 (PR #99475)

Wed Jul 24 17:54:16 PDT 2024

https://github.com/kissholic updated https://github.com/llvm/llvm-project/pull/99475

>From a1547d2230cd503a1e25752092409df3175f7a3d Mon Sep 17 00:00:00 2001
From: kissholicma <kissholicma at tencent.com>
Date: Thu, 18 Jul 2024 19:31:56 +0800
Subject: [PATCH 1/2] Optimize fptrunc(x)>=C1 -->  x>=C2

---
 .../InstCombine/InstCombineCompares.cpp       | 31 +++++++++++++++++++
 .../Transforms/InstCombine/fold-fcmp-trunc.ll | 11 +++++++
 2 files changed, 42 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index abadf54a96767..2af3e92213f13 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -22,10 +22,13 @@
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <bitset>
@@ -7882,6 +7885,30 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
   return new FCmpInst(Pred, LHSI->getOperand(1), RHSC, "", &I);
 }
 
+// Fold trunc(x) < constant --> x < constant if possible.
+static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
+                                    Constant *RHSC) {
+  //
+  FCmpInst::Predicate Pred = I.getPredicate();
+
+  // Check that predicates are valid.
+  if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+      (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+    return nullptr;
+
+  auto *LType = LHSI->getOperand(0)->getType();
+  auto *RType = RHSC->getType();
+
+  if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
+        LType->getTypeID() >= RType->getTypeID()))
+    return nullptr;
+
+  auto *ROperand = llvm::ConstantFP::get(
+      LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
+
+  return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
+}
+
 /// Optimize fabs(X) compared with zero.
 static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
   Value *X;
@@ -8244,6 +8271,10 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
                   cast<LoadInst>(LHSI), GEP, GV, I))
             return Res;
       break;
+    case Instruction::FPTrunc:
+      if (Instruction *NV = foldFCmpFpTrunc(I, LHSI, RHSC))
+        return NV;
+      break;
   }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
new file mode 100644
index 0000000000000..446111a60dd6c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -0,0 +1,11 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+
+;CHECK-LABEL: @src(
+;CHECK: %result = fcmp oge double %0, 1.000000e+02
+;CHECK-NEXT: ret i1 %result
+define i1 @src(double %0) {
+    %trunc = fptrunc double %0 to float
+    %result = fcmp oge float %trunc, 1.000000e+02
+    ret i1 %result
+}
\ No newline at end of file

>From 73b1f0b57b45534187103ccc8bb935708179ebf3 Mon Sep 17 00:00:00 2001
From: kissholic <kissholicovo at outlook.com>
Date: Wed, 24 Jul 2024 19:42:55 +0800
Subject: [PATCH 2/2] Optimize fptrunc(x)>=C1 --> x>=C2. Add check cases and
 support for vector types.

---
 .../InstCombine/InstCombineCompares.cpp       |  47 +++++--
 .../Transforms/InstCombine/fold-fcmp-trunc.ll | 130 ++++++++++++++++--
 2 files changed, 160 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2af3e92213f13..37053a95638c0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
@@ -24,14 +26,18 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <bitset>
+#include <cstdint>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -7888,7 +7894,6 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
 // Fold trunc(x) < constant --> x < constant if possible.
 static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
                                     Constant *RHSC) {
-  //
   FCmpInst::Predicate Pred = I.getPredicate();
 
   // Check that predicates are valid.
@@ -7896,17 +7901,41 @@ static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
       (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
     return nullptr;
 
-  auto *LType = LHSI->getOperand(0)->getType();
-  auto *RType = RHSC->getType();
+  if (ConstantFP *ConstRFp = dyn_cast<ConstantFP>(RHSC)) {
+    Type *LType = LHSI->getOperand(0)->getType();
+    bool lossInfo;
+    APFloat RValue = ConstRFp->getValue();
+    RValue.convert(LType->getFltSemantics(), APFloat::rmNearestTiesToEven,
+                   &lossInfo);
 
-  if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
-        LType->getTypeID() >= RType->getTypeID()))
-    return nullptr;
+    return new FCmpInst(Pred, LHSI->getOperand(0),
+                        ConstantFP::get(LType, RValue), "", &I);
+  }
+
+  if (RHSC->getType()->isVectorTy()) {
+    Type *LVecType = LHSI->getOperand(0)->getType();
+    Type *LEleType = dyn_cast<VectorType>(LVecType)->getElementType();
+
+    FixedVectorType *VecType = dyn_cast<FixedVectorType>(RHSC->getType());
+    uint64_t EleNum = VecType->getNumElements();
 
-  auto *ROperand = llvm::ConstantFP::get(
-      LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
+    std::vector<Constant *> EleVec(EleNum);
+    for (uint64_t Idx = 0; Idx < EleNum; ++Idx) {
+      bool lossInfo;
+      APFloat EleValue =
+          dyn_cast<ConstantFP>(RHSC->getAggregateElement(Idx))->getValueAPF();
+      EleValue.convert(LEleType->getFltSemantics(),
+                       APFloat::rmNearestTiesToEven, &lossInfo);
+      EleVec[Idx] = ConstantFP::get(LEleType, EleValue);
+    }
+
+    ArrayRef<Constant *> EleArr(EleVec);
 
-  return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
+    return new FCmpInst(Pred, LHSI->getOperand(0), ConstantVector::get(EleArr),
+                        "", &I);
+  }
+
+  return nullptr;
 }
 
 /// Optimize fabs(X) compared with zero.
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
index 446111a60dd6c..3fdf35f0e0db2 100644
--- a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -1,11 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 
-;CHECK-LABEL: @src(
-;CHECK: %result = fcmp oge double %0, 1.000000e+02
-;CHECK-NEXT: ret i1 %result
-define i1 @src(double %0) {
-    %trunc = fptrunc double %0 to float
-    %result = fcmp oge float %trunc, 1.000000e+02
-    ret i1 %result
-}
\ No newline at end of file
+define i1 @fcmp_trunc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nnan(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp nnan oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_ninf(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp ninf oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nsz(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp nsz oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_reassoc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp reassoc oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_fast(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_fast(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    ret <4 x i1> [[CMP]]
+;
+  %vec = fptrunc <4 x double> %0 to <4 x float>
+  %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @fcmp_vec_trunc_with_flag(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc_with_flag(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    ret <4 x i1> [[CMP]]
+;
+  %vec = fptrunc <4 x double> %0 to <4 x float>
+  %cmp = fcmp fast olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+  ret <4 x i1> %cmp
+}
+
+define i1 @fcmp_trunc_fp128(fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_fp128(
+; CHECK-SAME: fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL00000000000000004005900000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc fp128 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80(
+; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C800000000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc x86_fp80 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128(
+; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM40590000000000000000000000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc ppc_fp128 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+