[llvm] [GlobalIsel] Combine logic of floating point compares (PR #81886)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 15 09:45:39 PST 2024
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/81886
It is purely based on the symmetry of the two compares: the compared registers can be scalars or vectors and need not be constants.
X < 5.0 || X > 5.0
->
X != 5.0
X < Y && X > Y
->
FCMP_FALSE
X olt Y || X uge Y
->
FCMP_TRUE
see InstCombinerImpl::foldLogicOfFCmps
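For reference, the fold is just bit arithmetic on the fcmp predicate codes: each predicate is a 4-bit mask (unordered / less / greater / equal), and an && or || of two compares of the same operands becomes the bitwise AND or OR of the two masks, which is what getFCmpCode exposes. Below is a minimal standalone sketch of that arithmetic; the enum and the foldCodes helper are illustrative only, not the LLVM API.

// Standalone sketch of the predicate-code arithmetic behind the combine.
// LLVM's FCmpInst::Predicate values double as 4-bit codes (see
// llvm/Analysis/CmpInstAnalysis.h); the enum below mirrors that layout,
// but the names and helpers here are illustrative, not the LLVM API.
#include <cassert>
#include <cstdio>

enum FCmpCode : unsigned {
  // bit 3 = unordered, bit 2 = less, bit 1 = greater, bit 0 = equal
  FCMP_FALSE = 0x0,
  FCMP_OEQ   = 0x1,
  FCMP_OGT   = 0x2,
  FCMP_OLT   = 0x4,
  FCMP_ONE   = FCMP_OLT | FCMP_OGT, // 0x6
  FCMP_ORD   = 0x7,
  FCMP_UNO   = 0x8,
  FCMP_UEQ   = FCMP_UNO | FCMP_OEQ, // 0x9
  FCMP_UGE   = 0xB,
  FCMP_UNE   = FCMP_UNO | FCMP_ONE, // 0xE
  FCMP_TRUE  = 0xF,
};

// "(x cc0 y) op (x cc1 y)" collapses to a single compare whose code is the
// bitwise AND (for &&) or OR (for ||) of the two predicate codes.
unsigned foldCodes(unsigned CC0, unsigned CC1, bool IsAnd) {
  return IsAnd ? (CC0 & CC1) : (CC0 | CC1);
}

int main() {
  // x < 5.0 || x > 5.0  ->  x != 5.0 (ordered "one")
  assert(foldCodes(FCMP_OLT, FCMP_OGT, /*IsAnd=*/false) == FCMP_ONE);
  // x oeq y || x uno y  ->  x ueq y (first MIR test in the patch)
  assert(foldCodes(FCMP_OEQ, FCMP_UNO, /*IsAnd=*/false) == FCMP_UEQ);
  // x olt y || x uge y  ->  always true
  assert(foldCodes(FCMP_OLT, FCMP_UGE, /*IsAnd=*/false) == FCMP_TRUE);
  // x < y && x > y  ->  always false
  assert(foldCodes(FCMP_OLT, FCMP_OGT, /*IsAnd=*/true) == FCMP_FALSE);
  std::printf("all predicate folds check out\n");
  return 0;
}

The constant results (FCMP_FALSE / FCMP_TRUE) are the cases the patch materializes with buildConstant instead of emitting a new fcmp.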
From d72843cac8a8abd4c7ca91a57535c145f01151e1 Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Wed, 7 Feb 2024 00:40:56 +0100
Subject: [PATCH] [GlobalIsel] Combine logic of floating point compares
It is purely based on the symmetry of the two compares: the compared
registers can be scalars or vectors and need not be constants.
X < 5.0 || X > 5.0
->
X != 5.0
X < Y && X > Y
->
FCMP_FALSE
X olt Y || X uge Y
->
FCMP_TRUE
see InstCombinerImpl::foldLogicOfFCmps
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 82 ++++++++++
.../GlobalISel/combine-logic-of-compare.mir | 146 ++++++++++++++++++
3 files changed, 231 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 10eeafdd09a8ee..5d458240929289 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -931,6 +931,9 @@ class CombinerHelper {
/// into a single comparison using range-based reasoning.
bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
BuildFnTy &MatchInfo);
+
+ /// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
+ bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1b199cfd41d231..5c37095bb66e10 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
+#include <llvm/Analysis/CmpInstAnalysis.h>
#include <optional>
#include <tuple>
@@ -6814,12 +6815,90 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
return true;
}
+bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
+ BuildFnTy &MatchInfo) {
+ assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
+ Register DestReg = Logic->getReg(0);
+ Register LHS = Logic->getLHSReg();
+ Register RHS = Logic->getRHSReg();
+ bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
+
+ // We need a compare on the LHS register.
+ GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
+ if (!Cmp1)
+ return false;
+
+ // We need a compare on the RHS register.
+ GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
+ if (!Cmp2)
+ return false;
+
+ LLT CmpTy = MRI.getType(Cmp1->getReg(0));
+ LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
+
+ // We only fold if the new fcmp is legal, the logic op and both fcmps
+ // have a single use, and both fcmps compare operands of the same type.
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
+ !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
+ !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
+ MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
+ return false;
+
+ CmpInst::Predicate PredL = Cmp1->getCond();
+ CmpInst::Predicate PredR = Cmp2->getCond();
+ Register LHS0 = Cmp1->getLHSReg();
+ Register LHS1 = Cmp1->getRHSReg();
+ Register RHS0 = Cmp2->getLHSReg();
+ Register RHS1 = Cmp2->getRHSReg();
+
+ if (LHS0 == RHS1 && LHS1 == RHS0) {
+ // Swap RHS operands to match LHS.
+ PredR = CmpInst::getSwappedPredicate(PredR);
+ std::swap(RHS0, RHS1);
+ }
+
+ if (LHS0 == RHS0 && LHS1 == RHS1) {
+ // Compute the merged predicate by combining the two fcmp codes.
+ unsigned CmpCodeL = getFCmpCode(PredL);
+ unsigned CmpCodeR = getFCmpCode(PredR);
+ unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
+ unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ // The fcmp predicates fill the lower part of the enum.
+ FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
+ if (Pred == FCmpInst::FCMP_FALSE &&
+ isConstantLegalOrBeforeLegalizer(CmpTy)) {
+ auto False = B.buildConstant(CmpTy, 0);
+ B.buildZExtOrTrunc(DestReg, False);
+ } else if (Pred == FCmpInst::FCMP_TRUE &&
+ isConstantLegalOrBeforeLegalizer(CmpTy)) {
+ auto True =
+ B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
+ CmpTy.isVector() /*isVector*/,
+ true /*isFP*/));
+ B.buildZExtOrTrunc(DestReg, True);
+ } else { // Otherwise build an fcmp with the merged predicate.
+ auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
+ B.buildZExtOrTrunc(DestReg, Cmp);
+ }
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
GAnd *And = cast<GAnd>(&MI);
if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
return true;
+ if (tryFoldLogicOfFCmps(And, MatchInfo))
+ return true;
+
return false;
}
@@ -6829,5 +6908,8 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
return true;
+ if (tryFoldLogicOfFCmps(Or, MatchInfo))
+ return true;
+
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
index f667a83bf21a8b..d050823e3b9494 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
@@ -260,3 +260,149 @@ body: |
%zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
$q0 = COPY %zext
...
+---
+# fcmp oeq (x, y) || fcmp uno (x, y) -> fcmp ueq (x, y)
+name: test_fcmp_or_fcmp_with_x_y
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_x_y
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ueq), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %cmp1:_(s1) = G_FCMP floatpred(oeq), %0(s64), %1
+ %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+ %or:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# fcmp one (x, 5.0) || fcmp uno (x, 5.0) -> fcmp une (x, 5.0)
+name: test_fcmp_or_fcmp_with_5_y
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_5_y
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e+00
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY]](s64), [[C]]
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_FCONSTANT double 5.0
+ %cmp1:_(s1) = G_FCMP floatpred(one), %0(s64), %2
+ %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %2
+ %or:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# fcmp one (y, x) || fcmp uno (x, y) -> fcmp une (y, x)
+name: test_fcmp_or_fcmp_with_anti
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_or_fcmp_with_anti
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[COPY1]](s64), [[COPY]]
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %cmp1:_(s1) = G_FCMP floatpred(one), %1(s64), %0
+ %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+ %or:_(s1) = G_OR %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %or(s1)
+ $x0 = COPY %zext
+...
+---
+# fcmp une (y, x) && fcmp uno (x, y) -> fcmp uno (y, x)
+name: test_fcmp_and_fcmp_with_x_y
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(uno), [[COPY1]](s64), [[COPY]]
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[FCMP]](s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %cmp1:_(s1) = G_FCMP floatpred(une), %1(s64), %0
+ %cmp2:_(s1) = G_FCMP floatpred(uno), %0(s64), %1
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+...
+---
+# multiple uses of the G_AND result -> not combined
+name: test_fcmp_and_fcmp_with_x_y_multi_use
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_x_y_multi_use
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: %cmp1:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s64), [[COPY]]
+ ; CHECK-NEXT: %cmp2:_(s1) = G_FCMP floatpred(ugt), [[COPY]](s64), [[COPY1]]
+ ; CHECK-NEXT: %and:_(s1) = G_AND %cmp1, %cmp2
+ ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %and(s1)
+ ; CHECK-NEXT: %zext2:_(s64) = G_ZEXT %and(s1)
+ ; CHECK-NEXT: $x0 = COPY %zext(s64)
+ ; CHECK-NEXT: $x2 = COPY %zext2(s64)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %cmp1:_(s1) = G_FCMP floatpred(ogt), %1(s64), %0
+ %cmp2:_(s1) = G_FCMP floatpred(ugt), %0(s64), %1
+ %and:_(s1) = G_AND %cmp1, %cmp2
+ %zext:_(s64) = G_ZEXT %and(s1)
+ %zext2:_(s64) = G_ZEXT %and(s1)
+ $x0 = COPY %zext
+ $x2 = COPY %zext2
+...
+---
+# vector fcmp oeq (x, y) && fcmp olt (x, y) -> false
+name: test_fcmp_and_fcmp_with_vectors
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_fcmp_and_fcmp_with_vectors
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
+ ; CHECK-NEXT: %zext:_(<2 x s64>) = G_ZEXT [[BUILD_VECTOR]](<2 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %zext(<2 x s64>)
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = COPY $x3
+ %4:_(s64) = COPY $x4
+ %5:_(s64) = COPY $x5
+ %6:_(s64) = COPY $x6
+ %7:_(s64) = COPY $x7
+ %v8:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64)
+ %v9:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64)
+ %cmp1:_(<2 x s1>) = G_FCMP floatpred(oeq), %v8(<2 x s64>), %v9
+ %cmp2:_(<2 x s1>) = G_FCMP floatpred(olt), %v8(<2 x s64>), %v9
+ %and:_(<2 x s1>) = G_AND %cmp1, %cmp2
+ %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
+ $q0 = COPY %zext
+...