[llvm] r356471 - [InstCombine] fold logic-of-nan-fcmps (PR41069)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 09:39:17 PDT 2019
Author: spatel
Date: Tue Mar 19 09:39:17 2019
New Revision: 356471
URL: http://llvm.org/viewvc/llvm-project?rev=356471&view=rev
Log:
[InstCombine] fold logic-of-nan-fcmps (PR41069)
Combine 2 fcmps that are checking for nan-ness:
and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
This is an exact match for a minimal reassociation pattern.
If we want to handle this more generally, that logic should go in
the reassociate pass, which would allow removing this code.
This should fix:
https://bugs.llvm.org/show_bug.cgi?id=41069
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll
llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=356471&r1=356470&r2=356471&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Tue Mar 19 09:39:17 2019
@@ -1258,6 +1258,52 @@ Value *InstCombiner::foldLogicOfFCmps(FC
return nullptr;
}
+/// This is a limited reassociation for a special case (see above) where we are
+/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
+/// This could be handled more generally in '-reassociation', but it seems like
+/// an unlikely pattern for a large number of logic ops and fcmps.
+static Instruction *reassociateFCmps(BinaryOperator &BO,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction::BinaryOps Opcode = BO.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Expecting and/or op for fcmp transform");
+
+ // There are 4 commuted variants of the pattern. Canonicalize operands of this
+ // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
+ Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
+ FCmpInst::Predicate Pred;
+ if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP())))
+ std::swap(Op0, Op1);
+
+ // Match inner binop and the predicate for combining 2 NAN checks into 1.
+ BinaryOperator *BO1;
+ FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
+ : FCmpInst::FCMP_UNO;
+ if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
+ !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+ return nullptr;
+
+ // The inner logic op must have a matching fcmp operand.
+ Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ std::swap(BO10, BO11);
+
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ return nullptr;
+
+ // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
+ // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
+ Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y);
+ if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) {
+ // Intersect FMF from the 2 source fcmps.
+ NewFCmpInst->copyIRFlags(Op0);
+ NewFCmpInst->andIRFlags(BO10);
+ }
+ return BinaryOperator::Create(Opcode, NewFCmp, BO11);
+}
+
/// Match De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
@@ -1746,6 +1792,9 @@ Instruction *InstCombiner::visitAnd(Bina
if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;
@@ -2415,6 +2464,9 @@ Instruction *InstCombiner::visitOr(Binar
if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
return CastedOr;
Modified: llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll?rev=356471&r1=356470&r2=356471&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll Tue Mar 19 09:39:17 2019
@@ -25,10 +25,8 @@ define <2 x i1> @PR1738_vec_undef(<2 x d
define i1 @PR41069(i1 %z, float %c, float %d) {
; CHECK-LABEL: @PR41069(
-; CHECK-NEXT: [[ORD1:%.*]] = fcmp arcp ord float [[C:%.*]], 0.000000e+00
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[ORD1]], [[Z:%.*]]
-; CHECK-NEXT: [[ORD2:%.*]] = fcmp afn ord float [[D:%.*]], 0.000000e+00
-; CHECK-NEXT: [[R:%.*]] = and i1 [[AND]], [[ORD2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%ord1 = fcmp arcp ord float %c, 0.0
@@ -40,10 +38,8 @@ define i1 @PR41069(i1 %z, float %c, floa
define i1 @PR41069_commute(i1 %z, float %c, float %d) {
; CHECK-LABEL: @PR41069_commute(
-; CHECK-NEXT: [[ORD1:%.*]] = fcmp ninf ord float [[C:%.*]], 0.000000e+00
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[ORD1]], [[Z:%.*]]
-; CHECK-NEXT: [[ORD2:%.*]] = fcmp reassoc ninf ord float [[D:%.*]], 0.000000e+00
-; CHECK-NEXT: [[R:%.*]] = and i1 [[ORD2]], [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%ord1 = fcmp ninf ord float %c, 0.0
@@ -58,10 +54,8 @@ define i1 @PR41069_commute(i1 %z, float
define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: @PR41069_vec(
; CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[ORD2:%.*]] = fcmp ord <2 x double> [[C:%.*]], <double 0.000000e+00, double undef>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[ORD1]], [[ORD2]]
-; CHECK-NEXT: [[ORD3:%.*]] = fcmp ord <2 x double> [[D:%.*]], zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[AND]], [[ORD3]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ord1 = fcmp ord <2 x double> %a, %b
@@ -75,10 +69,8 @@ define <2 x i1> @PR41069_vec(<2 x double
define <2 x i1> @PR41069_vec_commute(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: @PR41069_vec_commute(
; CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[ORD2:%.*]] = fcmp ord <2 x double> [[C:%.*]], <double 0.000000e+00, double undef>
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[ORD1]], [[ORD2]]
-; CHECK-NEXT: [[ORD3:%.*]] = fcmp ord <2 x double> [[D:%.*]], zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[ORD3]], [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%ord1 = fcmp ord <2 x double> %a, %b
Modified: llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll?rev=356471&r1=356470&r2=356471&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/or-fcmp.ll Tue Mar 19 09:39:17 2019
@@ -26,10 +26,8 @@ define <2 x i1> @PR1738_vec_undef(<2 x d
define i1 @PR41069(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: @PR41069(
; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno double [[C:%.*]], 0.000000e+00
-; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNO1]], [[UNO2]]
-; CHECK-NEXT: [[UNO3:%.*]] = fcmp uno double [[D:%.*]], 0.000000e+00
-; CHECK-NEXT: [[R:%.*]] = or i1 [[OR]], [[UNO3]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]]
; CHECK-NEXT: ret i1 [[R]]
;
%uno1 = fcmp uno double %a, %b
@@ -43,10 +41,8 @@ define i1 @PR41069(double %a, double %b,
define i1 @PR41069_commute(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: @PR41069_commute(
; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno double [[C:%.*]], 0.000000e+00
-; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNO1]], [[UNO2]]
-; CHECK-NEXT: [[UNO3:%.*]] = fcmp uno double [[D:%.*]], 0.000000e+00
-; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO3]], [[OR]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]]
; CHECK-NEXT: ret i1 [[R]]
;
%uno1 = fcmp uno double %a, %b
@@ -59,10 +55,8 @@ define i1 @PR41069_commute(double %a, do
define <2 x i1> @PR41069_vec(<2 x i1> %z, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: @PR41069_vec(
-; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno <2 x float> [[C:%.*]], zeroinitializer
-; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[UNO1]], [[Z:%.*]]
-; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno <2 x float> [[D:%.*]], <float 0.000000e+00, float undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[OR]], [[UNO2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%uno1 = fcmp uno <2 x float> %c, zeroinitializer
@@ -74,10 +68,8 @@ define <2 x i1> @PR41069_vec(<2 x i1> %z
define <2 x i1> @PR41069_vec_commute(<2 x i1> %z, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: @PR41069_vec_commute(
-; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno <2 x float> [[C:%.*]], zeroinitializer
-; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[UNO1]], [[Z:%.*]]
-; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno <2 x float> [[D:%.*]], <float 0.000000e+00, float undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[UNO2]], [[OR]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%uno1 = fcmp uno <2 x float> %c, zeroinitializer
More information about the llvm-commits
mailing list