[llvm] r290266 - Revert "[InstCombine] New opportunities for FoldAndOfICmp and FoldXorOfICmp"

Wed Dec 21 13:37:45 PST 2016

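This is what Alive has to say about the reverted folds (modulo bugs in my reading of the code): the two xor folds verify, but both and folds are incorrect under the precondition the code checks.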

----------------------------------------
Name: FoldXorOfICmps eq
Pre: isPowerOf2(C1)
  %x = and %a, C1
  %y = and %b, C1
  %0 = icmp eq %x, 0
  %1 = icmp eq %y, 0
  %r = xor %0, %1
=>
  %r = icmp ne %x, %y

Done: 64
Optimization is correct!

----------------------------------------
Name: FoldXorOfICmps ne
Pre: isPowerOf2(C1)
  %x = and %a, C1
  %y = and %b, C1
  %0 = icmp ne %x, 0
  %1 = icmp ne %y, 0
  %r = xor %0, %1
=>
  %r = icmp ne %x, %y

Done: 64
Optimization is correct!

----------------------------------------
Name: FoldAndOfICmps 1
Pre: countTrailingZeros(C1) >= width(C2) - countLeadingZeros(C2) - 1
  %x = and %a, C1
  %y = and %b, C2
  %0 = icmp eq %x, 0
  %1 = icmp ne %y, 0
  %r = and %0, %1
=>
  %r = icmp ult %x, %y


ERROR: Mismatch in values of i1 %r

Example:
%a i4 = 0xF (15, -1)
C1 i4 = 0x2 (2)
%b i4 = 0xF (15, -1)
C2 i4 = 0x3 (3)
%x i4 = 0x2 (2)
%y i4 = 0x3 (3)
%0 i1 = 0x0 (0)
%1 i1 = 0x1 (1, -1)
Source value: 0x0 (0)
Target value: 0x1 (1, -1)

----------------------------------------
Name: FoldAndOfICmps 2
Pre: countTrailingZeros(C2) >= width(C1) - countLeadingZeros(C1) - 1
  %x = and %a, C1
  %y = and %b, C2
  %0 = icmp ne %x, 0
  %1 = icmp eq %y, 0
  %r = and %0, %1
=>
  %r = icmp ult %y, %x


ERROR: Mismatch in values of i1 %r

Example:
%a i4 = 0xF (15, -1)
C1 i4 = 0x7 (7)
%b i4 = 0xF (15, -1)
C2 i4 = 0x4 (4)
%x i4 = 0x7 (7)
%y i4 = 0x4 (4)
%0 i1 = 0x1 (1, -1)
%1 i1 = 0x0 (0)
Source value: 0x0 (0)
Target value: 0x1 (1, -1)


Nuno


-----Original Message-----
From: David Majnemer via llvm-commits
Sent: 21 December 2016 19:22
To: llvm-commits at lists.llvm.org
Subject: [llvm] r290266 - Revert "[InstCombine] New opportunities for FoldAndOfICmp and FoldXorOfICmp"

Author: majnemer
Date: Wed Dec 21 13:21:59 2016
New Revision: 290266

URL: http://llvm.org/viewvc/llvm-project?rev=290266&view=rev
Log:
Revert "[InstCombine] New opportunities for FoldAndOfICmp and FoldXorOfICmp"

This reverts commit r289813, it caused PR31449.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=290266&r1=290265&r2=290266&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Wed Dec 21 13:21:59 2016
@@ -733,44 +733,6 @@ static Value *foldLogOpOfMaskedICmps(ICm
   return nullptr;
 }
 
-namespace {
-
-struct BitGroupCheck {
-  // If the Cmp, checks the bits in the group are nonzero?
-  bool CheckIfSet {false};
-  // The mask that identifies the bitgroup in question.
-  const APInt *Mask {nullptr};
-};
-}
-/// For an ICMP where RHS is zero, we want to check if the ICMP is equivalent to
-/// comparing a group of bits in an integer value against zero.
-BitGroupCheck isAnyBitSet(Value *LHS, ICmpInst::Predicate CC) {
-
-  BitGroupCheck BGC;
-  auto *Inst = dyn_cast<Instruction>(LHS);
-
-  if (!Inst || Inst->getOpcode() != Instruction::And)
-    return BGC;
-
-  // TODO Currently this does not work for vectors.
-  ConstantInt *Mask;
-  if (!match(LHS, m_And(m_Value(), m_ConstantInt(Mask))))
-    return BGC;
-  // At this point we know that LHS of ICMP is "and" of a value with a constant.
-  // Also we know that the RHS is zero. That means we are checking if a certain
-  // group of bits in a given integer value are all zero or at least one of them
-  // is set to one.
-  if (CC == ICmpInst::ICMP_EQ)
-    BGC.CheckIfSet = false;
-  else if (CC == ICmpInst::ICMP_NE)
-    BGC.CheckIfSet = true;
-  else
-    return BGC;
-
-  BGC.Mask = &Mask->getValue();
-  return BGC;
-}
-
 /// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
 /// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
 /// If \p Inverted is true then the check is for the inverted range, e.g.
@@ -827,32 +789,6 @@ Value *InstCombiner::simplifyRangeCheck(
   return Builder->CreateICmp(NewPred, Input, RangeEnd);
 }
 
-Value *InstCombiner::FoldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
-
-  Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
-  // TODO The lines below does not work for vectors. ConstantInt is scalar.
-  auto *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
-  auto *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
-  if (!LHSCst || !RHSCst)
-    return nullptr;
-  ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
-
-  // E.g. (icmp ne %x, 0) ^ (icmp ne %y, 0) => icmp ne %x, %y if the following
-  // conditions hold:
-  // 1- (%x = and %a, %mask) and (%y = and %b, %mask)
-  // 2- %mask is a power of 2.
-  if (RHSCst->isZero() && LHSCst == RHSCst) {
-
-    BitGroupCheck BGC1 = isAnyBitSet(Val, LHSCC);
-    BitGroupCheck BGC2 = isAnyBitSet(Val2, RHSCC);
-    if (BGC1.Mask && BGC2.Mask && BGC1.CheckIfSet == BGC2.CheckIfSet &&
-        *BGC1.Mask == *BGC2.Mask && BGC1.Mask->isPowerOf2()) {
-      return Builder->CreateICmp(ICmpInst::ICMP_NE, Val2, Val);
-    }
-  }
-  return nullptr;
-}
-
 /// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -935,29 +871,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmp
     }
   }
 
-  // E.g. (icmp eq %x, 0) & (icmp ne %y, 0) => icmp ult %x, %y if the following
-  // conditions hold:
-  // 1- (%x = and %a, %mask1) and (%y = and %b, %mask2)
-  // 2- Let %t be the smallest power of 2 where %mask1 & %t != 0. Then for any
-  //    %s that is a power of 2 and %s & %mask2 != 0, we must have %s <= %t.
-  // For example if %mask1 = 24 and %mask2 = 16, setting %s = 16 and %t = 8
-  // violates condition (2) above. So this optimization cannot be applied.
-  if (RHSCst->isZero() && LHSCst == RHSCst) {
-    BitGroupCheck BGC1 = isAnyBitSet(Val, LHSCC);
-    BitGroupCheck BGC2 = isAnyBitSet(Val2, RHSCC);
-
-    if (BGC1.Mask && BGC2.Mask && (BGC1.CheckIfSet != BGC2.CheckIfSet)) {
-      if (!BGC1.CheckIfSet &&
-          BGC1.Mask->countTrailingZeros() >=
-          BGC2.Mask->getBitWidth() - BGC2.Mask->countLeadingZeros() - 1)
-        return Builder->CreateICmp(ICmpInst::ICMP_ULT, Val, Val2);
-      else if (!BGC2.CheckIfSet &&
-          BGC2.Mask->countTrailingZeros() >=
-          BGC1.Mask->getBitWidth() - BGC1.Mask->countLeadingZeros() - 1)
-        return Builder->CreateICmp(ICmpInst::ICMP_ULT, Val2, Val);
-    }
-  }
-
   // From here on, we only handle:
   //    (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
   if (Val != Val2) return nullptr;
@@ -2808,16 +2721,9 @@ Instruction *InstCombiner::visitXor(Bina
       match(Op1, m_Not(m_Specific(A))))
     return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
 
+  // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) {
-
-      // E.g. if we have xor (icmp eq %A, 0), (icmp eq %B, 0)
-      // and we know both A and B are either 8 (power of 2) or 0
-      // we can simplify to (icmp ne A, B).
-      if (Value *Res = FoldXorOfICmps(LHS, RHS))
-        return replaceInstUsesWith(I, Res);
-
-      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
+    if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
       if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
         if (LHS->getOperand(0) == RHS->getOperand(1) &&
             LHS->getOperand(1) == RHS->getOperand(0))
@@ -2832,7 +2738,6 @@ Instruction *InstCombiner::visitXor(Bina
                                                Builder));
         }
       }
-    }
 
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
     return CastedXor;

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h?rev=290266&r1=290265&r2=290266&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h Wed Dec 21 13:21:59 2016
@@ -243,7 +243,6 @@ public:
   Instruction *visitFDiv(BinaryOperator &I);
   Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
   Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
-  Value *FoldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
   Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
   Instruction *visitAnd(BinaryOperator &I);
   Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);

Modified: llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll?rev=290266&r1=290265&r2=290266&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll Wed Dec 21 13:21:59 2016
@@ -51,207 +51,3 @@ define i1 @test(i32 %tmp1030) {
   ret i1 %tmp1042
 }
 
-; Last three instructions (ignoring ret) are equivalent of %val2 < %val1.
-define i1 @test2(i32 %a, i32 %b) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Last three instructions (ignoring ret) are equivalent of %val2 < %val1.
-define i1 @test3(i32 %a, i32 %b) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}
-
-; Last three instructions (ignoring ret) are equivalent of %val2 < %val1.
-define i1 @test4(i32 %a, i32 %b) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}
-
-; Last three instructions (ignoring ret) are equivalent of %val2 < %val1.
-define i1 @test5(i32 %a, i32 %b) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; An optimization like those of previous tests is not possible
-; for example if %b = 8 and %a = 16, we have %val2 = 8 and
-; % %val1 = 16 so %val2 < %val1 but %and == 0.
-define i1 @test6(i32 %a, i32 %b) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 16
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i32 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp eq i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i32 %a, 16
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; %a and %b have different widths. So optimization is not possible.
-define i1 @test7(i16 %a, i32 %b) {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i16 %a, 15
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i16 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp eq i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i16 %a, 15
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i16 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; The last three instructions can be simplified to checking %val1 != %val2.
-; After that other transformations change the code further.
-define i1 @test8(i32 %a, i32 %b) {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Operands of and instructions, must be identical powers of 2 otherwise
-; a simplification, like that of previous testcase is not possible.
-define i1 @test9(i32 %a, i32 %b) {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 24
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 24
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i32 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp ne i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = xor i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i32 %a, 24
-  %val2 = and i32 %b, 24
-  %cmp.a = icmp ne i32 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; The last three instructions are equivalent of checking %val1 != %val2.
-; After making this change, other transformation further change the code.
-define i1 @test10(i32 %a, i32 %b) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp eq i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Cannot be simplified because of different width of %a and %b
-define i1 @test11(i16 %a, i32 %b) {
-; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i16 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ne i16 [[VAL1]], 0
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp ne i32 [[VAL2]], 0
-; CHECK-NEXT:    [[AND:%.*]] = xor i1 [[CMP_B]], [[CMP_A]]
-; CHECK-NEXT:    ret i1 [[AND]]
-;
-  %val1 = and i16 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ne i16 %val1, 0
-  %cmp.b = icmp ne i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Similar to @test8 except that icmp instns use ugt here instead of ne.
-define i1 @test12(i32 %a, i32 %b) {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT:    ret i1 [[TMP3]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ugt i32 %val1, 0
-  %cmp.b = icmp ugt i32 %val2, 0
-  %and = xor i1 %cmp.b, %cmp.a
-  ret i1 %and
-}
-
-; Similar to @test3 except that the first icmp uses ugt instead of ne.
-define i1 @test13(i32 %a, i32 %b) {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[VAL1:%.*]] = and i32 %a, 8
-; CHECK-NEXT:    [[VAL2:%.*]] = and i32 %b, 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[VAL2]], [[VAL1]]
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-  %val1 = and i32 %a, 8
-  %val2 = and i32 %b, 8
-  %cmp.a = icmp ugt i32 %val1, 0
-  %cmp.b = icmp eq i32 %val2, 0
-  %and = and i1 %cmp.a, %cmp.b
-  ret i1 %and
-}


_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits