[llvm] 97a4e7b - [InstCombine] remove a buggy set of zext-icmp transforms

Thu Sep 9 06:00:49 PDT 2021

Author: Sanjay Patel
Date: 2021-09-09T08:49:39-04:00
New Revision: 97a4e7b7ff9ffdd005517c04680c1a73b17928ea

URL: https://github.com/llvm/llvm-project/commit/97a4e7b7ff9ffdd005517c04680c1a73b17928ea
DIFF: https://github.com/llvm/llvm-project/commit/97a4e7b7ff9ffdd005517c04680c1a73b17928ea.diff

LOG: [InstCombine] remove a buggy set of zext-icmp transforms

The motivating case is an infinite loop shown with a reduced test from:
https://llvm.org/PR51762

To solve this, I'm proposing we delete the most obviously broken part of this code.

The bug example shows a fundamental problem: we ask computeKnownBits if a transform
will be profitable, alter the code by creating new instructions, then rely on
computeKnownBits to return the same answer to actually eliminate instructions.

But there's no guarantee that the results will be the same between the 1st and 2nd
calls. In the infinite loop example, we get different answers, so we add
instructions that conflict with some other transform, and we're stuck.

There's at least one other problem visible in the test diff for
`@zext_or_masked_bit_test_uses`: the code doesn't check uses properly, so we can
end up with extra instructions created.

Last, it's not clear if this set of transforms actually improves analysis or
codegen. I spot-checked a few targets and don't see a clear win:
https://godbolt.org/z/x87EWovso

If we do see a regression from this change, codegen seems like the right place to
add a cmp -> bit-hack fold.

If this is too big of a step, we could limit the computeKnownBits calls by not
passing a context instruction and/or limiting the recursion. I checked that those
would stop the infinite loop for PR51762, but that won't guarantee that some other
example does not fall into the same loop.

Differential Revision: https://reviews.llvm.org/D109440

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/test/Transforms/InstCombine/zext-or-icmp.ll
    llvm/test/Transforms/InstCombine/zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 86caf6edeff8f..bb9120d3e1739 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -978,11 +978,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   return nullptr;
 }
 
-/// Transform (zext icmp) to bitwise / integer operations in order to
-/// eliminate it. If DoTransform is false, just test whether the given
-/// (zext icmp) can be transformed.
-Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
-                                                 bool DoTransform) {
+Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
   // If we are just checking for a icmp eq of a single bit and zext'ing it
   // to an integer, then shift the bit to the appropriate place and then
   // cast to integer to avoid the comparison.
@@ -993,8 +989,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
     // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
     if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
         (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
-      if (!DoTransform) return Cmp;
-
       Value *In = Cmp->getOperand(0);
       Value *Sh = ConstantInt::get(In->getType(),
                                    In->getType()->getScalarSizeInBits() - 1);
@@ -1026,8 +1020,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
 
       APInt KnownZeroMask(~Known.Zero);
       if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-        if (!DoTransform) return Cmp;
-
         bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
         if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
           // (X&4) == 2 --> false
@@ -1067,9 +1059,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
     if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
         match(Cmp->getOperand(0),
               m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
-      if (!DoTransform)
-        return Cmp;
-
       if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
         X = Builder.CreateNot(X);
       Value *Lshr = Builder.CreateLShr(X, ShAmt);
@@ -1091,8 +1080,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
         APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
         APInt UnknownBit = ~KnownBits;
         if (UnknownBit.countPopulation() == 1) {
-          if (!DoTransform) return Cmp;
-
           Value *Result = Builder.CreateXor(LHS, RHS);
 
           // Mask off any bits that are set and won't be shifted away.
@@ -1330,45 +1317,16 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
   if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
     return transformZExtICmp(Cmp, CI);
 
-  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
-  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
-    // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
-    // of the (zext icmp) can be eliminated. If so, immediately perform the
-    // according elimination.
-    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
-    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
-    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
-        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
-        (transformZExtICmp(LHS, CI, false) ||
-         transformZExtICmp(RHS, CI, false))) {
-      // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
-      Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
-      Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
-      Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
-      if (auto *OrInst = dyn_cast<Instruction>(Or))
-        Builder.SetInsertPoint(OrInst);
-
-      // Perform the elimination.
-      if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
-        transformZExtICmp(LHS, *LZExt);
-      if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
-        transformZExtICmp(RHS, *RZExt);
-
-      return replaceInstUsesWith(CI, Or);
-    }
-  }
-
   // zext(trunc(X) & C) -> (X & zext(C)).
   Constant *C;
   Value *X;
-  if (SrcI &&
-      match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
       X->getType() == CI.getType())
     return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
 
   // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
   Value *And;
-  if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
       match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
       X->getType() == CI.getType()) {
     Constant *ZC = ConstantExpr::getZExt(C, CI.getType());

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 83bc797da74c4..8f3b238cd1242 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -241,15 +241,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   ///
   /// \param ICI The icmp of the (zext icmp) pair we are interested in.
   /// \parem CI The zext of the (zext icmp) pair we are interested in.
-  /// \param DoTransform Pass false to just test whether the given (zext icmp)
-  /// would be transformed. Pass true to actually perform the transformation.
   ///
   /// \return null if the transformation cannot be performed. If the
   /// transformation can be performed the new instruction that replaces the
-  /// (zext icmp) pair will be returned (if \p DoTransform is false the
-  /// unmodified \p ICI will be returned in this case).
-  Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
-                                 bool DoTransform = true);
+  /// (zext icmp) pair will be returned.
+  Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI);
 
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
 

diff  --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index bc77739ec6a26..1e7dda128d7a7 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,16 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; Remove an icmp by using its operand in the subsequent logic directly.
-
 define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
 ; CHECK-LABEL: @zext_or_icmp_icmp(
 ; CHECK-NEXT:    [[MASK:%.*]] = and i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i8 [[MASK]], 0
 ; CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp eq i8 [[B:%.*]], 0
-; CHECK-NEXT:    [[TOBOOL22:%.*]] = zext i1 [[TOBOOL2]] to i8
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[MASK]], 1
-; CHECK-NEXT:    [[ZEXT3:%.*]] = or i8 [[TMP1]], [[TOBOOL22]]
-; CHECK-NEXT:    ret i8 [[ZEXT3]]
+; CHECK-NEXT:    [[BOTHCOND:%.*]] = or i1 [[TOBOOL1]], [[TOBOOL2]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[BOTHCOND]] to i8
+; CHECK-NEXT:    ret i8 [[ZEXT]]
 ;
   %mask = and i8 %a, 1
   %toBool1 = icmp eq i8 %mask, 0
@@ -165,4 +163,84 @@ define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
   ret i8 %inc
 }
 
+; This would infinite loop because knownbits changed between checking
+; if a transform was profitable and actually doing the transform.
+
+define i1 @PR51762(i32 *%i, i32 %t0, i16 %t1, i64* %p, i32* %d, i32* %f, i32 %p2) {
+; CHECK-LABEL: @PR51762(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[I_SROA_8_0:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[I_SROA_8_0_EXTRACT_TRUNC:%.*]], [[COND_TRUE:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[COND_TRUE]], label [[FOR_END11:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[I_SROA_8_0_EXTRACT_TRUNC]] = ashr i32 [[T0:%.*]], 31
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end11:
+; CHECK-NEXT:    [[S1:%.*]] = sext i16 [[T1:%.*]] to i64
+; CHECK-NEXT:    [[SROA38:%.*]] = load i32, i32* [[I:%.*]], align 8
+; CHECK-NEXT:    [[INSERT_EXT51:%.*]] = zext i32 [[I_SROA_8_0]] to i64
+; CHECK-NEXT:    [[INSERT_SHIFT52:%.*]] = shl nuw i64 [[INSERT_EXT51]], 32
+; CHECK-NEXT:    [[INSERT_EXT39:%.*]] = zext i32 [[SROA38]] to i64
+; CHECK-NEXT:    [[INSERT_INSERT41:%.*]] = or i64 [[INSERT_SHIFT52]], [[INSERT_EXT39]]
+; CHECK-NEXT:    [[REM:%.*]] = urem i64 [[S1]], [[INSERT_INSERT41]]
+; CHECK-NEXT:    [[NE:%.*]] = icmp ne i64 [[REM]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INSERT_INSERT41]], 0
+; CHECK-NEXT:    [[SPEC_SELECT57:%.*]] = or i1 [[NE]], [[CMP]]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[SPEC_SELECT57]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[CONV15:%.*]] = sext i16 [[T1]] to i32
+; CHECK-NEXT:    [[CMP16:%.*]] = icmp sge i32 [[T2]], [[CONV15]]
+; CHECK-NEXT:    [[CONV17:%.*]] = zext i1 [[CMP16]] to i32
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[F:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[T3]], [[CONV17]]
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[F]], align 4
+; CHECK-NEXT:    [[REM18:%.*]] = srem i32 [[LOR_EXT]], [[ADD]]
+; CHECK-NEXT:    [[CONV19:%.*]] = zext i32 [[REM18]] to i64
+; CHECK-NEXT:    store i32 0, i32* [[D]], align 8
+; CHECK-NEXT:    [[R:%.*]] = icmp ult i64 [[INSERT_INSERT41]], [[CONV19]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[R]])
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %i.sroa.8.0 = phi i32 [ undef, %entry ], [ %i.sroa.8.0.extract.trunc, %cond.true ]
+  br i1 undef, label %cond.true, label %for.end11
+
+cond.true:
+  %i.sroa.8.0.extract.trunc = ashr i32 %t0, 31
+  br label %for.cond
+
+for.end11:
+  %s1 = sext i16 %t1 to i64
+  %sroa38 = load i32, i32* %i, align 8
+  %insert.ext51 = zext i32 %i.sroa.8.0 to i64
+  %insert.shift52 = shl nuw i64 %insert.ext51, 32
+  %insert.ext39 = zext i32 %sroa38 to i64
+  %insert.insert41 = or i64 %insert.shift52, %insert.ext39
+  %rem = urem i64 %s1, %insert.insert41
+  %ne = icmp ne i64 %rem, 0
+  %cmp = icmp eq i64 %insert.insert41, 0
+  %spec.select57 = or i1 %ne, %cmp
+
+  %lor.ext = zext i1 %spec.select57 to i32
+  %t2 = load i32, i32* %d, align 4
+  %conv15 = sext i16 %t1 to i32
+  %cmp16 = icmp sge i32 %t2, %conv15
+  %conv17 = zext i1 %cmp16 to i32
+  %t3 = load i32, i32* %f, align 4
+  %add = add nsw i32 %t3, %conv17
+  store i32 %add, i32* %f, align 4
+  %rem18 = srem i32 %lor.ext, %add
+  %conv19 = zext i32 %rem18 to i64
+  %div = udiv i64 %insert.insert41, %conv19
+  %trunc33 = trunc i64 %div to i32
+  store i32 %trunc33, i32* %d, align 8
+  %r = icmp ult i64 %insert.insert41, %conv19
+  call void @llvm.assume(i1 %r)
+  ret i1 %r
+}
+
 declare void @llvm.assume(i1 noundef)

diff  --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 5af31fed042b0..be605b82e7563 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -410,17 +410,15 @@ define i32 @masked_bit_wrong_pred(i32 %x, i32 %y) {
   ret i32 %r
 }
 
-; Assert that zext(or(masked_bit_test, icmp)) can be correctly transformed to
-; or(shifted_masked_bit, zext(icmp))
-
 define i32 @zext_or_masked_bit_test(i32 %a, i32 %b, i32 %x) {
 ; CHECK-LABEL: @zext_or_masked_bit_test(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1
-; CHECK-NEXT:    [[CMP2:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    [[Z3:%.*]] = or i32 [[TMP2]], [[CMP2]]
-; CHECK-NEXT:    ret i32 [[Z3]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[B:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B]]
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[TOBOOL]], [[CMP]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[OR]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
 ;
   %shl = shl i32 1, %b
   %and = and i32 %shl, %a
@@ -439,9 +437,8 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B]]
 ; CHECK-NEXT:    [[OR:%.*]] = or i1 [[TOBOOL]], [[CMP]]
 ; CHECK-NEXT:    call void @use1(i1 [[OR]])
-; CHECK-NEXT:    [[Z34:%.*]] = or i1 [[TOBOOL]], [[CMP]]
-; CHECK-NEXT:    [[Z3:%.*]] = zext i1 [[Z34]] to i32
-; CHECK-NEXT:    ret i32 [[Z3]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[OR]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
 ;
   %shl = shl i32 1, %b
   %and = and i32 %shl, %a