[llvm] r370351 - [InstSimplify] Drop leftover "division-by-zero guard" around `@llvm.umul.with.overflow` inverted overflow bit

Thu Aug 29 05:48:04 PDT 2019

Author: lebedevri
Date: Thu Aug 29 05:48:04 2019
New Revision: 370351

URL: http://llvm.org/viewvc/llvm-project?rev=370351&view=rev
Log:
[InstSimplify] Drop leftover "division-by-zero guard" around `@llvm.umul.with.overflow` inverted overflow bit

Summary:
Now that with D65143/D65144 we've produce `@llvm.umul.with.overflow`,
and with D65147 we've flattened the CFG, we now can see that
the guard may have been there to prevent division by zero is redundant.
We can simply drop it:
```
----------------------------------------
Name: no overflow or zero
  %iszero = icmp eq i4 %y, 0
  %umul = smul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %umul.ov.not = xor %umul.ov, -1
  %retval.0 = or i1 %iszero, %umul.ov.not
  ret i1 %retval.0
=>
  %iszero = icmp eq i4 %y, 0
  %umul = smul_overflow i4 %x, %y
  %umul.ov = extractvalue {i4, i1} %umul, 1
  %umul.ov.not = xor %umul.ov, -1
  %retval.0 = or i1 %iszero, %umul.ov.not
  ret i1 %umul.ov.not

Done: 1
Optimization is correct!
```
Note that this is inverted from what we have in a previous patch,
here we are looking for the inverted overflow bit.
And that inversion is kinda problematic - given this particular
pattern we neither hoist that `not` closer to `ret` (then the pattern
would have been identical to the one without inversion,
and would have been handled by the previous patch), neither
do the opposite transform. But regardless, we should handle this too.
I've filled [[ https://bugs.llvm.org/show_bug.cgi?id=42720 | PR42720 ]].

Reviewers: nikic, spatel, xbolva00, RKSimon

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65151

Modified:
    llvm/trunk/lib/Analysis/InstructionSimplify.cpp
    llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll
    llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll
    llvm/trunk/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll

Modified: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InstructionSimplify.cpp?rev=370351&r1=370350&r2=370351&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/InstructionSimplify.cpp (original)
+++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp Thu Aug 29 05:48:04 2019
@@ -1759,6 +1759,27 @@ static Value *simplifyAndOrOfCmps(const
   return nullptr;
 }
 
+/// Check that the Op1 is in expected form, i.e.:
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+                                                          Value *X) {
+  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+  // We should only be extracting the overflow bit.
+  if (!Extract || !Extract->getIndices().equals(1))
+    return false;
+  Value *Agg = Extract->getAggregateOperand();
+  // This should be a multiplication-with-overflow intrinsic.
+  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
+    return false;
+  // One of its multipliers should be the value we checked for zero before.
+  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+                              m_Argument<1>(m_Specific(X)))))
+    return false;
+  return true;
+}
+
 /// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
 /// other form of check, e.g. one that was using division; it may have been
 /// guarded against division-by-zero. We can drop that check now.
@@ -1774,23 +1795,41 @@ static Value *omitCheckForZeroBeforeMulW
   if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
       Pred != ICmpInst::Predicate::ICMP_NE)
     return nullptr;
-  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
-  // We should only be extracting the overflow bit.
-  if (!Extract || !Extract->getIndices().equals(1))
-    return nullptr;
-  Value *Agg = Extract->getAggregateOperand();
-  // This should be a multiplication-with-overflow intrinsic.
-  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
-                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
-    return nullptr;
-  // One of its multipliers should be the value we checked for zero before.
-  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
-                              m_Argument<1>(m_Specific(X)))))
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
     return nullptr;
   // Can omit 'and', and just return the overflow bit.
   return Op1;
 }
 
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp eq i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %NotOp1 = xor i1 %Op1, true
+///   %or = or i1 %Op0, %NotOp1
+/// We can just return  %NotOp1
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+                                                            Value *NotOp1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_EQ)
+    return nullptr;
+  // We expect the other hand of an 'or' to be a 'not'.
+  Value *Op1;
+  if (!match(NotOp1, m_Not(m_Value(Op1))))
+    return nullptr;
+  // Is Op1 in expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+    return nullptr;
+  // Can omit 'and', and just return the inverted overflow bit.
+  return NotOp1;
+}
+
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2027,6 +2066,14 @@ static Value *SimplifyOrInst(Value *Op0,
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
     return V;
 
+  // If we have a multiplication overflow check that is being 'and'ed with a
+  // check that one of the multipliers is not zero, we can omit the 'and', and
+  // only keep the overflow check.
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
+    return V;
+
   // Try some generic simplifications for associative operations.
   if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
                                           MaxRecurse))

Modified: llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll?rev=370351&r1=370350&r2=370351&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-smul_ov-not.ll Thu Aug 29 05:48:04 2019
@@ -5,12 +5,10 @@ declare { i4, i1 } @llvm.smul.with.overf
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -22,12 +20,10 @@ define i1 @t0_umul(i4 %size, i4 %nmemb)
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[SMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)

Modified: llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll?rev=370351&r1=370350&r2=370351&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/div-by-0-guard-before-umul_ov-not.ll Thu Aug 29 05:48:04 2019
@@ -5,12 +5,10 @@ declare { i4, i1 } @llvm.umul.with.overf
 
 define i1 @t0_umul(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t0_umul(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP]], [[PHITMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -22,12 +20,10 @@ define i1 @t0_umul(i4 %size, i4 %nmemb)
 
 define i1 @t1_commutative(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @t1_commutative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
+; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE:%.*]], i4 [[NMEMB:%.*]])
 ; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
 ; CHECK-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[PHITMP]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    ret i1 [[PHITMP]]
 ;
   %cmp = icmp eq i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)

Modified: llvm/trunk/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll?rev=370351&r1=370350&r2=370351&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll (original)
+++ llvm/trunk/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll Thu Aug 29 05:48:04 2019
@@ -124,12 +124,10 @@ define i1 @will_overflow(i64 %arg, i64 %
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:  bb:
@@ -142,12 +140,10 @@ define i1 @will_overflow(i64 %arg, i64 %
 ;
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1:%.*]])
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
 ; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T6:%.*]] = or i1 [[T0]], [[PHITMP]]
-; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[T6]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[PHITMP]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0