[PATCH] Fold together repeated tests for divisibility by constants

Mon Jul 21 18:55:05 PDT 2014

Hi!

The attached patch teaches LLVM to fold together repeated tests for
divisibility by a constant into a single test for divisibility by the LCM
of the constants.

This was inspired by PR20205 (but doesn't directly help there, because we
can't compute a trip count for its loop, nor peel the loop like GCC does).
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140721/23aac22c/attachment.html>
-------------- next part --------------
Index: lib/Support/APInt.cpp
===================================================================

--- lib/Support/APInt.cpp	(revision 213404)
+++ lib/Support/APInt.cpp	(working copy)
@@ -802,15 +802,44 @@
   return Result;
 }
 
-APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
-                                            const APInt& API2) {
-  APInt A = API1, B = API2;
-  while (!!B) {
-    APInt T = B;
-    B = APIntOps::urem(A, B);
-    A = T;
+APInt llvm::APIntOps::GreatestCommonDivisor(APInt API1, APInt API2) {
+  // Fast path: gcd(x, x) == x (this also covers gcd(0, 0) == 0).
+  if (API1 == API2) return API1;
+
+  // Corner cases: gcd(0, x) == x and gcd(x, 0) == x.
+  if (!API1) return API2;
+  if (!API2) return API1;
+
+  // Pow2 = number of common trailing zero bits; shift the excess ones out.
+  unsigned Pow2;
+  {
+    unsigned Pow2_1 = API1.countTrailingZeros();
+    unsigned Pow2_2 = API2.countTrailingZeros();
+    if (Pow2_1 > Pow2_2) {
+      API1 = API1.lshr(Pow2_1 - Pow2_2);
+      Pow2 = Pow2_2;
+    } else if (Pow2_2 > Pow2_1) {
+      API2 = API2.lshr(Pow2_2 - Pow2_1);
+      Pow2 = Pow2_1;
+    } else {
+      Pow2 = Pow2_1;
+    }
   }
-  return A;
+
+  // Both operands are now odd multiples of 2^Pow2, so repeatedly apply
+  // (with i chosen so that the quotient is again an odd multiple of 2^Pow2):
+  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
+  while (API1 != API2) {
+    if (API1.ugt(API2)) {
+      API1 -= API2;
+      API1 = API1.lshr(API1.countTrailingZeros() - Pow2);
+    } else {
+      API2 -= API1;
+      API2 = API2.lshr(API2.countTrailingZeros() - Pow2);
+    }
+  }
+
+  return API1;
 }
 
 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp	(revision 213404)
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp	(working copy)
@@ -1044,7 +1044,124 @@
   return nullptr;
 }
 
+namespace {
+/// Models a check that LHS is divisible by Factor.
+class DivisibilityCheck {
+  // Kind (and signedness) of the check. A bitwise 'and' is a divisibility
+  // check if its mask is (equivalent to) a low-bits mask, i.e. 2^N - 1.
+  enum { DC_Null, DC_SRem, DC_URem, DC_And } Kind;
+  Value *Check;
+  Value *LHS;
+  ConstantInt *Factor;

+public:
+  DivisibilityCheck() : Kind(DC_Null) {}
+
+  /// Try to extract a divisibility check from V, on the assumption
+  /// that it is being compared to 0.
+  bool match(Value *V) {
+    Kind = DC_Null;
+    Check = V;
+    if (::match(V, m_SRem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_SRem;
+    else if (::match(V, m_URem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_URem;
+    else if (::match(V, m_And(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_And;
+    return Kind != DC_Null;
+  }
+
+  /// Merge another divisibility check into this one; false if not possible.
+  bool merge(const DivisibilityCheck &O) {
+    assert(Kind != DC_Null && O.Kind != DC_Null);
+    if (LHS != O.LHS)
+      // We don't have two divisibility checks on the same operand.
+      return false;
+
+    if (!(Check->hasOneUse() && Kind != DC_And) &&
+        !(O.Check->hasOneUse() && O.Kind != DC_And))
+      // We would not remove a division: bail out.
+      return false;
+
+    // Determine the factors we're checking for (these shadow the LHS member).
+    bool Failed = false;
+    APInt LHS = getFactor(O, Failed);
+    APInt RHS = O.getFactor(*this, Failed);
+    if (Failed)
+      return false;
+
+    // Mixed kinds can still be folded if one factor is a power of 2 (a zero
+    // factor from a mask stands for 2^BitWidth), because divisibility by a
+    // power of 2 is the same for srem and urem.
+    if (Kind != O.Kind && O.Kind != DC_And && (!LHS || LHS.isPowerOf2()))
+      Kind = O.Kind;
+    if (Kind != O.Kind && !RHS.isPowerOf2())
+      return false;
+    assert((Kind == DC_SRem || Kind == DC_URem) && "bad kind after merging");
+    bool Signed = Kind == DC_SRem;
+
+    // Fold them together. A zero GCD means both factors are zero: give up.
+    APInt GCD = APIntOps::GreatestCommonDivisor(LHS, RHS);
+    if (!GCD) return false;
+    APInt LCM = LHS.udiv(GCD);
+    bool Overflow = false;
+    LCM = Signed ? LCM.smul_ov(RHS, Overflow) : LCM.umul_ov(RHS, Overflow);
+    // On overflow, no non-zero value is divisible by both factors at once.
+    if (Overflow) LCM = 0;
+    Factor = cast<ConstantInt>(ConstantInt::get(Factor->getType(), LCM));
+    return true;
+  }
+
+  Value *create(InstCombiner::BuilderTy *Builder) {
+    // A zero factor means only LHS == 0 passes; avoid a division by zero.
+    if (!Factor->getValue())
+      return LHS;
+    return Kind == DC_SRem ? Builder->CreateSRem(LHS, Factor)
+                           : Builder->CreateURem(LHS, Factor);
+  }
+
+private:
+  /// Get the unsigned factor (for a mask, 0 means 2^BitWidth); may set Failed.
+  APInt getFactor(const DivisibilityCheck &O, bool &Failed) const {
+    switch (Kind) {
+    case DC_Null:
+      llvm_unreachable("unexpected Kind");
+
+    case DC_SRem:
+      if (Factor->getValue().isNegative())
+        return -Factor->getValue();
+      // Fall through.
+    case DC_URem:
+      return Factor->getValue();
+
+    case DC_And:
+      assert(O.Kind != DC_And && "bad kind pair");
+      // If we're also checking for divisibility by K * 2^N,
+      // the low N bits of the mask are irrelevant.
+      APInt Result =
+          Factor->getValue() |
+          APInt::getLowBitsSet(Factor->getValue().getBitWidth(),
+                               O.getFactor(*this, Failed).countTrailingZeros());
+      ++Result;
+      if (!!Result && !Result.isPowerOf2())
+        Failed = true;
+      return Result;
+    }
+  }
+};
+
+struct DivisibilityCheck_match {
+  DivisibilityCheck &Check;
+  DivisibilityCheck_match(DivisibilityCheck &Check) : Check(Check) {}
+  bool match(Value *V) { return Check.match(V); }
+};
+
+/// Create a matcher that records a matched divisibility check in Check.
+DivisibilityCheck_match m_DivisibilityCheck(DivisibilityCheck &Check) {
+  return DivisibilityCheck_match(Check);
+}
+}
+
 /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
 ///
 Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
@@ -1334,6 +1451,15 @@
         Op = BinaryOperator::CreateOr(ICIP, ICIQ);
       return Op;
     }
+    DivisibilityCheck DivL, DivR;
+    if (match(LHSI,
+              m_Or(m_DivisibilityCheck(DivL), m_DivisibilityCheck(DivR))) &&
+        DivL.merge(DivR)) {
+      // Simplify icmp eq (or (srem P, M), (srem P, N)), 0
+      //  -> icmp eq (srem P, lcm(M, N)), 0
+      return new ICmpInst(ICI.getPredicate(), DivL.create(Builder),
+                          Constant::getNullValue(LHSI->getType()));
+    }
     break;
   }
 
Index: test/Transforms/InstCombine/divisibility.ll
===================================================================
--- test/Transforms/InstCombine/divisibility.ll	(revision 0)
+++ test/Transforms/InstCombine/divisibility.ll	(revision 0)
@@ -0,0 +1,276 @@
+; Test that multiple divisibility checks are merged.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test1(i32 %A) {
+  %B = srem i32 %A, 2
+  %C = srem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test2(i32 %A) {
+  %B = urem i32 %A, 2
+  %C = urem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test3(i32 %A) {
+  %B = srem i32 %A, 2
+  %C = urem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test4(i32 %A) {
+  %B = urem i32 %A, 2
+  %C = srem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test5(i32 %A) {
+  %B = srem i32 %A, 8
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test6(i32 %A) {
+  %B = and i32 %A, 6
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test7(i32 %A) {
+  %B = and i32 %A, 8
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: and i32 %A, 8
+; CHECK-NEXT: srem i32 %A, 12
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test8(i32 %A, i32 %B) {
+  %C = srem i32 %A, 2
+  %D = srem i32 %B, 3
+  %E = or i32 %C, %D
+  %F = icmp eq i32 %E, 0
+  ret i1 %F
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: srem i32 %A, 2
+; CHECK-NEXT: srem i32 %B, 3
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test9(i32 %A) {
+  %B = srem i32 %A, 7589
+  %C = srem i32 %A, 395309
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1 %E
+}
+
+define i1 @test10(i32 %A) {
+  ; 7589 and 395309 are prime, and
+  ; 7589 * 395309 == 3000000001 == -1294967295 (mod 2^32)
+  %B = urem i32 %A, 7589
+  %C = urem i32 %A, 395309
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: urem i32 %A, -1294967295
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test11(i32 %A) {
+  %B = urem i32 %A, 65535
+  %C = urem i32 %A, 65537
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: urem i32 %A, -1
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test12(i32 %A) {
+  %B = urem i32 %A, 65536
+  %C = urem i32 %A, 65537
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test13(i32 %A) {
+  %B = srem i32 %A, 65536
+  %C = urem i32 %A, 65535
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: urem i32 %A, -65536
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test14(i32 %A) {
+  %B = srem i32 %A, 95
+  %C = srem i32 %A, 22605091
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: srem i32 %A, 2147483645
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test15(i32 %A) {
+  %B = srem i32 %A, 97
+  %C = srem i32 %A, 22605091
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i32 @test16(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = srem i32 %A, 5
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %B, %F
+  ret i32 %G
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:  %B = srem i32 %A, 3
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 15
+; CHECK-NEXT:  %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  %F = zext i1 %E to i32
+; CHECK-NEXT:  %G = add i32 %B, %F
+; CHECK-NEXT:  ret i32 %G
+}
+
+define i32 @test17(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = srem i32 %A, 5
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %B, %F
+  %H = add i32 %C, %G
+  ret i32 %H
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:  %B = srem i32 %A, 3
+; CHECK-NEXT:  %C = srem i32 %A, 5
+; CHECK-NOT: srem
+; CHECK: ret i32
+}
+
+define i32 @test18(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = and i32 %A, 7
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %C, %F
+  ret i32 %G
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  %C = and i32 %A, 7
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 24
+; CHECK-NEXT:  %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  %F = zext i1 %E to i32
+; CHECK-NEXT:  %G = add
+; CHECK-NEXT:  ret i32 %G
+}
+
+define i1 @test19(i32 %A) {
+  %B = srem i32 %A, 6
+  %C = srem i32 %A, 10
+  %D = icmp eq i32 %B, 0
+  %E = icmp eq i32 %C, 0
+  %F = and i1 %D, %E
+  ret i1 %F
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 30
+; CHECK-NEXT:  icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  ret i1
+}
+
+define i1 @test20(i32 %A) {
+  %B = and i32 %A, 1
+  %C = srem i32 %A, 3
+  %D = and i32 %A, 3
+  %E = srem i32 %A, 5
+  %F = srem i32 %A, 6
+  %G = icmp eq i32 %B, 0
+  %H = icmp eq i32 %C, 0
+  %I = icmp eq i32 %D, 0
+  %J = icmp eq i32 %E, 0
+  %K = icmp eq i32 %F, 0
+  %L = and i1 %G, %H
+  %M = and i1 %L, %I
+  %N = and i1 %M, %J
+  %O = and i1 %N, %K
+  ret i1 %O
+; FIXME: We should recurse into operands of 'or's in comparisons to 0.
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:  srem i32 %A, 5
+; CHECK-NEXT:  srem i32 %A, 6
+; CHECK-NEXT:  srem i32 %A, 12
+; CHECK-NEXT:  or i32
+; CHECK-NEXT:  or i32
+; CHECK-NEXT:  icmp eq i32
+; CHECK-NEXT:  ret i1
+}
Index: include/llvm/ADT/APInt.h
===================================================================
--- include/llvm/ADT/APInt.h	(revision 213404)
+++ include/llvm/ADT/APInt.h	(working copy)
@@ -1756,13 +1756,13 @@
 /// \brief Returns the floor log base 2 of the specified APInt value.
 inline unsigned logBase2(const APInt &APIVal) { return APIVal.logBase2(); }
 
-/// \brief Compute GCD of two APInt values.
+/// \brief Compute GCD of two unsigned APInt values.
 ///
 /// This function returns the greatest common divisor of the two APInt values
 /// using Euclid's algorithm.
 ///
 /// \returns the greatest common divisor of Val1 and Val2
-APInt GreatestCommonDivisor(const APInt &Val1, const APInt &Val2);
+APInt GreatestCommonDivisor(APInt Val1, APInt Val2);
 
 /// \brief Converts the given APInt to a double value.
 ///