[llvm] r364738 - [InstCombine] Omit 'urem' where possible

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 02:41:43 PDT 2019


Author: lebedevri
Date: Mon Jul  1 02:41:43 2019
New Revision: 364738

URL: http://llvm.org/viewvc/llvm-project?rev=364738&view=rev
Log:
[InstCombine] Omit 'urem' where possible

This was added in D63390 / rL364286 to backend,
but it makes sense to also handle it in middle-end.
https://rise4fun.com/Alive/Zsln

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=364738&r1=364737&r2=364738&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Mon Jul  1 02:41:43 2019
@@ -1317,14 +1317,16 @@ static Instruction *processUGT_ADDCST_AD
   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }
 
-// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+// Handle  icmp pred X, 0
 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
   CmpInst::Predicate Pred = Cmp.getPredicate();
-  Value *X = Cmp.getOperand(0);
+  if (!match(Cmp.getOperand(1), m_Zero()))
+    return nullptr;
 
-  if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
+  // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+  if (Pred == ICmpInst::ICMP_SGT) {
     Value *A, *B;
-    SelectPatternResult SPR = matchSelectPattern(X, A, B);
+    SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
     if (SPR.Flavor == SPF_SMIN) {
       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
         return new ICmpInst(Pred, B, Cmp.getOperand(1));
@@ -1332,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZ
         return new ICmpInst(Pred, A, Cmp.getOperand(1));
     }
   }
+
+  // Given:
+  //   icmp eq/ne (urem %x, %y), 0
+  // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+  //   icmp eq/ne %x, 0
+  Value *X, *Y;
+  if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
+      ICmpInst::isEquality(Pred)) {
+    KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+    KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+    if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+      return new ICmpInst(Pred, X, Cmp.getOperand(1));
+  }
+
   return nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll?rev=364738&r1=364737&r2=364738&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll Mon Jul  1 02:41:43 2019
@@ -14,8 +14,7 @@
 define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
 ; CHECK-LABEL: @p0_scalar_urem_by_const(
 ; CHECK-NEXT:    [[T0:%.*]] = and i32 [[X:%.*]], 128
-; CHECK-NEXT:    [[T1:%.*]] = urem i32 [[T0]], 6
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T0]], 0
 ; CHECK-NEXT:    ret i1 [[T2]]
 ;
   %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
@@ -27,9 +26,7 @@ define i1 @p0_scalar_urem_by_const(i32 %
 define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
 ; CHECK-LABEL: @p1_scalar_urem_by_nonconst(
 ; CHECK-NEXT:    [[T0:%.*]] = and i32 [[X:%.*]], 128
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[Y:%.*]], 6
-; CHECK-NEXT:    [[T2:%.*]] = urem i32 [[T0]], [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
+; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T0]], 0
 ; CHECK-NEXT:    ret i1 [[T3]]
 ;
   %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
@@ -76,8 +73,7 @@ define i1 @p3_scalar_shifted2_urem_by_co
 define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p4_vector_urem_by_const__splat(
 ; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
-; CHECK-NEXT:    [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T1]], zeroinitializer
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[T2]]
 ;
   %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
@@ -115,8 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const
 define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_undef2(
 ; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
-; CHECK-NEXT:    [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T1]], <i32 0, i32 0, i32 undef, i32 0>
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 undef, i32 0>
 ; CHECK-NEXT:    ret <4 x i1> [[T2]]
 ;
   %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero




More information about the llvm-commits mailing list