[llvm] r315021 - [InstCombine] improve folds for icmp gt/lt (shr X, C1), C2

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 14:11:49 PDT 2017


Author: spatel
Date: Thu Oct  5 14:11:49 2017
New Revision: 315021

URL: http://llvm.org/viewvc/llvm-project?rev=315021&view=rev
Log:
[InstCombine] improve folds for icmp gt/lt (shr X, C1), C2

We can always eliminate the shift in: icmp gt/lt (shr X, C1), C2 --> icmp gt/lt X, C'
This patch was supposed to just be an efficiency improvement because we were doing this 3-step process to fold:

IC: Visiting:   %c = icmp ugt i4 %s, 1
IC: ADD:   %s = lshr i4 %x, 1
IC: ADD:   %1 = udiv i4 %x, 2
IC: Old =   %c = icmp ugt i4 %1, 1
    New =   <badref> = icmp uge i4 %x, 4
IC: ADD:   %c = icmp uge i4 %x, 4
IC: ERASE   %2 = icmp ugt i4 %1, 1
IC: Visiting:   %c = icmp uge i4 %x, 4
IC: Old =   %c = icmp uge i4 %x, 4
    New =   <badref> = icmp ugt i4 %x, 3
IC: ADD:   %c = icmp ugt i4 %x, 3
IC: ERASE   %2 = icmp uge i4 %x, 4
IC: Visiting:   %c = icmp ugt i4 %x, 3
IC: DCE:   %1 = udiv i4 %x, 2
IC: ERASE   %1 = udiv i4 %x, 2
IC: DCE:   %s = lshr i4 %x, 1
IC: ERASE   %s = lshr i4 %x, 1
IC: Visiting:   ret i1 %c

When we could go directly to the canonical icmp form:

IC: Visiting:   %c = icmp ugt i4 %s, 1
IC: Old =   %c = icmp ugt i4 %s, 1
    New =   <badref> = icmp ugt i4 %x, 3
IC: ADD:   %c = icmp ugt i4 %x, 3
IC: ERASE   %1 = icmp ugt i4 %s, 1
IC: ADD:   %s = lshr i4 %x, 1
IC: DCE:   %s = lshr i4 %x, 1
IC: ERASE   %s = lshr i4 %x, 1
IC: Visiting:   %c = icmp ugt i4 %x, 3

...but then I noticed that the folds were incomplete too:
https://godbolt.org/g/aB2hLE

Here are attempts to prove the logic with Alive:
https://rise4fun.com/Alive/92o

Name: lshr_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr i8 %x, C1
%r = icmp ult i8 %sh, C2
  =>
%r = icmp ult i8 %x, (C2 << C1)

Name: ashr_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr i8 %x, C1
%r = icmp slt i8 %sh, C2
  =>
%r = icmp slt i8 %x, (C2 << C1)

Name: lshr_ugt
Pre: (((C2+1) << C1) u>> C1) == (C2+1)
%sh = lshr i8 %x, C1
%r = icmp ugt i8 %sh, C2
  =>
%r = icmp ugt i8 %x, ((C2+1) << C1) - 1

Name: ashr_sgt
Pre: (C2 != 127) && ((C2+1) << C1 != -128) && (((C2+1) << C1) >> C1) == (C2+1)
%sh = ashr i8 %x, C1
%r = icmp sgt i8 %sh, C2
  =>
%r = icmp sgt i8 %x, ((C2+1) << C1) - 1

Name: ashr_exact_sgt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp sgt i8 %sh, C2
  =>
%r = icmp sgt i8 %x, (C2 << C1)

Name: ashr_exact_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp slt i8 %sh, C2
  =>
%r = icmp slt i8 %x, (C2 << C1)

Name: lshr_exact_ugt
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ugt i8 %sh, C2
  =>
%r = icmp ugt i8 %x, (C2 << C1)

Name: lshr_exact_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ult i8 %sh, C2
  =>
%r = icmp ult i8 %x, (C2 << C1)

We did something similar for 'shl' in D28406.

Differential Revision: https://reviews.llvm.org/D38514



Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=315021&r1=315020&r2=315021&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Thu Oct  5 14:11:49 2017
@@ -2011,43 +2011,46 @@ Instruction *InstCombiner::foldICmpShrCo
     return nullptr;
 
   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
-  if (!Cmp.isEquality()) {
-    // If we have an unsigned comparison and an ashr, we can't simplify this.
-    // Similarly for signed comparisons with lshr.
-    if (Cmp.isSigned() != IsAShr)
-      return nullptr;
-
-    // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
-    // by a power of 2.  Since we already have logic to simplify these,
-    // transform to div and then simplify the resultant comparison.
-    if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1))
-      return nullptr;
-
-    // Revisit the shift (to delete it).
-    Worklist.Add(Shr);
-
-    Constant *DivCst = ConstantInt::get(
-        Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
-
-    Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact())
-                        : Builder.CreateUDiv(X, DivCst, "", Shr->isExact());
-
-    Cmp.setOperand(0, Tmp);
-
-    // If the builder folded the binop, just return it.
-    BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
-    if (!TheDiv)
-      return &Cmp;
-
-    // Otherwise, fold this div/compare.
-    assert(TheDiv->getOpcode() == Instruction::SDiv ||
-           TheDiv->getOpcode() == Instruction::UDiv);
-
-    Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C);
-    assert(Res && "This div/cst should have folded!");
-    return Res;
+  bool IsExact = Shr->isExact();
+  Type *ShrTy = Shr->getType();
+  // TODO: If we could guarantee that InstSimplify would handle all of the
+  // constant-value-based preconditions in the folds below, then we could assert
+  // those conditions rather than checking them. This is difficult because of
+  // undef/poison (PR34838).
+  if (IsAShr) {
+    if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
+      // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
+      // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
+      APInt ShiftedC = C.shl(ShAmtVal);
+      if (ShiftedC.ashr(ShAmtVal) == C)
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+    if (Pred == CmpInst::ICMP_SGT) {
+      // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
+      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+      if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
+          (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+  } else {
+    if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
+      // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
+      // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
+      APInt ShiftedC = C.shl(ShAmtVal);
+      if (ShiftedC.lshr(ShAmtVal) == C)
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+    if (Pred == CmpInst::ICMP_UGT) {
+      // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+      if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
   }
 
+  if (!Cmp.isEquality())
+    return nullptr;
+
   // Handle equality comparisons of shift-by-constant.
 
   // If the comparison constant changes with the shift, the comparison cannot
@@ -2060,14 +2063,14 @@ Instruction *InstCombiner::foldICmpShrCo
   // Check if the bits shifted out are known to be zero. If so, we can compare
   // against the unshifted value:
   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
-  Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), C << ShAmtVal);
+  Constant *ShiftedCmpRHS = ConstantInt::get(ShrTy, C << ShAmtVal);
   if (Shr->hasOneUse()) {
     if (Shr->isExact())
       return new ICmpInst(Pred, X, ShiftedCmpRHS);
 
     // Otherwise strength reduce the shift into an 'and'.
     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-    Constant *Mask = ConstantInt::get(Shr->getType(), Val);
+    Constant *Mask = ConstantInt::get(ShrTy, Val);
     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
     return new ICmpInst(Pred, And, ShiftedCmpRHS);
   }

Modified: llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll?rev=315021&r1=315020&r2=315021&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll Thu Oct  5 14:11:49 2017
@@ -888,8 +888,7 @@ define i1 @lshrult_03_15(i4 %x) {
 
 define i1 @ashrsgt_01_00(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_00(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], 0
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, 1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -899,8 +898,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
 
 define i1 @ashrsgt_01_01(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_01(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, 3
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -910,8 +908,7 @@ define i1 @ashrsgt_01_01(i4 %x) {
 
 define i1 @ashrsgt_01_02(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_02(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], 2
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, 5
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1002,8 +999,7 @@ define i1 @ashrsgt_01_11(i4 %x) {
 
 define i1 @ashrsgt_01_12(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_12(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], -4
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -7
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1013,8 +1009,7 @@ define i1 @ashrsgt_01_12(i4 %x) {
 
 define i1 @ashrsgt_01_13(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_13(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], -3
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -5
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1024,8 +1019,7 @@ define i1 @ashrsgt_01_13(i4 %x) {
 
 define i1 @ashrsgt_01_14(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_01_14(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], -2
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -3
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1045,8 +1039,7 @@ define i1 @ashrsgt_01_15(i4 %x) {
 
 define i1 @ashrsgt_02_00(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_02_00(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], 0
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, 3
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 2
@@ -1173,8 +1166,7 @@ define i1 @ashrsgt_02_13(i4 %x) {
 
 define i1 @ashrsgt_02_14(i4 %x) {
 ; CHECK-LABEL: @ashrsgt_02_14(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], -2
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 %x, -5
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 2
@@ -1349,8 +1341,7 @@ define i1 @ashrslt_01_00(i4 %x) {
 
 define i1 @ashrslt_01_01(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_01(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, 2
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1360,8 +1351,7 @@ define i1 @ashrslt_01_01(i4 %x) {
 
 define i1 @ashrslt_01_02(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_02(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], 2
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, 4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1371,8 +1361,7 @@ define i1 @ashrslt_01_02(i4 %x) {
 
 define i1 @ashrslt_01_03(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_03(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], 3
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, 6
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1463,8 +1452,7 @@ define i1 @ashrslt_01_12(i4 %x) {
 
 define i1 @ashrslt_01_13(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_13(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], -3
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, -6
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1474,8 +1462,7 @@ define i1 @ashrslt_01_13(i4 %x) {
 
 define i1 @ashrslt_01_14(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_14(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], -2
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, -4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1485,8 +1472,7 @@ define i1 @ashrslt_01_14(i4 %x) {
 
 define i1 @ashrslt_01_15(i4 %x) {
 ; CHECK-LABEL: @ashrslt_01_15(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, -2
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 1
@@ -1506,8 +1492,7 @@ define i1 @ashrslt_02_00(i4 %x) {
 
 define i1 @ashrslt_02_01(i4 %x) {
 ; CHECK-LABEL: @ashrslt_02_01(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, 4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 2
@@ -1634,8 +1619,7 @@ define i1 @ashrslt_02_14(i4 %x) {
 
 define i1 @ashrslt_02_15(i4 %x) {
 ; CHECK-LABEL: @ashrslt_02_15(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 [[S]], -1
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i4 %x, -4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i4 %x, 2




More information about the llvm-commits mailing list