[llvm] r315021 - [InstCombine] improve folds for icmp gt/lt (shr X, C1), C2
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 5 14:11:49 PDT 2017
Author: spatel
Date: Thu Oct 5 14:11:49 2017
New Revision: 315021
URL: http://llvm.org/viewvc/llvm-project?rev=315021&view=rev
Log:
[InstCombine] improve folds for icmp gt/lt (shr X, C1), C2
We can always eliminate the shift in: icmp gt/lt (shr X, C1), C2 --> icmp gt/lt X, C'
This patch was supposed to just be an efficiency improvement because we were doing this 3-step process to fold:
IC: Visiting: %c = icmp ugt i4 %s, 1
IC: ADD: %s = lshr i4 %x, 1
IC: ADD: %1 = udiv i4 %x, 2
IC: Old = %c = icmp ugt i4 %1, 1
New = <badref> = icmp uge i4 %x, 4
IC: ADD: %c = icmp uge i4 %x, 4
IC: ERASE %2 = icmp ugt i4 %1, 1
IC: Visiting: %c = icmp uge i4 %x, 4
IC: Old = %c = icmp uge i4 %x, 4
New = <badref> = icmp ugt i4 %x, 3
IC: ADD: %c = icmp ugt i4 %x, 3
IC: ERASE %2 = icmp uge i4 %x, 4
IC: Visiting: %c = icmp ugt i4 %x, 3
IC: DCE: %1 = udiv i4 %x, 2
IC: ERASE %1 = udiv i4 %x, 2
IC: DCE: %s = lshr i4 %x, 1
IC: ERASE %s = lshr i4 %x, 1
IC: Visiting: ret i1 %c
When we could go directly to canonical icmp form:
IC: Visiting: %c = icmp ugt i4 %s, 1
IC: Old = %c = icmp ugt i4 %s, 1
New = <badref> = icmp ugt i4 %x, 3
IC: ADD: %c = icmp ugt i4 %x, 3
IC: ERASE %1 = icmp ugt i4 %s, 1
IC: ADD: %s = lshr i4 %x, 1
IC: DCE: %s = lshr i4 %x, 1
IC: ERASE %s = lshr i4 %x, 1
IC: Visiting: %c = icmp ugt i4 %x, 3
...but then I noticed that the folds were incomplete too:
https://godbolt.org/g/aB2hLE
Here are attempts to prove the logic with Alive:
https://rise4fun.com/Alive/92o
Name: lshr_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr i8 %x, C1
%r = icmp ult i8 %sh, C2
=>
%r = icmp ult i8 %x, (C2 << C1)
Name: ashr_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr i8 %x, C1
%r = icmp slt i8 %sh, C2
=>
%r = icmp slt i8 %x, (C2 << C1)
Name: lshr_ugt
Pre: (((C2+1) << C1) u>> C1) == (C2+1)
%sh = lshr i8 %x, C1
%r = icmp ugt i8 %sh, C2
=>
%r = icmp ugt i8 %x, ((C2+1) << C1) - 1
Name: ashr_sgt
Pre: (C2 != 127) && ((C2+1) << C1 != -128) && (((C2+1) << C1) >> C1) == (C2+1)
%sh = ashr i8 %x, C1
%r = icmp sgt i8 %sh, C2
=>
%r = icmp sgt i8 %x, ((C2+1) << C1) - 1
Name: ashr_exact_sgt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp sgt i8 %sh, C2
=>
%r = icmp sgt i8 %x, (C2 << C1)
Name: ashr_exact_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp slt i8 %sh, C2
=>
%r = icmp slt i8 %x, (C2 << C1)
Name: lshr_exact_ugt
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ugt i8 %sh, C2
=>
%r = icmp ugt i8 %x, (C2 << C1)
Name: lshr_exact_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ult i8 %sh, C2
=>
%r = icmp ult i8 %x, (C2 << C1)
We did something similar for 'shl' in D28406.
Differential Revision: https://reviews.llvm.org/D38514
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=315021&r1=315020&r2=315021&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Thu Oct 5 14:11:49 2017
@@ -2011,43 +2011,46 @@ Instruction *InstCombiner::foldICmpShrCo
return nullptr;
bool IsAShr = Shr->getOpcode() == Instruction::AShr;
- if (!Cmp.isEquality()) {
- // If we have an unsigned comparison and an ashr, we can't simplify this.
- // Similarly for signed comparisons with lshr.
- if (Cmp.isSigned() != IsAShr)
- return nullptr;
-
- // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
- // by a power of 2. Since we already have logic to simplify these,
- // transform to div and then simplify the resultant comparison.
- if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1))
- return nullptr;
-
- // Revisit the shift (to delete it).
- Worklist.Add(Shr);
-
- Constant *DivCst = ConstantInt::get(
- Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
-
- Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact())
- : Builder.CreateUDiv(X, DivCst, "", Shr->isExact());
-
- Cmp.setOperand(0, Tmp);
-
- // If the builder folded the binop, just return it.
- BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
- if (!TheDiv)
- return &Cmp;
-
- // Otherwise, fold this div/compare.
- assert(TheDiv->getOpcode() == Instruction::SDiv ||
- TheDiv->getOpcode() == Instruction::UDiv);
-
- Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C);
- assert(Res && "This div/cst should have folded!");
- return Res;
+ bool IsExact = Shr->isExact();
+ Type *ShrTy = Shr->getType();
+ // TODO: If we could guarantee that InstSimplify would handle all of the
+ // constant-value-based preconditions in the folds below, then we could assert
+ // those conditions rather than checking them. This is difficult because of
+ // undef/poison (PR34838).
+ if (IsAShr) {
+ if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
+ // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
+ // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
+ APInt ShiftedC = C.shl(ShAmtVal);
+ if (ShiftedC.ashr(ShAmtVal) == C)
+ return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+ }
+ if (Pred == CmpInst::ICMP_SGT) {
+ // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
+ APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+ if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
+ (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+ return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+ }
+ } else {
+ if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
+ // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
+ // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
+ APInt ShiftedC = C.shl(ShAmtVal);
+ if (ShiftedC.lshr(ShAmtVal) == C)
+ return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+ }
+ if (Pred == CmpInst::ICMP_UGT) {
+ // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+ APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+ if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
+ return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+ }
}
+ if (!Cmp.isEquality())
+ return nullptr;
+
// Handle equality comparisons of shift-by-constant.
// If the comparison constant changes with the shift, the comparison cannot
@@ -2060,14 +2063,14 @@ Instruction *InstCombiner::foldICmpShrCo
// Check if the bits shifted out are known to be zero. If so, we can compare
// against the unshifted value:
// (X & 4) >> 1 == 2 --> (X & 4) == 4.
- Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), C << ShAmtVal);
+ Constant *ShiftedCmpRHS = ConstantInt::get(ShrTy, C << ShAmtVal);
if (Shr->hasOneUse()) {
if (Shr->isExact())
return new ICmpInst(Pred, X, ShiftedCmpRHS);
// Otherwise strength reduce the shift into an 'and'.
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(Shr->getType(), Val);
+ Constant *Mask = ConstantInt::get(ShrTy, Val);
Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
return new ICmpInst(Pred, And, ShiftedCmpRHS);
}
Modified: llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll?rev=315021&r1=315020&r2=315021&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/icmp-shr-lt-gt.ll Thu Oct 5 14:11:49 2017
@@ -888,8 +888,7 @@ define i1 @lshrult_03_15(i4 %x) {
define i1 @ashrsgt_01_00(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_00(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 1
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -899,8 +898,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
define i1 @ashrsgt_01_01(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_01(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -910,8 +908,7 @@ define i1 @ashrsgt_01_01(i4 %x) {
define i1 @ashrsgt_01_02(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_02(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 2
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 5
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1002,8 +999,7 @@ define i1 @ashrsgt_01_11(i4 %x) {
define i1 @ashrsgt_01_12(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_12(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -4
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -7
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1013,8 +1009,7 @@ define i1 @ashrsgt_01_12(i4 %x) {
define i1 @ashrsgt_01_13(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_13(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -3
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1024,8 +1019,7 @@ define i1 @ashrsgt_01_13(i4 %x) {
define i1 @ashrsgt_01_14(i4 %x) {
; CHECK-LABEL: @ashrsgt_01_14(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -2
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -3
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1045,8 +1039,7 @@ define i1 @ashrsgt_01_15(i4 %x) {
define i1 @ashrsgt_02_00(i4 %x) {
; CHECK-LABEL: @ashrsgt_02_00(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 2
@@ -1173,8 +1166,7 @@ define i1 @ashrsgt_02_13(i4 %x) {
define i1 @ashrsgt_02_14(i4 %x) {
; CHECK-LABEL: @ashrsgt_02_14(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], -2
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 2
@@ -1349,8 +1341,7 @@ define i1 @ashrslt_01_00(i4 %x) {
define i1 @ashrslt_01_01(i4 %x) {
; CHECK-LABEL: @ashrslt_01_01(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1360,8 +1351,7 @@ define i1 @ashrslt_01_01(i4 %x) {
define i1 @ashrslt_01_02(i4 %x) {
; CHECK-LABEL: @ashrslt_01_02(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 2
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1371,8 +1361,7 @@ define i1 @ashrslt_01_02(i4 %x) {
define i1 @ashrslt_01_03(i4 %x) {
; CHECK-LABEL: @ashrslt_01_03(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 3
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1463,8 +1452,7 @@ define i1 @ashrslt_01_12(i4 %x) {
define i1 @ashrslt_01_13(i4 %x) {
; CHECK-LABEL: @ashrslt_01_13(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -3
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1474,8 +1462,7 @@ define i1 @ashrslt_01_13(i4 %x) {
define i1 @ashrslt_01_14(i4 %x) {
; CHECK-LABEL: @ashrslt_01_14(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -2
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1485,8 +1472,7 @@ define i1 @ashrslt_01_14(i4 %x) {
define i1 @ashrslt_01_15(i4 %x) {
; CHECK-LABEL: @ashrslt_01_15(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 1
@@ -1506,8 +1492,7 @@ define i1 @ashrslt_02_00(i4 %x) {
define i1 @ashrslt_02_01(i4 %x) {
; CHECK-LABEL: @ashrslt_02_01(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 2
@@ -1634,8 +1619,7 @@ define i1 @ashrslt_02_14(i4 %x) {
define i1 @ashrslt_02_15(i4 %x) {
; CHECK-LABEL: @ashrslt_02_15(
-; CHECK-NEXT: [[S:%.*]] = ashr i4 %x, 2
-; CHECK-NEXT: [[C:%.*]] = icmp slt i4 [[S]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
; CHECK-NEXT: ret i1 [[C]]
;
%s = ashr i4 %x, 2
More information about the llvm-commits
mailing list