[PATCH] D38521: [InstCombine] Improve support for ashr in foldICmpAndShift

Craig Topper via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 3 17:02:14 PDT 2017


craig.topper created this revision.

We can support ashr similar to lshr, if we know that none of the shifted in bits are used. In that case SimplifyDemandedBits would normally convert it to lshr. But that conversion doesn't happen if the shift has additional users.

Here are the alive tests I used to prove this

  Name: test1
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp eq %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp eq %d, C3 << C1
  
  Name: test2
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp ult %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp ult %d, C3 << C1
  
  Name: test3
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp ugt %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp ugt %d, C3 << C1
  
  Name: test4
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp uge %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp uge %d, C3 << C1
  
  Name: test5
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp ule %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp ule %d, C3 << C1
  
  Name: test6
  Pre: ((C3 << C1) u>> C1) == C3 && ((C2 << C1) u>> C1) == C2
  %a = ashr %x, C1
  %b = and %a, C2
  %c = icmp ne %b, C3
  =>
  %d = and %x, C2 << C1
  %c = icmp ne %d, C3 << C1
  
  Name: test7
  Pre: ((C1 << C3) u>> C3) == C1 && ((C2 << C3) u>> C3) == C2 && (C2 << C3) >= 0 && (C1 << C3) >= 0
  %a = ashr %x, C3
  %b = and %a, C2
  %c = icmp sgt %b, C1
  =>
  %d = and %x, C2 << C3
  %c = icmp sgt %d, C1 << C3
  
  Name: test8
  Pre: ((C1 << C3) u>> C3) == C1 && ((C2 << C3) u>> C3) == C2 && (C2 << C3) >= 0 && (C1 << C3) >= 0
  %a = ashr %x, C3
  %b = and %a, C2
  %c = icmp slt %b, C1
  =>
  %d = and %x, C2 << C3
  %c = icmp slt %d, C1 << C3


https://reviews.llvm.org/D38521

Files:
  lib/Transforms/InstCombine/InstCombineCompares.cpp
  test/Transforms/InstCombine/icmp.ll


Index: test/Transforms/InstCombine/icmp.ll
===================================================================
--- test/Transforms/InstCombine/icmp.ll
+++ test/Transforms/InstCombine/icmp.ll
@@ -1634,6 +1634,50 @@
   ret i1 %and3
 }
 
+; Variation of the above with an ashr
+define i1 @icmp_and_ashr_multiuse(i32 %X) {
+; CHECK-LABEL: @icmp_and_ashr_multiuse(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 240
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[X]], 496
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432
+; CHECK-NEXT:    [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]]
+; CHECK-NEXT:    ret i1 [[AND3]]
+;
+  %shr = ashr i32 %X, 4
+  %and = and i32 %shr, 15
+  %and2 = and i32 %shr, 31 ; second use of the shift
+  %tobool = icmp ne i32 %and, 14
+  %tobool2 = icmp ne i32 %and2, 27
+  %and3 = and i1 %tobool, %tobool2
+  ret i1 %and3
+}
+
+define i1 @icmp_lshr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_lshr_and_overshift(
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
+;
+  %shr = lshr i8 %X, 5
+  %and = and i8 %shr, 15
+  %tobool = icmp ne i8 %and, 0
+  ret i1 %tobool
+}
+
+; We shouldn't simplify this because the and uses bits that are shifted in.
+define i1 @icmp_ashr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_ashr_and_overshift(
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[SHR]], 15
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[TOBOOL]]
+;
+  %shr = ashr i8 %X, 5
+  %and = and i8 %shr, 15
+  %tobool = icmp ne i8 %and, 0
+  ret i1 %tobool
+}
+
 ; PR16244
 define i1 @test71(i8* %x) {
 ; CHECK-LABEL: @test71(
Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1524,25 +1524,28 @@
   const APInt *C3;
   if (match(Shift->getOperand(1), m_APInt(C3))) {
     bool CanFold = false;
-    if (ShiftOpcode == Instruction::AShr) {
-      // There may be some constraints that make this possible, but nothing
-      // simple has been discovered yet.
-      CanFold = false;
-    } else if (ShiftOpcode == Instruction::Shl) {
+    if (ShiftOpcode == Instruction::Shl) {
       // For a left shift, we can fold if the comparison is not signed. We can
       // also fold a signed comparison if the mask value and comparison value
       // are not negative. These constraints may not be obvious, but we can
       // prove that they are correct using an SMT solver.
       if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative()))
         CanFold = true;
-    } else if (ShiftOpcode == Instruction::LShr) {
+    } else {
+      bool IsAshr = ShiftOpcode == Instruction::AShr;
       // For a logical right shift, we can fold if the comparison is not signed.
       // We can also fold a signed comparison if the shifted mask value and the
       // shifted comparison value are not negative. These constraints may not be
       // obvious, but we can prove that they are correct using an SMT solver.
-      if (!Cmp.isSigned() ||
-          (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
-        CanFold = true;
+      // For an arithmetic shift right we can do the same, if we ensure
+      // the And doesn't use any bits being shifted in. Normally these would
+      // be turned into lshr by SimplifyDemandedBits, but not if there is an
+      // additional user.
+      if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) {
+        if (!Cmp.isSigned() ||
+            (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
+          CanFold = true;
+      }
     }
 
     if (CanFold) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38521.117600.patch
Type: text/x-patch
Size: 3826 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171004/010d981e/attachment.bin>


More information about the llvm-commits mailing list