[llvm] 4abab5c - [InstCombine] generalize canonicalization of masked equality comparisons

Sat Apr 25 08:36:49 PDT 2020

Author: Sanjay Patel
Date: 2020-04-25T11:31:57-04:00
New Revision: 4abab5c5ca7b562b80fdb5fb6279e6d2104dae16

URL: https://github.com/llvm/llvm-project/commit/4abab5c5ca7b562b80fdb5fb6279e6d2104dae16
DIFF: https://github.com/llvm/llvm-project/commit/4abab5c5ca7b562b80fdb5fb6279e6d2104dae16.diff

LOG: [InstCombine] generalize canonicalization of masked equality comparisons

  (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
  (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC

We have more analysis for 'and' patterns and already lean this way
in the existing code, so this should be neutral or better in IR.

If this does not do as well in codegen, the problem already exists
and we should fix that based on target costs/heuristics.

http://volta.cs.utah.edu:8080/z/oP3ecL

define void @src(i8 %x, i8 %OrC, i8 %C, i1* %p0, i1* %p1) {
  %or = or i8 %x, %OrC
  %eq = icmp eq i8 %or, %C
  store i1 %eq, i1* %p0

  %ne = icmp ne i8 %or, %C
  store i1 %ne, i1* %p1
  ret void
}

define void @tgt(i8 %x, i8 %OrC, i8 %C, i1* %p0, i1* %p1) {
  %NotOrC = xor i8 %OrC, -1
  %a = and i8 %x, %NotOrC
  %NewC = xor i8 %C, %OrC
  %eq = icmp eq i8 %a, %NewC
  store i1 %eq, i1* %p0

  %ne = icmp ne i8 %a, %NewC
  store i1 %ne, i1* %p1
  ret void
}

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/and-or-icmps.ll
    llvm/test/Transforms/InstCombine/assume2.ll
    llvm/test/Transforms/InstCombine/icmp-or.ll
    llvm/test/Transforms/InstCombine/icmp.ll
    llvm/test/Transforms/InstCombine/load-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 37ed6a0e3888..3a9a46f8ca52 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1886,20 +1886,24 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
   }
 
   Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
-  if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) {
-    // X | C == C --> X <=u C
-    // X | C != C --> X  >u C
-    //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
-    if ((C + 1).isPowerOf2()) {
+  const APInt *MaskC;
+  if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) {
+    if (*MaskC == C && (C + 1).isPowerOf2()) {
+      // X | C == C --> X <=u C
+      // X | C != C --> X  >u C
+      //   iff C+1 is a power of 2 (C is a bitmask of the low bits)
       Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
       return new ICmpInst(Pred, OrOp0, OrOp1);
     }
-    // More general: are all bits outside of a mask constant set or not set?
-    // X | C == C --> (X & ~C) == 0
-    // X | C != C --> (X & ~C) != 0
+
+    // More general: canonicalize 'equality with set bits mask' to
+    // 'equality with clear bits mask'.
+    // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC
+    // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC
     if (Or->hasOneUse()) {
-      Value *A = Builder.CreateAnd(OrOp0, ~C);
-      return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType()));
+      Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC));
+      Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC));
+      return new ICmpInst(Pred, And, NewC);
     }
   }
 

diff  --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index cf822751ae34..56d5b04c50c7 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -43,8 +43,8 @@ define i1 @PR2330(i32 %a, i32 %b) {
 
 define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 50
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp eq i32 %x, 50
@@ -57,8 +57,8 @@ define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
 
 define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 50
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp ne i32 %x, 51
@@ -71,8 +71,8 @@ define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
 
 define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -33
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 65
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp eq i32 %x, 97
@@ -83,8 +83,8 @@ define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
 
 define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i19 [[X:%.*]], 128
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
+; CHECK-NEXT:    [[TMP1:%.*]] = and i19 [[X:%.*]], -129
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 65
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp ne i19 %x, 65
@@ -97,8 +97,8 @@ define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
 
 define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 126
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp eq i8 %x, 254
@@ -109,8 +109,8 @@ define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
 
 define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
 ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X:%.*]], -128
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], 127
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 65
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %cmp1 = icmp ne i8 %x, 65
@@ -179,8 +179,8 @@ define i1 @and_ne_with_diff_one_signed(i64 %x) {
 
 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 32, i32 32>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -33, i32 -33>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 65, i32 65>
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>

diff  --git a/llvm/test/Transforms/InstCombine/assume2.ll b/llvm/test/Transforms/InstCombine/assume2.ll
index 8dc8831fffa5..964b91aa042e 100644
--- a/llvm/test/Transforms/InstCombine/assume2.ll
+++ b/llvm/test/Transforms/InstCombine/assume2.ll
@@ -36,8 +36,8 @@ define i32 @test2(i32 %a) #0 {
 
 define i32 @test3(i32 %a) #0 {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[V:%.*]] = or i32 [[A:%.*]], -16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], -11
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 5
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    ret i32 5
 ;
@@ -50,8 +50,8 @@ define i32 @test3(i32 %a) #0 {
 
 define i32 @test4(i32 %a) #0 {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[V:%.*]] = or i32 [[A:%.*]], -16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], -6
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 10
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT:    ret i32 2
 ;

diff  --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll
index 5fd19edc2491..ac2274e86724 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -3,8 +3,8 @@
 
 define i1 @set_low_bit_mask_eq(i8 %x) {
 ; CHECK-LABEL: @set_low_bit_mask_eq(
-; CHECK-NEXT:    [[SUB:%.*]] = or i8 [[X:%.*]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SUB]], 19
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP1]], 18
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %sub = or i8 %x, 1
@@ -14,8 +14,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) {
 
 define <2 x i1> @set_low_bit_mask_ne(<2 x i8> %x) {
 ; CHECK-LABEL: @set_low_bit_mask_ne(
-; CHECK-NEXT:    [[SUB:%.*]] = or <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[SUB]], <i8 19, i8 19>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 16, i8 16>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = or <2 x i8> %x, <i8 3, i8 3>

diff  --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 4786c67375f6..67d254a3fe52 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -2163,8 +2163,8 @@ define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) {
 
 define i1 @icmp_sub_3_X_ult_2(i32 %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_ult_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %add = sub i32 3, %X
@@ -2174,8 +2174,8 @@ define i1 @icmp_sub_3_X_ult_2(i32 %X) {
 
 define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 2, i32 2>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = sub <2 x i32> <i32 3, i32 3>, %X
@@ -2207,8 +2207,8 @@ define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) {
 
 define i1 @icmp_sub_3_X_uge_2(i32 %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_uge_2(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %add = sub i32 3, %X
@@ -2218,8 +2218,8 @@ define i1 @icmp_sub_3_X_uge_2(i32 %X) {
 
 define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 2, i32 2>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = sub <2 x i32> <i32 3, i32 3>, %X

diff  --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 7b24ce309a98..84f692dd4595 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -156,8 +156,8 @@ define i1 @test7(i32 %X) {
 
 define i1 @test8(i32 %X) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X