[llvm] 55991b4 - [InstCombine] foldAndOrOfICmpsOfAndWithPow2 - add vector support

Fri Oct 16 02:44:42 PDT 2020

Author: Simon Pilgrim
Date: 2020-10-16T10:41:40+01:00
New Revision: 55991b44b7f96a0aaa33ac53fc229302ca8d5d02

URL: https://github.com/llvm/llvm-project/commit/55991b44b7f96a0aaa33ac53fc229302ca8d5d02
DIFF: https://github.com/llvm/llvm-project/commit/55991b44b7f96a0aaa33ac53fc229302ca8d5d02.diff

LOG: [InstCombine] foldAndOrOfICmpsOfAndWithPow2 - add vector support

Support vector cases for folding the following patterns, where K1 and K2 are each a power-of-two (single-bit) mask:

 (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
 (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/onehot_merge.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7960848fd9c6..fe585e43cf3d 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -855,10 +855,7 @@ Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
   if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
     return nullptr;
 
-  // TODO support vector splats
-  if (!match(LHS->getOperand(1), m_ConstantInt()) ||
-      !match(RHS->getOperand(1), m_ConstantInt()) ||
-      !match(LHS->getOperand(1), m_Zero()) ||
+  if (!match(LHS->getOperand(1), m_Zero()) ||
       !match(RHS->getOperand(1), m_Zero()))
     return nullptr;
 

diff  --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll
index 3d40d4b8c008..d98361f1b5f6 100644
--- a/llvm/test/Transforms/InstCombine/onehot_merge.ll
+++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll
@@ -17,12 +17,9 @@ define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @and_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @and_consts_vector(
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[K:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[K]], <i32 8, i32 8>
-; CHECK-NEXT:    [[T6:%.*]] = icmp eq <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], <i32 12, i32 12>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 12, i32 12>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %t1 = and <2 x i32> <i32 4, i32 4>, %k
   %t2 = icmp eq <2 x i32> %t1, zeroinitializer
@@ -55,12 +52,10 @@ define <2 x i1> @foo1_and_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_and_vector(
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[T]], [[K:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp eq <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[K:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
   %t4 = shl <2 x i32> <i32 1, i32 1>, %c2
@@ -99,12 +94,10 @@ define <2 x i1> @foo1_and_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32>
 ; CHECK-NEXT:    [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]]
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[K2]], [[T]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K2]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp eq <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %k2 = mul <2 x i32> %k, %k ; to trick the complexity sorting
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
@@ -133,12 +126,9 @@ define i1 @or_consts(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @or_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @or_consts_vector(
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[K:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[K]], <i32 8, i32 8>
-; CHECK-NEXT:    [[T6:%.*]] = icmp ne <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = and <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], <i32 12, i32 12>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 12, i32 12>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %t1 = and <2 x i32> <i32 4, i32 4>, %k
   %t2 = icmp ne <2 x i32> %t1, zeroinitializer
@@ -171,12 +161,10 @@ define <2 x i1> @foo1_or_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_or_vector(
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[T]], [[K:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp ne <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = and <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[K:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
   %t4 = shl <2 x i32> <i32 1, i32 1>, %c2
@@ -215,12 +203,10 @@ define <2 x i1> @foo1_or_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32>
 ; CHECK-NEXT:    [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]]
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[K2]], [[T]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K2]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp ne <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = and <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %k2 = mul <2 x i32> %k, %k ; to trick the complexity sorting
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
@@ -256,12 +242,10 @@ define <2 x i1> @foo1_and_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x
 ; CHECK-LABEL: @foo1_and_signbit_lshr_vector(
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483648>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[T]], [[K:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp eq <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[K:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
   %t4 = lshr <2 x i32> <i32 -2147483648, i32 -2147483648>, %c2
@@ -296,12 +280,10 @@ define <2 x i1> @foo1_or_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i
 ; CHECK-LABEL: @foo1_or_signbit_lshr_vector(
 ; CHECK-NEXT:    [[T:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483648>, [[C2:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[T]], [[K:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp ne <2 x i32> [[T1]], zeroinitializer
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], [[K]]
-; CHECK-NEXT:    [[T6:%.*]] = icmp ne <2 x i32> [[T5]], zeroinitializer
-; CHECK-NEXT:    [[OR:%.*]] = and <2 x i1> [[T2]], [[T6]]
-; CHECK-NEXT:    ret <2 x i1> [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[K:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t = shl <2 x i32> <i32 1, i32 1>, %c1
   %t4 = lshr <2 x i32> <i32 -2147483648, i32 -2147483648>, %c2