[llvm] 727e642 - [InstCombine] generalize fold for mask-with-signbit-splat

Fri Oct 15 13:31:55 PDT 2021

Author: Sanjay Patel
Date: 2021-10-15T16:25:48-04:00
New Revision: 727e642e970d028049d95a8af89a679a61868f15

URL: https://github.com/llvm/llvm-project/commit/727e642e970d028049d95a8af89a679a61868f15
DIFF: https://github.com/llvm/llvm-project/commit/727e642e970d028049d95a8af89a679a61868f15.diff

LOG: [InstCombine] generalize fold for mask-with-signbit-splat

(iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0

https://alive2.llvm.org/ce/z/qeYhdz

I was looking at a missing abs() transform and found my way to this
generalization of an existing fold that was added with D67799.
As discussed in that review, we want to make sure codegen handles
this difference well, and for all of the targets/types that I
spot-checked, it looks good.

I am leaving the existing fold in place in this commit because
it covers a potentially missing icmp fold, but I plan to remove
it in a follow-up commit, as suggested during review.

Differential Revision: https://reviews.llvm.org/D111410

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and.ll
    llvm/test/Transforms/InstCombine/icmp.ll
    llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
    llvm/test/Transforms/InstCombine/mul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3d35399b8fc68..d7ddc6b133290 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2062,14 +2062,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
 
   // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
-  if (match(&I, m_c_And(m_OneUse(m_AShr(
-                            m_NSWSub(m_Value(Y), m_Value(X)),
-                            m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
+  // TODO: This is a specific case of the more general pattern below, so it
+  //       should be removed.
+  unsigned FullShift = Ty->getScalarSizeInBits() - 1;
+  if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
+                                        m_SpecificInt(FullShift))),
                         m_Deferred(X)))) {
     Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
     return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
   }
 
+  // (iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0
+  if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
+                        m_Value(Y)))) {
+    Constant *Zero = ConstantInt::getNullValue(Ty);
+    Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+    return SelectInst::Create(Cmp, Y, Zero);
+  }
+
   // (~x) & y  -->  ~(x | (~y))  iff that gets rid of inversions
   if (sinkNotIntoOtherHandOfAndOrOr(I))
     return &I;

diff  --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index 2ce631885bef1..2b8a214b86afe 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1403,8 +1403,8 @@ define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) {
 
 define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_bitwidth_mask(
-; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = and i8 [[SIGN]], [[Y:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 [[Y:%.*]], i8 0
 ; CHECK-NEXT:    ret i8 [[NEG_OR_ZERO]]
 ;
   %sign = ashr i8 %x, 7
@@ -1415,8 +1415,8 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
 define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
 ; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
-; CHECK-NEXT:    [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
-; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[SIGN]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> [[Y]], <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[NEG_OR_ZERO]]
 ;
   %y = mul <2 x i8> %py, <i8 42, i8 2>      ; thwart complexity-based ordering
@@ -1425,6 +1425,8 @@ define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
   ret <2 x i8> %neg_or_zero
 }
 
+; negative test - extra use
+
 define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_bitwidth_mask_use(
 ; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
@@ -1438,6 +1440,8 @@ define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
   ret i8 %r
 }
 
+; negative test - wrong shift amount
+
 define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_not_bitwidth_mask(
 ; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
@@ -1449,6 +1453,8 @@ define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
   ret i8 %r
 }
 
+; negative test - wrong shift opcode
+
 define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_bitwidth_mask(
 ; CHECK-NEXT:    [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7

diff  --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 598d8b882bfb1..3122743eee065 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -100,8 +100,8 @@ define <2 x i1> @test5_zero() {
 
 define i32 @test6(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[A_LOBIT_NEG:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[F:%.*]] = and i32 [[A_LOBIT_NEG]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[F:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[F]]
 ;
   %c = icmp sle i32 %a, -1

diff  --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
index 7d2038ece44d0..6eee8ec5440bf 100644
--- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {
 
 define i32 @test10(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {
 
 define i32 @test11(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
 define i32 @test12(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[A_LOBIT]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -310,8 +310,8 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
 
 define i32 @signbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %d = lshr i32 %a, 31
@@ -322,8 +322,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
 define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul_commute_extra_use(
 ; CHECK-NEXT:    [[D:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[D]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -337,8 +337,8 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
 
 define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -348,8 +348,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 
 define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %d = lshr <2 x i32> %a, <i32 31, i32 31>

diff  --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 3901f58791508..76dc39598dc7f 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {
 
 define i32 @test10(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {
 
 define i32 @test11(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
 define i32 @test12(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[A_LOBIT]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -376,12 +376,12 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
   ret i32 %r
 }
 
-; (A >>u 31) * B --> (A >>s 31) & B
+; (A >>u 31) * B --> (A >>s 31) & B --> A < 0 ? B : 0
 
 define i32 @signbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
   %d = lshr i32 %a, 31
@@ -392,8 +392,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
 define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul_commute_extra_use(
 ; CHECK-NEXT:    [[D:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[D]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -403,12 +403,12 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
   ret i32 %e
 }
 
-; (A >>u 31)) * B --> (A >>s 31) & B
+; (A >>u 31)) * B --> (A >>s 31) & B --> A < 0 ? B : 0
 
 define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -418,8 +418,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 
 define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %d = lshr <2 x i32> %a, <i32 31, i32 31>