[llvm] fae9009 - [InstCombine] reduce demand-limited bool math to logic

Wed Mar 11 12:49:54 PDT 2020

Author: Sanjay Patel
Date: 2020-03-11T15:45:58-04:00
New Revision: fae900921b12ea04e43a5d010a094eb6f5c19243

URL: https://github.com/llvm/llvm-project/commit/fae900921b12ea04e43a5d010a094eb6f5c19243
DIFF: https://github.com/llvm/llvm-project/commit/fae900921b12ea04e43a5d010a094eb6f5c19243.diff

LOG: [InstCombine] reduce demand-limited bool math to logic

The cmp math test is inspired by memcmp() patterns seen in D75840.
I know there's at least one related fold we can do here if both
values are sext'd, but I'm not seeing a way to generalize further.

We have some other bool math patterns that we want to reduce, but
doing so might require fixing the bogus transforms noted in D72396.

Alive proof translations of the regression tests:
https://rise4fun.com/Alive/zGWi

  Name: demand add 1
  %xz = zext i1 %x to i32
  %ys = sext i1 %y to i32
  %sub = add i32 %xz, %ys
  %r = lshr i32 %sub, 31
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = zext i1 %and to i32

  Name: demand add 2
  %xz = zext i1 %x to i5
  %ys = sext i1 %y to i5
  %sub = add i5 %xz, %ys
  %r = and i5 %sub, 16
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = select i1 %and, i5 -16, i5 0

  Name: demand add 3
  %xz = zext i1 %x to i8
  %ys = sext i1 %y to i8
  %a = add i8 %ys, %xz
  %r = ashr i8 %a, 7
  =>
  %notx = xor i1 %x, 1
  %and = and i1 %y, %notx
  %r = sext i1 %and to i8

  Name: cmp math
  %gt = icmp ugt i32 %x, %y
  %lt = icmp ult i32 %x, %y
  %xz = zext i1 %gt to i32
  %yz = zext i1 %lt to i32
  %s = sub i32 %xz, %yz
  %r = lshr i32 %s, 31
  =>
  %r = zext i1 %lt to i32

Differential Revision: https://reviews.llvm.org/D75961

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0f9a008fd109..59ed96a0257a 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -454,6 +454,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   }
   case Instruction::Add:
+    if ((DemandedMask & 1) == 0) {
+      // If we do not need the low bit, try to convert bool math to logic:
+      //   add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
+      // Truth table for inputs and output signbits:
+      //       X:0 | X:1
+      //      ----------
+      // Y:0  |  0 | 0 |
+      // Y:1  | -1 | 0 |
+      //      ----------
+      Value *X, *Y;
+      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
+                           m_OneUse(m_SExt(m_Value(Y))))) &&
+          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
+        IRBuilderBase::InsertPointGuard Guard(Builder);
+        Builder.SetInsertPoint(I);
+        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
+        return Builder.CreateSExt(AndNot, VTy);
+      }
+    }
+    LLVM_FALLTHROUGH;
   case Instruction::Sub: {
     /// If the high-bits of an ADD/SUB are not demanded, then we do not care
     /// about the high bits of the operands.

diff  --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index f66339c62a9c..27438b3d86e6 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -1055,10 +1055,9 @@ define <2 x i32> @test44_vec_non_splat(<2 x i32> %A) {
 
 define i32 @lshr_add(i1 %x, i1 %y) {
 ; CHECK-LABEL: @lshr_add(
-; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
-; CHECK-NEXT:    [[YS:%.*]] = sext i1 [[Y:%.*]] to i32
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[XZ]], [[YS]]
-; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %xz = zext i1 %x to i32
@@ -1070,10 +1069,9 @@ define i32 @lshr_add(i1 %x, i1 %y) {
 
 define i5 @and_add(i1 %x, i1 %y) {
 ; CHECK-LABEL: @and_add(
-; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[X:%.*]] to i5
-; CHECK-NEXT:    [[YS:%.*]] = sext i1 [[Y:%.*]] to i5
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i5 [[XZ]], [[YS]]
-; CHECK-NEXT:    [[R:%.*]] = and i5 [[SUB]], -2
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP2]], i5 -2, i5 0
 ; CHECK-NEXT:    ret i5 [[R]]
 ;
   %xz = zext i1 %x to i5
@@ -1085,11 +1083,10 @@ define i5 @and_add(i1 %x, i1 %y) {
 
 define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) {
 ; CHECK-LABEL: @ashr_add_commute(
-; CHECK-NEXT:    [[XZ:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[YS:%.*]] = sext <2 x i1> [[Y:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i8> [[YS]], [[XZ]]
-; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i8> [[SUB]], <i8 1, i8 1>
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[TMP3]]
 ;
   %xz = zext <2 x i1> %x to <2 x i8>
   %ys = sext <2 x i1> %y to <2 x i8>
@@ -1100,12 +1097,8 @@ define <2 x i8> @ashr_add_commute(<2 x i1> %x, <2 x i1> %y) {
 
 define i32 @cmp_math(i32 %x, i32 %y) {
 ; CHECK-LABEL: @cmp_math(
-; CHECK-NEXT:    [[GT:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[LT:%.*]] = icmp ult i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[GT]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i1 [[LT]] to i32
-; CHECK-NEXT:    [[S:%.*]] = add nsw i32 [[XZ]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 31
+; CHECK-NEXT:    [[LT:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = zext i1 [[LT]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %gt = icmp ugt i32 %x, %y
@@ -1117,6 +1110,8 @@ define i32 @cmp_math(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test - wrong type
+
 define i32 @lshr_add_nonbool(i2 %x, i1 %y) {
 ; CHECK-LABEL: @lshr_add_nonbool(
 ; CHECK-NEXT:    [[XZ:%.*]] = zext i2 [[X:%.*]] to i32
@@ -1132,6 +1127,8 @@ define i32 @lshr_add_nonbool(i2 %x, i1 %y) {
   ret i32 %r
 }
 
+; Negative test - wrong demand
+
 define i32 @and31_add(i1 %x, i1 %y) {
 ; CHECK-LABEL: @and31_add(
 ; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
@@ -1147,6 +1144,8 @@ define i32 @and31_add(i1 %x, i1 %y) {
   ret i32 %r
 }
 
+; Negative test - extra use
+
 define i32 @lshr_add_use(i1 %x, i1 %y, i32* %p) {
 ; CHECK-LABEL: @lshr_add_use(
 ; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[X:%.*]] to i32
@@ -1164,6 +1163,8 @@ define i32 @lshr_add_use(i1 %x, i1 %y, i32* %p) {
   ret i32 %r
 }
 
+; Negative test - extra use
+
 define i32 @lshr_add_use2(i1 %x, i1 %y, i32* %p) {
 ; CHECK-LABEL: @lshr_add_use2(
 ; CHECK-NEXT:    [[XZ:%.*]] = zext i1 [[X:%.*]] to i32