[llvm] d2d2379 - [InstCombine] improve demanded bits for Sub operand 0

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 27 07:19:11 PDT 2022


Author: Sanjay Patel
Date: 2022-10-27T09:41:57-04:00
New Revision: d2d23795cac9ecfae9f5f18be05e4211d4c0cac2

URL: https://github.com/llvm/llvm-project/commit/d2d23795cac9ecfae9f5f18be05e4211d4c0cac2
DIFF: https://github.com/llvm/llvm-project/commit/d2d23795cac9ecfae9f5f18be05e4211d4c0cac2.diff

LOG: [InstCombine] improve demanded bits for Sub operand 0

This copies the code that was added for 'add' with D130075.
(That patch removed a fallthrough between the cases. We can
probably still share at least some code again in a follow-up
cleanup, but I didn't want to risk it here.)

The reasoning is similar to the carry propagation for 'add':
if we don't demand low bits of the subtraction and the
subtrahend (aka RHS or operand 1) is known zero in those low
bits, then no borrow from the higher bits of operand 0 can be
required, so those low bits of operand 0 don't matter.

Also, the no-wrap flags can be propagated (and I think that
should be true for add too).

Here's an attempt to prove that in Alive2:
https://alive2.llvm.org/ce/z/xqh7Pa
(can add nsw or nuw to src and tgt, and it should still pass)

Differential Revision: https://reviews.llvm.org/D136788

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/sub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index d682f3bfb2803..25b005b31dd88 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -549,6 +549,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1))
       return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
 
+    // If low order bits are not demanded and are known to be zero in RHS,
+    // then we don't need to demand them from LHS, since they can't cause a
+    // borrow from any bits that are demanded in the result.
+    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countTrailingOnes();
+    APInt DemandedFromLHS = DemandedFromOps;
+    DemandedFromLHS.clearLowBits(NTZ);
+    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
+        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1))
+      return I;
+
     // If we are known to be subtracting zeros from every bit below
     // the highest demanded bit, we just return the other side.
     if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))

diff  --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 9dfea31eb92ec..8c9957e480557 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -2120,12 +2120,12 @@ define i8 @demand_low_bits_uses_commute(i8 %x, i8 %y, i8 %z) {
   ret i8 %r
 }
 
+; sub becomes negate and combines with shl
+
 define i8 @shrink_sub_from_constant_lowbits(i8 %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits(
-; CHECK-NEXT:    [[X000:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 7, [[X000]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[SUB]], -8
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    [[X000_NEG:%.*]] = mul i8 [[X:%.*]], -8
+; CHECK-NEXT:    ret i8 [[X000_NEG]]
 ;
   %x000 = shl i8 %x, 3   ; 3 low bits are known zero
   %sub = sub i8 7, %x000
@@ -2133,6 +2133,8 @@ define i8 @shrink_sub_from_constant_lowbits(i8 %x) {
   ret i8 %r
 }
 
+; negative test - extra use prevents shrinking '7'
+
 define i8 @shrink_sub_from_constant_lowbits_uses(i8 %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits_uses(
 ; CHECK-NEXT:    [[X000:%.*]] = shl i8 [[X:%.*]], 3
@@ -2148,10 +2150,12 @@ define i8 @shrink_sub_from_constant_lowbits_uses(i8 %x) {
   ret i8 %r
 }
 
+; safe to clear 3 low bits (2 higher bits remain set)
+
 define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits2(
 ; CHECK-NEXT:    [[X000:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i8 30, [[X000]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i8 24, [[X000]]
 ; CHECK-NEXT:    [[R:%.*]] = and i8 [[SUB]], -16
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -2161,11 +2165,13 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
   ret i8 %r
 }
 
+; safe to clear 3 low bits (2 higher bits remain set)
+
 define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
 ; CHECK-NEXT:    [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 31, i8 31>, [[X0000]]
-; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i8> [[SUB]], <i8 3, i8 3>
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 24, i8 24>, [[X0000]]
+; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i8> [[SUB]], <i8 3, i8 3>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x0000 = shl <2 x i8> %x, <i8 4, i8 4>     ; 4 low bits are known zero
@@ -2174,12 +2180,14 @@ define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
   ret <2 x i8> %r
 }
 
+; eliminate the mask of y or the mask of the result
+
 define i8 @demand_sub_from_variable_lowbits(i8 %x, i8 %y) {
 ; CHECK-LABEL: @demand_sub_from_variable_lowbits(
 ; CHECK-NEXT:    [[X000:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[Y000:%.*]] = and i8 [[Y:%.*]], -8
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[Y000]], [[X000]]
-; CHECK-NEXT:    ret i8 [[SUB]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[Y:%.*]], [[X000]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[SUB]], -8
+; CHECK-NEXT:    ret i8 [[R]]
 ;
   %x000 = shl i8 %x, 3   ; 3 low bits are known zero
   %y000 = and i8 %y, -8
@@ -2188,11 +2196,12 @@ define i8 @demand_sub_from_variable_lowbits(i8 %x, i8 %y) {
   ret i8 %r
 }
 
+; setting the low 3 bits of y doesn't change anything
+
 define i8 @demand_sub_from_variable_lowbits2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @demand_sub_from_variable_lowbits2(
 ; CHECK-NEXT:    [[X0000:%.*]] = shl i8 [[X:%.*]], 4
-; CHECK-NEXT:    [[Y111:%.*]] = or i8 [[Y:%.*]], 7
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i8 [[Y111]], [[X0000]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i8 [[Y:%.*]], [[X0000]]
 ; CHECK-NEXT:    [[R:%.*]] = lshr i8 [[SUB]], 4
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -2203,6 +2212,8 @@ define i8 @demand_sub_from_variable_lowbits2(i8 %x, i8 %y) {
   ret i8 %r
 }
 
+; negative test - the mask of y removes an extra bit, so that instruction is needed
+
 define i8 @demand_sub_from_variable_lowbits3(i8 %x, i8 %y) {
 ; CHECK-LABEL: @demand_sub_from_variable_lowbits3(
 ; CHECK-NEXT:    [[X0000:%.*]] = shl i8 [[X:%.*]], 4


        


More information about the llvm-commits mailing list