[llvm] 995d400 - [InstCombine] reduce mul operands based on undemanded high bits

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 10 05:11:49 PST 2022


Author: Sanjay Patel
Date: 2022-02-10T08:10:22-05:00
New Revision: 995d400f3a3c3d47bad95551dad104f686c46305

URL: https://github.com/llvm/llvm-project/commit/995d400f3a3c3d47bad95551dad104f686c46305
DIFF: https://github.com/llvm/llvm-project/commit/995d400f3a3c3d47bad95551dad104f686c46305.diff

LOG: [InstCombine] reduce mul operands based on undemanded high bits

We already do this in SDAG, but mul was left out of the fold
for unused high bits in IR.

The high bits of a mul's operands do not change the low bits
of the result:
https://alive2.llvm.org/ce/z/XRj5Ek

Verify some test diffs to confirm that they are correct:
https://alive2.llvm.org/ce/z/y_W8DW
https://alive2.llvm.org/ce/z/7DM5uf
https://alive2.llvm.org/ce/z/GDiHCK

This gets a fold that was presumed not possible in D114272:
https://alive2.llvm.org/ce/z/tAN-WY

Removing nsw/nuw is needed for general correctness (and is
also done in the codegen version), but we might be able to
recover more of those with better analysis.

Differential Revision: https://reviews.llvm.org/D119369

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/test/Transforms/InstCombine/and-or.ll
    llvm/test/Transforms/InstCombine/icmp-mul-and.ll
    llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
    llvm/test/Transforms/InstCombine/mul.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8778df1845b35..208fe68f7896c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -154,6 +154,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   if (Depth == 0 && !V->hasOneUse())
     DemandedMask.setAllBits();
 
+  // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
+  // about the high bits of the operands.
+  auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
+    unsigned NLZ = DemandedMask.countLeadingZeros();
+    // Right fill the mask of bits for the operands to demand the most
+    // significant bit and all those below it.
+    DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
+    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
+        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
+        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
+        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
+      if (NLZ > 0) {
+        // Disable the nsw and nuw flags here: We can no longer guarantee that
+        // we won't wrap after simplification. Removing the nsw/nuw flags is
+        // legal here because the top bit is not demanded.
+        I->setHasNoSignedWrap(false);
+        I->setHasNoUnsignedWrap(false);
+      }
+      return true;
+    }
+    return false;
+  };
+
   switch (I->getOpcode()) {
   default:
     computeKnownBits(I, Known, Depth, CxtI);
@@ -507,26 +530,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     }
     LLVM_FALLTHROUGH;
   case Instruction::Sub: {
-    /// If the high-bits of an ADD/SUB are not demanded, then we do not care
-    /// about the high bits of the operands.
-    unsigned NLZ = DemandedMask.countLeadingZeros();
-    // Right fill the mask of bits for this ADD/SUB to demand the most
-    // significant bit and all those below it.
-    APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
-    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
-        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
-        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
-        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
-      if (NLZ > 0) {
-        // Disable the nsw and nuw flags here: We can no longer guarantee that
-        // we won't wrap after simplification. Removing the nsw/nuw flags is
-        // legal here because the top bit is not demanded.
-        BinaryOperator &BinOP = *cast<BinaryOperator>(I);
-        BinOP.setHasNoSignedWrap(false);
-        BinOP.setHasNoUnsignedWrap(false);
-      }
+    APInt DemandedFromOps;
+    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
       return I;
-    }
 
     // If we are known to be adding/subtracting zeros to every bit below
     // the highest demanded bit, we just return the other side.
@@ -545,6 +551,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   }
   case Instruction::Mul: {
+    APInt DemandedFromOps;
+    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
+      return I;
+
     if (DemandedMask.isPowerOf2()) {
       // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
       // If we demand exactly one bit N and we have "X * (C' << N)" where C' is

diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
index 5876c9d0d8452..a9efd3b50366c 100644
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -290,7 +290,7 @@ define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) {
 
 define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
 ; CHECK-LABEL: @and_xor_hoist_mask_commute(
-; CHECK-NEXT:    [[C:%.*]] = mul i8 [[B:%.*]], 43
+; CHECK-NEXT:    [[C:%.*]] = mul i8 [[B:%.*]], 3
 ; CHECK-NEXT:    [[SH:%.*]] = lshr i8 [[A:%.*]], 6
 ; CHECK-NEXT:    [[C_MASKED:%.*]] = and i8 [[C]], 3
 ; CHECK-NEXT:    [[AND:%.*]] = xor i8 [[C_MASKED]], [[SH]]
@@ -305,7 +305,7 @@ define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
 
 define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat(
-; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], <i8 43, i8 43>
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 6>
 ; CHECK-NEXT:    [[C_MASKED:%.*]] = and <2 x i8> [[C]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]]

diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
index e3bf0d790b1ab..196dac1b9d010 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
@@ -37,8 +37,8 @@ define i1 @mul_mask_pow2_ne0_use1(i8 %x) {
 
 define i1 @mul_mask_pow2_ne0_use2(i8 %x) {
 ; CHECK-LABEL: @mul_mask_pow2_ne0_use2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[TMP1]], 8
+; CHECK-NEXT:    [[MUL:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
 ; CHECK-NEXT:    call void @use(i8 [[AND]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -96,7 +96,7 @@ define i1 @mul_mask_pow2_eq4(i8 %x) {
 
 define i1 @mul_mask_notpow2_ne(i8 %x) {
 ; CHECK-LABEL: @mul_mask_notpow2_ne(
-; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 60
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 12
 ; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 12
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -121,7 +121,7 @@ define i1 @pr40493(i32 %area) {
 
 define i1 @pr40493_neg1(i32 %area) {
 ; CHECK-LABEL: @pr40493_neg1(
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[AREA:%.*]], 11
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[AREA:%.*]], 3
 ; CHECK-NEXT:    [[REM:%.*]] = and i32 [[MUL]], 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[REM]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -147,8 +147,8 @@ define i1 @pr40493_neg2(i32 %area) {
 
 define i32 @pr40493_neg3(i32 %area) {
 ; CHECK-LABEL: @pr40493_neg3(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[AREA:%.*]], 2
-; CHECK-NEXT:    [[REM:%.*]] = and i32 [[TMP1]], 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[AREA:%.*]], 2
+; CHECK-NEXT:    [[REM:%.*]] = and i32 [[MUL]], 4
 ; CHECK-NEXT:    ret i32 [[REM]]
 ;
   %mul = mul i32 %area, 12
@@ -222,10 +222,7 @@ define <4 x i1> @pr40493_vec5(<4 x i32> %area) {
 
 define i1 @pr51551(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 3
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 3
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -239,10 +236,7 @@ define i1 @pr51551(i32 %x, i32 %y) {
 
 define i1 @pr51551_2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_2(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -256,9 +250,9 @@ define i1 @pr51551_2(i32 %x, i32 %y) {
 
 define i1 @pr51551_neg1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_neg1(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -4
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], 4
 ; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[T1]], [[X:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -273,8 +267,8 @@ define i1 @pr51551_neg1(i32 %x, i32 %y) {
 
 define i1 @pr51551_neg2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_neg2(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -7
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T0]], [[X:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], 1
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i32 [[T0]], [[X:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -288,10 +282,7 @@ define i1 @pr51551_neg2(i32 %x, i32 %y) {
 
 define i32 @pr51551_demand3bits(i32 %x, i32 %y) {
 ; CHECK-LABEL: @pr51551_demand3bits(
-; CHECK-NEXT:    [[T0:%.*]] = and i32 [[Y:%.*]], -8
-; CHECK-NEXT:    [[T1:%.*]] = or i32 [[T0]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[MUL]], 7
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 7
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %t0 = and i32 %y, -7

diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
index 101d97cde0263..7e37f6ba4847f 100644
--- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -1073,7 +1073,7 @@ define <2 x i32> @muladd2_vec_nonuniform_undef(<2 x i32> %a0) {
 
 define i32 @mulmuladd2(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: @mulmuladd2(
-; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 -16, [[A0:%.*]]
+; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 1073741808, [[A0:%.*]]
 ; CHECK-NEXT:    [[MUL1_NEG:%.*]] = mul i32 [[ADD_NEG]], [[A1:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl i32 [[MUL1_NEG]], 2
 ; CHECK-NEXT:    ret i32 [[MUL2]]

diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 9d26d36b5be3c..43957a0e2a0a1 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -1134,7 +1134,7 @@ define <2 x i32> @muladd2_vec_nonuniform_undef(<2 x i32> %a0) {
 
 define i32 @mulmuladd2(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: @mulmuladd2(
-; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 -16, [[A0:%.*]]
+; CHECK-NEXT:    [[ADD_NEG:%.*]] = sub i32 1073741808, [[A0:%.*]]
 ; CHECK-NEXT:    [[MUL1_NEG:%.*]] = mul i32 [[ADD_NEG]], [[A1:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl i32 [[MUL1_NEG]], 2
 ; CHECK-NEXT:    ret i32 [[MUL2]]


        


More information about the llvm-commits mailing list